Some font work

This commit is contained in:
John Whitington 2014-09-08 17:55:14 +01:00
parent 98a2164f38
commit 5e09b88ed7
4 changed files with 76 additions and 19 deletions

View File

@ -5,7 +5,7 @@ SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
RESULT = cpdf RESULT = cpdf
ANNOTATE = true ANNOTATE = true
PACKS = camlpdf PACKS = camlpdf xml-light
OCAMLNCFLAGS = -g OCAMLNCFLAGS = -g
OCAMLBCFLAGS = -g OCAMLBCFLAGS = -g

89
cpdf.ml
View File

@ -1322,16 +1322,18 @@ let get_metadata pdf =
| Some ((Pdf.Stream _) as s) -> | Some ((Pdf.Stream _) as s) ->
Pdf.getstream s; Pdf.getstream s;
begin match s with begin match s with
| Pdf.Stream {contents = (_, Pdf.Got data)} -> data | Pdf.Stream {contents = (_, Pdf.Got data)} -> Some data
| _ -> assert false | _ -> assert false
end end
| _ -> mkbytes 0 | _ -> None
let print_metadata pdf = let print_metadata pdf =
let data = get_metadata pdf in match get_metadata pdf with
for x = 0 to bytes_size data - 1 do None -> ()
Printf.printf "%c" (char_of_int (bget data x)) | Some data ->
done for x = 0 to bytes_size data - 1 do
Printf.printf "%c" (char_of_int (bget data x))
done
(* \section{Print font data} *) (* \section{Print font data} *)
let list_font pdf page (name, dict) = let list_font pdf page (name, dict) =
@ -1626,6 +1628,59 @@ let find_justification_offsets longest_w w position = function
| ReverseDiagonal -> 0. | ReverseDiagonal -> 0.
end end
(* Look up entry [s] in the table [header], lex its string value, and return
   it as an int. Raises [Failure "extract_num"] unless the value lexes to
   exactly one integer token. A missing key is not handled here: whatever
   [Hashtbl.find] raises propagates to the caller. *)
let extract_num header s =
  let tokens = Pdfgenlex.lex_string (Hashtbl.find header s) in
  match tokens with
  | [Pdfgenlex.LexInt n] -> n
  | _ -> raise (Failure "extract_num")
(* Look up entry [s] in the table [header], lex its string value, and return
   the four integers found there as a list of [Pdf.Integer] objects, in
   order. Raises [Failure "extract_fontbbox"] unless the value lexes to
   exactly four integer tokens. *)
let extract_fontbbox header s =
  let to_pdf_integer = function
    | Pdfgenlex.LexInt i -> Pdf.Integer i
    | _ -> raise (Failure "extract_fontbbox")
  in
  match Pdfgenlex.lex_string (Hashtbl.find header s) with
  | [_; _; _; _] as tokens -> List.map to_pdf_integer tokens
  | _ -> raise (Failure "extract_fontbbox")
(* Placeholder: the width data is ignored and the result is always the
   empty list. *)
let extract_widths _width_data = []
(* Placeholder: the header is ignored and the (first, last) character codes
   are always reported as (0, 0). *)
let extract_firstlast _header = (0, 0)
(* Build a /Type1 font dictionary, with an inline /FontDescriptor, for the
   standard font named [fontname] (given without the leading "/").

   The descriptor values come from the font's AFM header as returned by
   [Pdfstandard14.afm_data], plus [Pdfstandard14.flags_of_standard_font] and
   [Pdfstandard14.stemv_of_standard_font]. /FirstChar, /LastChar and /Widths
   come from [extract_firstlast] and [extract_widths].

   Fails via [unopt] if [fontname] is not a standard font name. Raises
   [Failure] if a header entry which is present cannot be lexed as the
   expected number(s), and [Not_found] if FontBBox or ItalicAngle is missing
   from the header. *)
let make_font fontname =
  let font = unopt (Pdftext.standard_font_of_name ("/" ^ fontname)) in
  let header, width_data, _ = Pdfstandard14.afm_data font in
  (* Ascender, Descender and CapHeight are optional in AFM files and some
     standard fonts (the symbolic ones) omit them. Use 0 instead of letting
     Not_found escape. A present but malformed entry still raises Failure. *)
  let optional_metric key =
    try extract_num header key with Not_found -> 0
  in
  (* ItalicAngle may be fractional in an AFM header (e.g. -15.5), which the
     integer-only extract_num rejects. Lex it here and keep reals as reals. *)
  let italicangle =
    match Pdfgenlex.lex_string (Hashtbl.find header "ItalicAngle") with
    | [Pdfgenlex.LexInt i] -> Pdf.Integer i
    | [Pdfgenlex.LexReal f] -> Pdf.Real f
    | _ -> raise (Failure "extract_num")
  in
  let widths = extract_widths width_data in
  let firstchar, lastchar = extract_firstlast header in
  let flags = Pdfstandard14.flags_of_standard_font font in
  let fontbbox = extract_fontbbox header "FontBBox" in
  let ascent = optional_metric "Ascender" in
  let descent = optional_metric "Descender" in
  let capheight = optional_metric "CapHeight" in
  let stemv = Pdfstandard14.stemv_of_standard_font font in
  let fontdescriptor =
    Pdf.Dictionary
      [("/Type", Pdf.Name "/FontDescriptor");
       ("/FontName", Pdf.Name ("/" ^ fontname));
       ("/Flags", Pdf.Integer flags);
       ("/FontBBox", Pdf.Array fontbbox);
       ("/ItalicAngle", italicangle);
       ("/Ascent", Pdf.Integer ascent);
       ("/Descent", Pdf.Integer descent);
       ("/CapHeight", Pdf.Integer capheight);
       ("/StemV", Pdf.Integer stemv)]
  in
  Pdf.Dictionary
    [("/Type", Pdf.Name "/Font");
     ("/Encoding", Pdf.Name "/WinAnsiEncoding");
     ("/Subtype", Pdf.Name "/Type1");
     ("/BaseFont", Pdf.Name ("/" ^ fontname));
     ("/FirstChar", Pdf.Integer firstchar);
     ("/LastChar", Pdf.Integer lastchar);
     ("/Widths", Pdf.Array widths);
     ("/FontDescriptor", fontdescriptor)]
let addtext let addtext
metrics lines linewidth outline fast colour fontname bates fontsize font metrics lines linewidth outline fast colour fontname bates fontsize font
underneath position hoffset voffset text pages orientation cropbox opacity underneath position hoffset voffset text pages orientation cropbox opacity
@ -1720,15 +1775,10 @@ let addtext
let newresources = let newresources =
match font with match font with
| Some _ -> | Some _ ->
let thefont = let newfontdict =
Pdf.Dictionary Pdf.add_dict_entry fontdict unique_fontname (make_font fontname)
[("/Type", Pdf.Name "/Font");
("/Encoding", Pdf.Name "/WinAnsiEncoding");
("/Subtype", Pdf.Name "/Type1");
("/BaseFont", Pdf.Name ("/" ^ fontname))]
in in
let newfontdict = Pdf.add_dict_entry fontdict unique_fontname thefont in Pdf.add_dict_entry resources' "/Font" newfontdict
Pdf.add_dict_entry resources' "/Font" newfontdict
| None -> page.Pdfpage.resources | None -> page.Pdfpage.resources
in in
let page = {page with Pdfpage.resources = newresources} in let page = {page with Pdfpage.resources = newresources} in
@ -2624,6 +2674,14 @@ let get_info_utf8 pdf =
| Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s | Some (Pdf.String s) -> Pdftext.utf8_of_pdfdocstring s
| _ -> "") | _ -> "")
(* Write the document's metadata stream to stdout twice: first verbatim,
   then re-serialised after a round trip through the XML parser. Does
   nothing when [get_metadata] finds no metadata.
   NOTE(review): nothing here catches exceptions from [Xml.parse_string];
   presumably malformed metadata would make this raise - confirm. *)
let output_xml_info pdf =
  match get_metadata pdf with
  | Some metadata ->
      let raw = string_of_bytes metadata in
      print_string raw;
      print_string (Xml.to_string (Xml.parse_string raw))
  | None -> ()
let output_info encoding pdf = let output_info encoding pdf =
let getstring = let getstring =
match encoding with match encoding with
@ -2640,7 +2698,8 @@ let output_info encoding pdf =
Printf.printf "Creator: %s\n" (getstring "/Creator"); Printf.printf "Creator: %s\n" (getstring "/Creator");
Printf.printf "Producer: %s\n" (getstring "/Producer"); Printf.printf "Producer: %s\n" (getstring "/Producer");
Printf.printf "Created: %s\n" (getstring "/CreationDate"); Printf.printf "Created: %s\n" (getstring "/CreationDate");
Printf.printf "Modified: %s\n" (getstring "/ModDate") Printf.printf "Modified: %s\n" (getstring "/ModDate");
output_xml_info pdf
(* \section{Blacken text} *) (* \section{Blacken text} *)

View File

@ -163,7 +163,7 @@ val set_metadata_from_bytes : bool -> Pdfio.bytes -> Pdf.t -> Pdf.t
val remove_metadata : Pdf.t -> Pdf.t val remove_metadata : Pdf.t -> Pdf.t
(** Extract metadata to a [Pdfio.bytes] *) (** Extract metadata to a [Pdfio.bytes] *)
val get_metadata : Pdf.t -> Pdfio.bytes val get_metadata : Pdf.t -> Pdfio.bytes option
(** Print metadata to stdout *) (** Print metadata to stdout *)
val print_metadata : Pdf.t -> unit val print_metadata : Pdf.t -> unit

View File

@ -2820,8 +2820,6 @@ let go () =
| _ -> error "Clean: No output specified" | _ -> error "Clean: No output specified"
end end
| Some Info -> | Some Info ->
(* Change as of 17th Sept 08 - now presents the pdf undecrypted so that
encryption info can be read out *)
let pdf, inname, input = let pdf, inname, input =
match args.inputs with match args.inputs with
| (InFile inname, _, _, u, o) as input::_ -> | (InFile inname, _, _, u, o) as input::_ ->