more
This commit is contained in:
parent
680e66505e
commit
bb5fae55d2
14
cpdf.ml
14
cpdf.ml
|
@ -1125,8 +1125,8 @@ let print_fonts pdf range =
|
||||||
|
|
||||||
(* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever
|
(* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever
|
||||||
is in the font (for existing fonts). *)
|
is in the font (for existing fonts). *)
|
||||||
let charcodes_of_utf8 pdf font s =
|
let charcodes_of_utf8 font s =
|
||||||
let extractor = Pdftext.charcode_extractor_of_font ~debug:false pdf font in
|
let extractor = Pdftext.charcode_extractor_of_font_real ~debug:false font in
|
||||||
let codepoints = Pdftext.codepoints_of_utf8 s in
|
let codepoints = Pdftext.codepoints_of_utf8 s in
|
||||||
let charcodes =
|
let charcodes =
|
||||||
option_map
|
option_map
|
||||||
|
@ -1141,12 +1141,8 @@ let charcodes_of_utf8 pdf font s =
|
||||||
(* Process codepoints back to UTF8, assuming it came from UTF8 to start with *)
|
(* Process codepoints back to UTF8, assuming it came from UTF8 to start with *)
|
||||||
let utf8_of_winansi s =
|
let utf8_of_winansi s =
|
||||||
let text_extractor =
|
let text_extractor =
|
||||||
Pdftext.text_extractor_of_font
|
Pdftext.text_extractor_of_font_real
|
||||||
(Pdf.empty ())
|
(Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding))
|
||||||
(Pdf.Dictionary
|
|
||||||
[("/BaseFont", Pdf.Name "/TimesRoman");
|
|
||||||
("/Subtype", Pdf.Name "/Type1");
|
|
||||||
("/Encoding", Pdf.Name "/WinAnsiEncoding")])
|
|
||||||
in
|
in
|
||||||
let codepoints = Pdftext.codepoints_of_text text_extractor s in
|
let codepoints = Pdftext.codepoints_of_text text_extractor s in
|
||||||
Pdftext.utf8_of_codepoints codepoints
|
Pdftext.utf8_of_codepoints codepoints
|
||||||
|
@ -1601,7 +1597,7 @@ let
|
||||||
end
|
end
|
||||||
| _ -> failwith "addtext: font dictionary not present"
|
| _ -> failwith "addtext: font dictionary not present"
|
||||||
in
|
in
|
||||||
let text = if raw then text else charcodes_of_utf8 pdf fontpdfobj text in
|
let text = if raw then text else charcodes_of_utf8 (Pdftext.read_font pdf fontpdfobj) text in
|
||||||
let lines = map unescape_string (split_at_newline text) in
|
let lines = map unescape_string (split_at_newline text) in
|
||||||
let pdf = ref pdf in
|
let pdf = ref pdf in
|
||||||
let voffset =
|
let voffset =
|
||||||
|
|
|
@ -2919,13 +2919,10 @@ let collate (names, pdfs, ranges) =
|
||||||
split3 (rev !nis)
|
split3 (rev !nis)
|
||||||
|
|
||||||
let of_utf8 (f, fontsize) t =
|
let of_utf8 (f, fontsize) t =
|
||||||
let pdf = Pdf.empty () in
|
Pdftext.codepoints_of_utf8 t
|
||||||
let fontdict = Pdftext.write_font pdf f in
|
|> option_map (Pdftext.charcode_extractor_of_font_real f)
|
||||||
let extractor = Pdftext.charcode_extractor_of_font pdf (Pdf.Indirect fontdict) in
|
|> map char_of_int
|
||||||
Pdftext.codepoints_of_utf8 t
|
|> implode
|
||||||
|> option_map extractor
|
|
||||||
|> map char_of_int
|
|
||||||
|> implode
|
|
||||||
|
|
||||||
let of_pdfdocencoding (f, fontsize) t =
|
let of_pdfdocencoding (f, fontsize) t =
|
||||||
of_utf8 (f, fontsize) (Pdftext.utf8_of_pdfdocstring t)
|
of_utf8 (f, fontsize) (Pdftext.utf8_of_pdfdocstring t)
|
||||||
|
@ -2948,9 +2945,13 @@ let rec of_utf8_with_newlines t =
|
||||||
if c <> "" then items := Text (explode c)::!items;
|
if c <> "" then items := Text (explode c)::!items;
|
||||||
rev !items
|
rev !items
|
||||||
|
|
||||||
|
(* FIXME margins, hyphenation of too-long words, efficiency *)
|
||||||
let typeset text =
|
let typeset text =
|
||||||
let pdf = Pdf.empty () in
|
let pdf = Pdf.empty () in
|
||||||
let f = (Pdftext.StandardFont (Pdftext.Courier, Pdftext.WinAnsiEncoding), 12.) in
|
let f =
|
||||||
|
(begin match args.font with StandardFont sf -> Pdftext.StandardFont (sf, Pdftext.WinAnsiEncoding) | _ -> failwith "typeset bad font" end,
|
||||||
|
args.fontsize)
|
||||||
|
in
|
||||||
let pages =
|
let pages =
|
||||||
Cpdftype.typeset
|
Cpdftype.typeset
|
||||||
20. 20. 20. 20. Pdfpaper.a4 pdf ([Cpdftype.Font f] @ of_utf8_with_newlines (string_of_bytes text))
|
20. 20. 20. 20. Pdfpaper.a4 pdf ([Cpdftype.Font f] @ of_utf8_with_newlines (string_of_bytes text))
|
||||||
|
@ -2973,13 +2974,22 @@ let typeset_table_of_contents ~font pdf =
|
||||||
Pdfpaper.make Pdfunits.PdfPoint width height
|
Pdfpaper.make Pdfunits.PdfPoint width height
|
||||||
in
|
in
|
||||||
let lines =
|
let lines =
|
||||||
|
let refnums = Pdf.page_reference_numbers pdf in
|
||||||
|
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
||||||
map
|
map
|
||||||
(fun mark ->
|
(fun mark ->
|
||||||
[Cpdftype.BeginDest mark.Pdfmarks.target;
|
let label =
|
||||||
Cpdftype.HGlue {Cpdftype.glen = float mark.Pdfmarks.level *. args.fontsize *. 2.; Cpdftype.gstretch = 0.};
|
let labels = Pdfpagelabels.read pdf in
|
||||||
Cpdftype.Text (explode (of_pdfdocencoding f mark.Pdfmarks.text));
|
let pnum = Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target in
|
||||||
Cpdftype.EndDest;
|
try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> string_of_int pnum
|
||||||
Cpdftype.NewLine])
|
in
|
||||||
|
[Cpdftype.BeginDest mark.Pdfmarks.target;
|
||||||
|
Cpdftype.HGlue {Cpdftype.glen = float mark.Pdfmarks.level *. args.fontsize *. 2.; Cpdftype.gstretch = 0.};
|
||||||
|
Cpdftype.Text (explode (of_pdfdocencoding f mark.Pdfmarks.text ^ " " ^ of_pdfdocencoding f label));
|
||||||
|
(*Cpdftype.Text [' '];
|
||||||
|
Cpdftype.Text (explode (of_pdfdocencoding f label));*)
|
||||||
|
Cpdftype.EndDest;
|
||||||
|
Cpdftype.NewLine])
|
||||||
(Pdfmarks.read_bookmarks pdf)
|
(Pdfmarks.read_bookmarks pdf)
|
||||||
in
|
in
|
||||||
let toc_pages =
|
let toc_pages =
|
||||||
|
|
|
@ -156,7 +156,7 @@ let print_font_table pdf fontname pagenumber =
|
||||||
| Pdftext.SimpleFont {Pdftext.fontdescriptor = Some {Pdftext.charset = Some cs}} -> Some cs
|
| Pdftext.SimpleFont {Pdftext.fontdescriptor = Some {Pdftext.charset = Some cs}} -> Some cs
|
||||||
| _ -> None
|
| _ -> None
|
||||||
in
|
in
|
||||||
let extractor = Pdftext.text_extractor_of_font pdf font in
|
let extractor = Pdftext.text_extractor_of_font_real pdftextfont in
|
||||||
let unicodedata = Cpdfunicodedata.unicodedata () in
|
let unicodedata = Cpdfunicodedata.unicodedata () in
|
||||||
let unicodetable = Hashtbl.create 16000 in
|
let unicodetable = Hashtbl.create 16000 in
|
||||||
iter
|
iter
|
||||||
|
|
16
cpdftype.ml
16
cpdftype.ml
|
@ -1,11 +1,6 @@
|
||||||
(* A typesetter for cpdf. A list of elements is manipulated zero or more times
|
(* A typesetter for cpdf. A list of elements is manipulated zero or more times
|
||||||
to lay it out, paginate it, and so on. It is then typeset to produce a list
|
to lay it out, paginate it, and so on. It is then typeset to produce a list
|
||||||
of pages *)
|
of pages *)
|
||||||
|
|
||||||
(* FIXME We need to make Pdfstandard14 width calculations much more efficient
|
|
||||||
by caching so that we are not making a table up for each character! *)
|
|
||||||
(* FIXME We need to reintroduce kerning in Pdfstandard14. *)
|
|
||||||
(* FIXME Fix up charcode / text extractors to take fonts not fontdicts *)
|
|
||||||
open Pdfutil
|
open Pdfutil
|
||||||
|
|
||||||
(* Glue *)
|
(* Glue *)
|
||||||
|
@ -55,8 +50,15 @@ let initial_state () =
|
||||||
dest = None}
|
dest = None}
|
||||||
|
|
||||||
let font_widths f fontsize =
|
let font_widths f fontsize =
|
||||||
let w = fontsize *. (600. /. 1000.) in
|
let stdfont =
|
||||||
Array.make 256 w
|
match f with Pdftext.StandardFont (sf, _) -> sf | _ -> failwith "not a standard font"
|
||||||
|
in
|
||||||
|
Array.init
|
||||||
|
256
|
||||||
|
(fun x ->
|
||||||
|
fontsize
|
||||||
|
*. float_of_int (Pdfstandard14.textwidth false Pdftext.WinAnsiEncoding stdfont (string_of_char (char_of_int x)))
|
||||||
|
/. 1000.)
|
||||||
|
|
||||||
let width_of_string ws s =
|
let width_of_string ws s =
|
||||||
let w = ref 0. in
|
let w = ref 0. in
|
||||||
|
|
Loading…
Reference in New Issue