This commit is contained in:
John Whitington 2022-09-21 15:21:57 +01:00
parent 8f4e1c01a6
commit d030bc7e74
1 changed files with 24 additions and 17 deletions

View File

@ -1,28 +1,35 @@
open Pdfutil open Pdfutil
(* FIXME Make this use the charcode extractor for the font, because cpdftype let rec of_utf8_with_newlines charcode_extractor t =
assumes charcodes. *)
let rec of_utf8_with_newlines t =
let items = ref [] in let items = ref [] in
let buf = Buffer.create 256 in let buf = ref [] in
String.iter let codepoints = Pdftext.codepoints_of_utf8 t in
let charcodes_of_codepoints cs =
option_map
(fun u ->
match charcode_extractor u with
| Some c -> Some (char_of_int c)
| None -> Printf.printf "No glyph for unicode U+%04X in this font\n" u; None)
cs
in
List.iter
(function (function
| '\n' -> | 10 (*'\n'*) ->
let c = Buffer.contents buf in let c = rev !buf in
if c <> "" then items := Cpdftype.Text (explode c)::!items; if c <> [] then items := Cpdftype.Text (charcodes_of_codepoints c)::!items;
items := Cpdftype.NewLine::!items; items := Cpdftype.NewLine::!items;
Buffer.clear buf buf := []
| '\r' -> () | 13 (*'\r'*) -> ()
| x -> | x ->
Buffer.add_char buf x) buf := x::!buf)
t; codepoints;
(* Do last one *) (* Do last one *)
let c = Buffer.contents buf in let c = rev !buf in
if c <> "" then items := Text (explode c)::!items; if c <> [] then items := Text (charcodes_of_codepoints c)::!items;
rev !items rev !items
let typeset ~papersize ~font ~fontsize text = let typeset ~papersize ~font ~fontsize text =
let charcode_extractor = Pdftext.charcode_extractor_of_font_real font in
let pdf = Pdf.empty () in let pdf = Pdf.empty () in
let margin = let margin =
Pdfunits.convert Pdfunits.convert
@ -32,7 +39,7 @@ let typeset ~papersize ~font ~fontsize text =
Cpdftype.typeset Cpdftype.typeset
margin margin margin margin papersize pdf margin margin margin margin papersize pdf
([Cpdftype.Font (font, fontsize); Cpdftype.BeginDocument] @ ([Cpdftype.Font (font, fontsize); Cpdftype.BeginDocument] @
of_utf8_with_newlines (Pdfio.string_of_bytes text)) of_utf8_with_newlines charcode_extractor (Pdfio.string_of_bytes text))
in in
let pdf, pageroot = Pdfpage.add_pagetree pages pdf in let pdf, pageroot = Pdfpage.add_pagetree pages pdf in
Pdfpage.add_root pageroot [] pdf Pdfpage.add_root pageroot [] pdf