diff --git a/cpdftexttopdf.ml b/cpdftexttopdf.ml index 7e09fd5..34aefba 100644 --- a/cpdftexttopdf.ml +++ b/cpdftexttopdf.ml @@ -1,28 +1,35 @@ open Pdfutil -(* FIXME Make this use the charcode extractor for the font, because cpdftype - assumes charcodes. *) - -let rec of_utf8_with_newlines t = +let rec of_utf8_with_newlines charcode_extractor t = let items = ref [] in - let buf = Buffer.create 256 in - String.iter + let buf = ref [] in + let codepoints = Pdftext.codepoints_of_utf8 t in + let charcodes_of_codepoints cs = + option_map + (fun u -> + match charcode_extractor u with + | Some c -> Some (char_of_int c) + | None -> Printf.printf "No glyph for unicode U+%04X in this font\n" u; None) + cs + in + List.iter (function - | '\n' -> - let c = Buffer.contents buf in - if c <> "" then items := Cpdftype.Text (explode c)::!items; + | 10 (*'\n'*) -> + let c = rev !buf in + if c <> [] then items := Cpdftype.Text (charcodes_of_codepoints c)::!items; items := Cpdftype.NewLine::!items; - Buffer.clear buf - | '\r' -> () + buf := [] + | 13 (*'\r'*) -> () | x -> - Buffer.add_char buf x) - t; + buf := x::!buf) + codepoints; (* Do last one *) - let c = Buffer.contents buf in - if c <> "" then items := Text (explode c)::!items; - rev !items + let c = rev !buf in + if c <> [] then items := Text (charcodes_of_codepoints c)::!items; + rev !items let typeset ~papersize ~font ~fontsize text = + let charcode_extractor = Pdftext.charcode_extractor_of_font_real font in let pdf = Pdf.empty () in let margin = Pdfunits.convert @@ -32,7 +39,7 @@ let typeset ~papersize ~font ~fontsize text = Cpdftype.typeset margin margin margin margin papersize pdf ([Cpdftype.Font (font, fontsize); Cpdftype.BeginDocument] @ - of_utf8_with_newlines (Pdfio.string_of_bytes text)) + of_utf8_with_newlines charcode_extractor (Pdfio.string_of_bytes text)) in let pdf, pageroot = Pdfpage.add_pagetree pages pdf in Pdfpage.add_root pageroot [] pdf