2021-12-07 00:55:46 +01:00
|
|
|
open Pdfutil
|
|
|
|
|
2022-09-21 21:45:11 +02:00
|
|
|
(* [of_utf8_with_newlines used charcode_extractor t] converts the UTF-8 string
   [t] into a list of [Cpdftype] elements, in input order:

   - each non-empty run of codepoints between line feeds becomes one
     [Cpdftype.Text];
   - each line feed (codepoint 10) becomes a [Cpdftype.NewLine];
   - carriage returns (codepoint 13) are discarded.

   Every codepoint in a run is passed to [charcode_extractor]. When it returns
   [Some c], [c] is recorded as a key of the hash table [used] and contributes
   [char_of_int c] to the text. When it returns [None], a message is written
   to standard error and the result is filtered through [option_map]
   (presumably Pdfutil's, which keeps only the [Some] results - confirm).

   Note: [char_of_int] raises [Invalid_argument] for codes outside 0..255. *)
let of_utf8_with_newlines used charcode_extractor t =
  let charcodes_of_codepoints cs =
    option_map
      (fun u ->
         match charcode_extractor u with
         | Some c -> Hashtbl.replace used c (); Some (char_of_int c)
         | None ->
             (* Diagnostics go to stderr so that stdout is left clean. *)
             Printf.eprintf "No glyph for unicode U+%04X in this font\n" u;
             None)
      cs
  in
  let items = ref [] in
  (* Codepoints of the current line, most recent first. *)
  let buf = ref [] in
  (* Emit the buffered codepoints, if any, as a single Text element. *)
  let flush_buf () =
    match rev !buf with
    | [] -> ()
    | c ->
        items := Cpdftype.Text (charcodes_of_codepoints c) :: !items;
        buf := []
  in
  List.iter
    (function
     | 10 (* '\n' *) ->
         flush_buf ();
         items := Cpdftype.NewLine :: !items
     | 13 (* '\r' *) -> ()
     | x -> buf := x :: !buf)
    (Pdftext.codepoints_of_utf8 t);
  (* Text after the final line feed has not been emitted yet. *)
  flush_buf ();
  rev !items
|
2021-12-07 00:55:46 +01:00
|
|
|
|
2022-09-21 18:52:15 +02:00
|
|
|
(* [typeset ?pdf ~papersize ~font ~fontsize text] typesets [text] with
   [Cpdftype.typeset] on paper of size [papersize], using [font] at
   [fontsize], and returns the PDF after adding the resulting pages as its
   page tree and document root.

   The optional [pdf] argument is for providing a document in which the font
   has already been embedded; when it is omitted, [Pdf.empty ()] is used. It
   may be removed once subsetted fonts are re-embedded here.

   [text] is converted with [Pdfio.string_of_bytes] and then read as UTF-8.
   All four margins are set to one fifteenth of the paper width, converted
   from the paper's own unit to PDF points. The number of distinct character
   codes used is reported on standard error. *)
let typeset ?pdf ~papersize ~font ~fontsize text =
  let charcode_extractor = Pdftext.charcode_extractor_of_font_real font in
  let pdf = match pdf with None -> Pdf.empty () | Some pdf -> pdf in
  let margin =
    (* Function application binds tighter than [/.]: this is the converted
       paper width divided by 15. The leading 72. is presumably the
       dots-per-inch argument of [Pdfunits.convert] - confirm. *)
    Pdfunits.convert
      72. (Pdfpaper.unit papersize) (Pdfunits.PdfPoint) (Pdfpaper.width papersize) /. 15.
  in
  (* Filled in by [of_utf8_with_newlines] with every character code used. *)
  let used = null_hash () in
  let pages =
    Cpdftype.typeset
      margin margin margin margin papersize pdf
      ([Cpdftype.Font (font, fontsize); Cpdftype.BeginDocument] @
       of_utf8_with_newlines used charcode_extractor (Pdfio.string_of_bytes text))
  in
  (* [used] is only populated with [Hashtbl.replace], so it holds one binding
     per code and its length is the number of distinct codes. Diagnostics go
     to stderr so that stdout is left clean. *)
  Printf.eprintf "%i codes used\n" (Hashtbl.length used);
  let pdf, pageroot = Pdfpage.add_pagetree pages pdf in
  Pdfpage.add_root pageroot [] pdf
|