cpdf-source/cpdftexttopdf.ml

65 lines
2.2 KiB
OCaml
Raw Normal View History

2021-12-06 15:55:46 -08:00
open Pdfutil
2022-10-19 14:36:26 +01:00
(* The Unicode codepoints occurring in the UTF-8 string [t], with duplicates
   removed by [setify]. *)
let used_characters t =
  setify (Pdftext.codepoints_of_utf8 t)
(* Look up the character code for Unicode codepoint [u] in a font pack.

   The font pack is a pair; only its second component, a hash table keyed by
   codepoint, is consulted here. Each table entry is itself a pair whose
   second component is the character code. The first component of the entry
   (presumably which font of the pack holds the glyph -- confirm in
   Cpdfembed) is ignored for now: just first font, expand later. Move into
   cpdfembed?

   Returns [Some charcode], or [None] when the table has no entry for [u]. *)
let get_char (_, table) u =
  match Hashtbl.find table u with
  | (_, charcode) -> Some charcode
  | exception Not_found -> None
(* Convert the UTF-8 string [t] into a list of typesetting elements for
   [fontpack].

   The text is decoded to codepoints and split at line feeds (codepoint 10).
   Each non-empty run of codepoints between line feeds becomes one
   [Cpdftype.Text] element, and each line feed becomes one
   [Cpdftype.NewLine]. Carriage returns (codepoint 13) are discarded, so
   CRLF input behaves like LF input.

   Codepoints are translated to character codes with [get_char]. A codepoint
   with no glyph is reported on stderr and left out of the output; a line
   whose codepoints are all missing still yields a [Cpdftype.Text] with an
   empty list. [char_of_int] raises [Invalid_argument] if a character code
   returned by [get_char] is outside 0..255. *)
let of_utf8_with_newlines fontpack t =
  (* Both accumulators are built in reverse and flipped when read. *)
  let items = ref [] in
  let buf = ref [] in
  let charcodes_of_codepoints cs =
    option_map
      (fun u ->
         match get_char fontpack u with
         | Some c -> Some (char_of_int c)
         | None ->
             (* Diagnostics go to stderr so they cannot interleave with
                anything the program writes to stdout. *)
             Printf.eprintf "No glyph for unicode U+%04X in this font\n%!" u;
             None)
      cs
  in
  (* Emit the buffered codepoints, if any, as one Text element and reset
     the buffer. *)
  let flush_line () =
    begin match rev !buf with
    | [] -> ()
    | line ->
        items := Cpdftype.Text (charcodes_of_codepoints line) :: !items
    end;
    buf := []
  in
  List.iter
    (function
      | 10 (*'\n'*) ->
          flush_line ();
          items := Cpdftype.NewLine :: !items
      | 13 (*'\r'*) -> ()
      | x -> buf := x :: !buf)
    (Pdftext.codepoints_of_utf8 t);
  (* Do last one: text after the final line feed. *)
  flush_line ();
  rev !items
2021-12-06 15:55:46 -08:00
2022-10-19 13:48:13 +01:00
(* [typeset ~papersize ~font ~fontsize text] builds a new PDF document
   containing [text], which is supplied as bytes and decoded as UTF-8.

   [font] selects the font:
   - [Cpdfembed.PreMadeFontPack t] uses the font pack [t] as given;
   - [Cpdfembed.EmbedInfo] embeds a TrueType font into the new document via
     [Cpdfembed.embed_truetype], passing the codepoints actually used by
     [text];
   - [Cpdfembed.ExistingNamedFont] is not supported and raises
     [Pdf.PDFError].
   In the first two cases the first font of the pack ([hd (fst pack)]) is the
   one selected for typesetting.

   All four margins are the paper width, converted to points, divided by 15.
   The result is whatever [Pdfpage.add_root] returns after the page tree has
   been added to the document. *)
let typeset ~papersize ~font ~fontsize text =
  let pdf = Pdf.empty () in
  (* Convert once: both glyph collection and layout need the string form. *)
  let utf8 = Pdfio.string_of_bytes text in
  let codepoints = used_characters utf8 in
  let font, fontpack =
    match font with
    | Cpdfembed.PreMadeFontPack t -> (hd (fst t), t)
    | Cpdfembed.EmbedInfo {fontfile; fontname; encoding} ->
        let embedded =
          Cpdfembed.embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding
        in
        (hd (fst embedded), embedded)
    | Cpdfembed.ExistingNamedFont _ ->
        raise (Pdf.PDFError "Can't use existing named font for text-to-PDF")
  in
  let instrs = of_utf8_with_newlines fontpack utf8 in
  (* Margin: one fifteenth of the page width, measured in points. *)
  let margin =
    Pdfunits.convert
      72. (Pdfpaper.unit papersize) (Pdfunits.PdfPoint) (Pdfpaper.width papersize)
    /. 15.
  in
  let pages =
    Cpdftype.typeset
      margin margin margin margin papersize pdf
      (Cpdftype.Font (font, fontsize) :: Cpdftype.BeginDocument :: instrs)
  in
  let pdf, pageroot = Pdfpage.add_pagetree pages pdf in
  Pdfpage.add_root pageroot [] pdf