cpdf-source/cpdftexttopdf.ml

70 lines
2.6 KiB
OCaml
Raw Normal View History

2021-12-07 00:55:46 +01:00
open Pdfutil
2024-02-27 16:57:31 +01:00
(* [of_utf8_with_newlines fontpack fontsize t] turns the UTF-8 string [t] into
   a list of [Cpdftype] items in document order.

   The text is split at line feeds (code point 10); carriage returns (13) are
   dropped. For each non-empty line, every code point is looked up with
   [Cpdfembed.get_char fontpack]:
   - when the lookup succeeds, the returned character code is appended to the
     current text run; if the returned font number differs from the one in
     use, the pending run (if any) is emitted as a [Cpdftype.Text] and a
     [Cpdftype.Font] item selecting the new font at [fontsize] is emitted
     first;
   - when it fails, a diagnostic is written to standard error and the code
     point is skipped.
   Each line feed produces a [Cpdftype.NewLine] item. *)
let of_utf8_with_newlines fontpack fontsize t =
  (* Output items, accumulated in reverse order. *)
  let items = ref [] in
  (* Number of the font currently selected; -1 means none selected yet. *)
  let currfont = ref ~-1 in
  let codepoints = Pdftext.codepoints_of_utf8 t in
  (* Characters of the text run being built, in reverse order. *)
  let currtext = ref [] in
  let process_codepoints cs =
    iter
      (fun u ->
         match Cpdfembed.get_char fontpack u with
         | Some (c, n, f) ->
             if n <> !currfont then
               begin
                 (* Font change: flush the pending run, then select the font. *)
                 if !currtext <> [] then
                   items := Cpdftype.Text (rev !currtext)::!items;
                 currtext := [];
                 currfont := n;
                 items := Cpdftype.Font (string_of_int n, f, fontsize)::!items
               end;
             currtext := char_of_int c::!currtext
         | None ->
             (* Diagnostics go to stderr so that they can never be mixed into
                data the program writes to stdout. *)
             Printf.eprintf "No glyph for unicode U+%04X in this font\n" u)
      cs;
    (* Emit whatever run remains at the end of the line. *)
    items := Cpdftype.Text (rev !currtext)::!items
  in
  (* Code points of the line being collected, in reverse order. *)
  let buf = ref [] in
  List.iter
    (function
     | 10 (*'\n'*) ->
         let c = rev !buf in
         if c <> [] then process_codepoints c;
         items := Cpdftype.NewLine::!items;
         currtext := [];
         buf := []
     | 13 (*'\r'*) -> ()
     | x ->
         buf := x::!buf)
    codepoints;
  (* Do last one: the final line may not be terminated by a line feed. *)
  let c = rev !buf in
  if c <> [] then process_codepoints c;
  rev !items
2021-12-07 00:55:46 +01:00
(* [typeset ~process_struct_tree ?subformat ?title ~papersize ~font ~fontsize text]
   builds a new PDF document from the UTF-8 bytes [text].

   - [process_struct_tree] is forced on when [subformat] is [Cpdfua.PDFUA1] or
     [Cpdfua.PDFUA2].
   - [font] is either a ready-made font pack or the information needed to embed
     a TrueType font covering the code points found in [text].
   - All four margins are one fifteenth of the paper width, in points.
   - [title] is accepted but not used by this function.

   Raises [Pdf.PDFError] if [font] is [Cpdfembed.ExistingNamedFont]. *)
let typeset ~process_struct_tree ?subformat ?title ~papersize ~font ~fontsize text =
  let process_struct_tree =
    process_struct_tree || subformat = Some Cpdfua.PDFUA1 || subformat = Some Cpdfua.PDFUA2
  in
  let pdf = Pdf.empty () in
  (* Convert once; the string is needed both for the code point set and for
     the typesetting instructions. *)
  let utf8 = Pdfio.string_of_bytes text in
  let codepoints = setify (Pdftext.codepoints_of_utf8 utf8) in
  let fontpack =
    match font with
    | Cpdfembed.PreMadeFontPack t -> t
    | Cpdfembed.EmbedInfo {fontfile; fontname; encoding} ->
        Cpdfembed.embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding
    | Cpdfembed.ExistingNamedFont ->
        raise (Pdf.PDFError "Can't use existing named font for text-to-PDF")
  in
  let instrs = of_utf8_with_newlines fontpack fontsize utf8 in
  let margin = Pdfunits.points (Pdfpaper.width papersize) (Pdfpaper.unit papersize) /. 15. in
  let instrs =
    (* The document must start with a font selection, so the first Font item
       is duplicated at the front. When there is no Font item at all (empty
       input, only newlines, or no glyph found for any character) there is
       no character to draw, so typeset an empty document rather than taking
       the head of an empty list. *)
    match keep (function Cpdftype.Font _ -> true | _ -> false) instrs with
    | [] -> []
    | firstfont::_ -> [firstfont; Cpdftype.BeginDocument] @ instrs
  in
  let pages = Cpdftype.typeset ~process_struct_tree margin margin margin margin papersize pdf instrs in
  let pdf, pageroot = Pdfpage.add_pagetree pages pdf in
  Pdfpage.add_root pageroot [] pdf