From be610916d421c654289dd056670fd224a687b006 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Wed, 19 Oct 2022 14:36:26 +0100 Subject: [PATCH] more --- cpdfcommand.ml | 4 ++-- cpdfembed.ml | 2 +- cpdftexttopdf.ml | 30 ++++++++++++++++++++---------- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 4c28f80..603c4af 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -3068,7 +3068,7 @@ let embed_font () = Pdfio.bytes_of_string (contents_of_file (Filename.concat dirname filename)), Filename.remove_extension filename in - Cpdfembed.EmbedInfo {fontfile; fontname; fontencoding = args.fontencoding} + Cpdfembed.EmbedInfo {fontfile; fontname; encoding = args.fontencoding} with e -> error (Printf.sprintf "Can't load font for embedding: %s\n" (Printexc.to_string e)) end @@ -3078,7 +3078,7 @@ let embed_font () = | OtherFont f -> ExistingNamedFont f | FontToEmbed fontfile -> - EmbedInfo {fontfile; fontname = args.fontname; fontencoding = args.fontencoding} + EmbedInfo {fontfile; fontname = args.fontname; encoding = args.fontencoding} (* Main function *) let go () = diff --git a/cpdfembed.ml b/cpdfembed.ml index 6643012..d483fed 100644 --- a/cpdfembed.ml +++ b/cpdfembed.ml @@ -1,7 +1,7 @@ (* Embed a font *) open Pdfutil -type t = Pdftext.font list * (int, int * int) Hashtbl.t +type t = Pdftext.font list * (int, int * int) Hashtbl.t (* Table returns font number and charcode for given unicode codepoint *) type cpdffont = PreMadeFontPack of t diff --git a/cpdftexttopdf.ml b/cpdftexttopdf.ml index e7d1894..99014c0 100644 --- a/cpdftexttopdf.ml +++ b/cpdftexttopdf.ml @@ -1,14 +1,25 @@ open Pdfutil -let rec of_utf8_with_newlines used charcode_extractor t = +(* Return set of unicode characters in this text *) +let used_characters t = + let codepoints = Pdftext.codepoints_of_utf8 t in + setify codepoints + +(* Just first font, expand later. Move into cpdfembed? *) +let get_char (fonts, table) u = + match Hashtbl.find table u with + | (n, charcode) -> Some charcode + | exception Not_found -> None + +let rec of_utf8_with_newlines fontpack t = let items = ref [] in let buf = ref [] in let codepoints = Pdftext.codepoints_of_utf8 t in let charcodes_of_codepoints cs = option_map (fun u -> - match charcode_extractor u with - | Some c -> Hashtbl.replace used c (); Some (char_of_int c) + match get_char fontpack u with + | Some c -> Some (char_of_int c) | None -> Printf.printf "No glyph for unicode U+%04X in this font\n" u; None) cs in @@ -30,21 +41,20 @@ let rec of_utf8_with_newlines used charcode_extractor t = let typeset ~papersize ~font ~fontsize text = let pdf = Pdf.empty () in - let font = + let codepoints = used_characters (Pdfio.string_of_bytes text) in + let font, fontpack = match font with - | Cpdfembed.PreMadeFontPack t -> hd (fst t) + | Cpdfembed.PreMadeFontPack t -> (hd (fst t), t) | Cpdfembed.EmbedInfo {fontfile; fontname; encoding} -> - hd (fst (Cpdfembed.embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding)) + let embedded = Cpdfembed.embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding in + (hd (fst embedded), embedded) | Cpdfembed.ExistingNamedFont _ -> raise (Pdf.PDFError "Can't use existing named font for text-to-PDF") in - let charcode_extractor = Pdftext.charcode_extractor_of_font_real font in + let instrs = of_utf8_with_newlines fontpack (Pdfio.string_of_bytes text) in let margin = Pdfunits.convert 72. (Pdfpaper.unit papersize) (Pdfunits.PdfPoint) (Pdfpaper.width papersize) /. 15. in - let used = null_hash () in - let instrs = of_utf8_with_newlines used charcode_extractor (Pdfio.string_of_bytes text) in - let codepoints = map fst (list_of_hashtbl used) in let pages = Cpdftype.typeset margin margin margin margin papersize pdf