From 34b60f69fa45704841e97afc34682d652ea76fb8 Mon Sep 17 00:00:00 2001
From: John Whitington
Date: Mon, 1 Nov 2021 17:09:58 +0000
Subject: [PATCH] more

---
 Changes |  1 +
 cpdf.ml | 22 ++++++++++------------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/Changes b/Changes
index 69112e1..05defbb 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,6 @@
 2.5 (Upcoming 2022)
 
+o Text added in existing fonts is now encoding-aware
 o New operation -print-font-encoding
 o New -print-dict-entry operation prints values for a given key
 o Extend -remove-dict-entry to allow search
diff --git a/cpdf.ml b/cpdf.ml
index 43f044a..dc943ad 100644
--- a/cpdf.ml
+++ b/cpdf.ml
@@ -1119,17 +1119,15 @@ let print_fonts pdf =
 
 (* \section{Superimpose text, page numbers etc.} *)
 
-
-(* Process UTF8 text to /WinAnsiEncoding string. *)
-let winansi_of_utf8 s =
-  (*flprint "winansi_of_utf8:";
-  iter (Printf.printf "%C ") (explode s);
-  flprint "\n";*)
-  let extractor = Pdftext.charcode_extractor_of_encoding Pdftext.WinAnsiEncoding
-  and codepoints = Pdftext.codepoints_of_utf8 s in
-  (*flprint "codepoints after Pdftext.codepoints_of_utf8\n";
-  iter (Printf.printf "%i ") codepoints;
-  flprint "\ndone\n";*)
+(* Process UTF8 text to a string of character codes in the font's encoding.
+   For now that is always /WinAnsiEncoding (standard 14) - see FIXME below. *)
+let charcodes_of_utf8 s =
+  let encoding =
+    (* FIXME: read from font; hard-coded to WinAnsiEncoding until then *)
+    Pdftext.WinAnsiEncoding
+  in
+  let extractor = Pdftext.charcode_extractor_of_encoding encoding in
+  let codepoints = Pdftext.codepoints_of_utf8 s in
   implode (map char_of_int (option_map extractor codepoints))
 
 (* Process codepoints back to UTF8, assuming it came from UTF8 to start with *)
@@ -1550,7 +1548,7 @@ let
   Printf.printf "relative-to-cropbox = %b" cropbox;
   flprint "\n";*)
   ops_metrics := [];
-  let text = winansi_of_utf8 text in
+  let text = charcodes_of_utf8 text in
   let lines = map unescape_string (split_at_newline text) in
   let pdf = ref pdf in
   let voffset =