From 87c82dbbf0147d57b066ad33bc5c55d1cc8fe983 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Thu, 11 Nov 2021 15:05:07 -0800 Subject: [PATCH] more --- Makefile | 2 +- cpdf.ml | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 94980cb..bc4bfa3 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ OCAMLLDFLAGS = -g all : native-code native-code-library byte-code-library top htdoc clean :: - rm -rf doc foo foo2 out.pdf out2.pdf foo.pdf *.cmt *.cmti *.json test/*.pdf debug/*.pdf + rm -rf doc foo foo2 out.pdf out2.pdf foo.pdf decomp.pdf *.cmt *.cmti *.json test/*.pdf debug/*.pdf DOC_FILES = cpdferror.mli cpdfjson.mli cpdfstrftime.mli cpdfcoord.mli \ cpdfattach.mli cpdfpagespec.mli cpdfposition.mli cpdf.mli \ diff --git a/cpdf.ml b/cpdf.ml index 21eb6e5..eb9909b 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -1121,9 +1121,17 @@ let print_fonts pdf range = (* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever is in the font (for existing fonts). *) let charcodes_of_utf8 pdf font s = - let extractor = Pdftext.charcode_extractor_of_font ~debug:true pdf font in + let extractor = Pdftext.charcode_extractor_of_font ~debug:false pdf font in let codepoints = Pdftext.codepoints_of_utf8 s in - implode (map char_of_int (option_map extractor codepoints)) + let charcodes = + option_map + (fun codepoint -> + match extractor codepoint with + | Some cc -> Some cc + | None -> Printf.eprintf "Warning: character not found in font for unicode codepoint 0x%X\n" codepoint; None) + codepoints + in + implode (map char_of_int charcodes) (* Process codepoints back to UTF8, assuming it came from UTF8 to start with *) let utf8_of_winansi s =