This commit is contained in:
John Whitington 2021-11-11 15:05:07 -08:00
parent 2fb55d514d
commit 87c82dbbf0
2 changed files with 11 additions and 3 deletions

View File

@@ -18,7 +18,7 @@ OCAMLLDFLAGS = -g
all : native-code native-code-library byte-code-library top htdoc all : native-code native-code-library byte-code-library top htdoc
clean :: clean ::
rm -rf doc foo foo2 out.pdf out2.pdf foo.pdf *.cmt *.cmti *.json test/*.pdf debug/*.pdf rm -rf doc foo foo2 out.pdf out2.pdf foo.pdf decomp.pdf *.cmt *.cmti *.json test/*.pdf debug/*.pdf
DOC_FILES = cpdferror.mli cpdfjson.mli cpdfstrftime.mli cpdfcoord.mli \ DOC_FILES = cpdferror.mli cpdfjson.mli cpdfstrftime.mli cpdfcoord.mli \
cpdfattach.mli cpdfpagespec.mli cpdfposition.mli cpdf.mli \ cpdfattach.mli cpdfpagespec.mli cpdfposition.mli cpdf.mli \

12
cpdf.ml
View File

@@ -1121,9 +1121,17 @@ let print_fonts pdf range =
(* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever (* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever
is in the font (for existing fonts). *) is in the font (for existing fonts). *)
let charcodes_of_utf8 pdf font s = let charcodes_of_utf8 pdf font s =
let extractor = Pdftext.charcode_extractor_of_font ~debug:true pdf font in let extractor = Pdftext.charcode_extractor_of_font ~debug:false pdf font in
let codepoints = Pdftext.codepoints_of_utf8 s in let codepoints = Pdftext.codepoints_of_utf8 s in
implode (map char_of_int (option_map extractor codepoints)) let charcodes =
option_map
(fun codepoint ->
match extractor codepoint with
| Some cc -> Some cc
| None -> Printf.eprintf "Warning: character not found in font for unicode codepoint 0x%X\n" codepoint; None)
codepoints
in
implode (map char_of_int charcodes)
(* Process codepoints back to UTF8, assuming it came from UTF8 to start with *) (* Process codepoints back to UTF8, assuming it came from UTF8 to start with *)
let utf8_of_winansi s = let utf8_of_winansi s =