From f071f1805a3c5459a52ade288fb64758a205c3aa Mon Sep 17 00:00:00 2001 From: John Whitington Date: Mon, 1 Nov 2021 13:17:54 +0000 Subject: [PATCH] more --- cpdfjson.ml | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/cpdfjson.ml b/cpdfjson.ml index 922f66d..a5d584a 100644 --- a/cpdfjson.ml +++ b/cpdfjson.ml @@ -453,22 +453,12 @@ let precombine_page_content pdf = in Pdfpage.change_pages true pdf pages' -(* PDF strings (except /ID in the trailer dictionary) are either PDFDocEncoding -or UTF16BE. Many times the UTF16BE can all be represented in PDFDocEncoding. -In this case, there are just lots of \000 bytes getting in the way making the -JSON hard to edit. So we preprocess such simple UTF16BE strings into -PDFDocEncoding. *) -let preprocess_string s = - if Pdftext.is_unicode s - then Pdftext.pdfdocstring_of_utf8 (Pdftext.utf8_of_pdfdocstring s) - else s - let rec ppstring_single_object pdf = function | Pdf.Dictionary d -> Pdf.recurse_dict (ppstring_single_object pdf) d | (Pdf.Stream {contents = (Pdf.Dictionary dict, data)}) -> Pdf.Stream {contents = (Pdf.recurse_dict (ppstring_single_object pdf) dict, data)} | Pdf.Array a -> Pdf.recurse_array (ppstring_single_object pdf) a - | Pdf.String s -> Pdf.String (preprocess_string s) + | Pdf.String s -> Pdf.String (Pdftext.simplify_utf16be s) | x -> x let preprocess_strings pdf =