From f071f1805a3c5459a52ade288fb64758a205c3aa Mon Sep 17 00:00:00 2001
From: John Whitington <john@coherentgraphics.co.uk>
Date: Mon, 1 Nov 2021 13:17:54 +0000
Subject: [PATCH] Replace local preprocess_string with Pdftext.simplify_utf16be

---
 cpdfjson.ml | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/cpdfjson.ml b/cpdfjson.ml
index 922f66d..a5d584a 100644
--- a/cpdfjson.ml
+++ b/cpdfjson.ml
@@ -453,22 +453,12 @@ let precombine_page_content pdf =
   in
     Pdfpage.change_pages true pdf pages'
 
-(* PDF strings (except /ID in the trailer dictionary) are either PDFDocEncoding
-or UTF16BE. Many times the UTF16BE can all be represented in PDFDocEncoding.
-In this case, there are just lots of \000 bytes getting in the way making the
-JSON hard to edit. So we preprocess such simple UTF16BE strings into
-PDFDocEncoding. *)
-let preprocess_string s =
-  if Pdftext.is_unicode s
-    then Pdftext.pdfdocstring_of_utf8 (Pdftext.utf8_of_pdfdocstring s)
-    else s
-
 let rec ppstring_single_object pdf = function
   | Pdf.Dictionary d -> Pdf.recurse_dict (ppstring_single_object pdf) d
   | (Pdf.Stream {contents = (Pdf.Dictionary dict, data)}) ->
       Pdf.Stream {contents = (Pdf.recurse_dict (ppstring_single_object pdf) dict, data)}
   | Pdf.Array a -> Pdf.recurse_array (ppstring_single_object pdf) a
-  | Pdf.String s -> Pdf.String (preprocess_string s)
+  | Pdf.String s -> Pdf.String (Pdftext.simplify_utf16be s)
   | x -> x
 
 let preprocess_strings pdf =