From b09235e28b77125eff3392d4fbd1858d22f783b7 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Fri, 31 Jan 2020 10:50:31 +0000 Subject: [PATCH] Multiple content streams in JSON conversion --- cpdfcommand.ml | 5 ----- cpdfwriteJSON.ml | 11 ++++++++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 074ffdb..e03d455 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -4443,11 +4443,6 @@ let go () = write_pdf false (Cpdf.append_page_content s before args.fast range pdf) | Some OutputJSON -> let pdf = get_single_pdf args.op false in - Pdf.iter_stream - (function stream -> - try Pdfcodec.decode_pdfstream_until_unknown pdf stream with - e -> Printf.eprintf "Decode failure: %s. Carrying on...\n" (Printexc.to_string e); ()) - pdf; write_json args.out pdf let parse_argv () = diff --git a/cpdfwriteJSON.ml b/cpdfwriteJSON.ml index a7cdfc5..f54233a 100644 --- a/cpdfwriteJSON.ml +++ b/cpdfwriteJSON.ml @@ -11,7 +11,12 @@ let rec json_of_object fcs = function | P.Name n -> J.String n | P.Array objs -> J.Array (List.map (json_of_object fcs) objs) | P.Dictionary elts -> - List.iter (function ("/Contents", P.Indirect i) -> fcs i | _ -> ()) elts; + List.iter + (function + ("/Contents", P.Indirect i) -> fcs i + | ("/Contents", P.Array elts) -> List.iter (function P.Indirect i -> fcs i | _ -> ()) elts + | _ -> ()) + elts; J.Object (List.map (fun (k, v) -> (k, json_of_object fcs v)) elts) | P.Stream {contents = (Pdf.Dictionary dict, stream)} as thestream -> Pdf.getstream thestream; @@ -130,6 +135,10 @@ let json_of_pdf parse_content pdf = pdf; trailerdict::!ps in + List.iter (Printf.printf "Found content stream %i\n") !content_streams; + List.iter + (fun n -> try Pdfcodec.decode_pdfstream_until_unknown pdf (Pdf.lookup_obj pdf n) with _ -> ()) + !content_streams; let pairs_parsed = if not parse_content then pairs else List.map