From b874221aca2525d09fdfa6976a08055398c5efd4 Mon Sep 17 00:00:00 2001
From: John Whitington
Date: Fri, 1 Oct 2021 12:16:55 +0100
Subject: [PATCH] -j support

---
Review notes (text between "---" and the first "diff --git" is ignored by git am):
- set_json_input now closes the input channel when CpdfJSON.of_input raises,
  instead of leaking the file descriptor.
- The -j help string now starts with a space, matching the other spec entries.
- CpdfJSON.of_input is still a stub: it ignores its input and returns
  Pdf.empty (), so -j currently loads an empty document.
- The "index" line for cpdfcommand.ml still carries the original post-image hash.

 Makefile       |  2 +-
 cpdfJSON.ml    | 14 +++++++++++---
 cpdfJSON.mli   |  1 +
 cpdfcommand.ml | 14 ++++++++++++++
 4 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index 57c0f88..c54c421 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,7 @@ OCAMLLDFLAGS = -g
 all : native-code native-code-library byte-code-library top htdoc
 
 clean ::
-	rm -rf doc foo foo2 out.pdf out2.pdf *.cmt *.cmti
+	rm -rf doc foo foo2 out.pdf out2.pdf *.cmt *.cmti *.json
 
 LIBINSTALL_FILES = cpdf.a cpdf.cma cpdf.cmxa \
 	$(foreach x,$(MODS),$x.mli) $(foreach x,$(MODS),$x.cmi) \
diff --git a/cpdfJSON.ml b/cpdfJSON.ml
index 2caf6fb..0ea16d8 100644
--- a/cpdfJSON.ml
+++ b/cpdfJSON.ml
@@ -195,6 +195,12 @@ let json_of_pdf parse_content no_stream_data pdf =
   let pdf = if parse_content then precombine_page_content pdf else pdf in
   Pdf.remove_unreferenced pdf;
   let trailerdict = (0, json_of_object pdf (fun x -> ()) no_stream_data pdf.P.trailerdict) in
+  let parameters =
+    (-1, json_of_object pdf (fun x -> ()) false
+      (Pdf.Dictionary [("/CPDFJSONformatversion", Pdf.Integer 1);
+                       ("/CPDFJSONcontentparsed", Pdf.Boolean parse_content);
+                       ("/CPDFJSONstreamdataincluded", Pdf.Boolean (not no_stream_data))]))
+  in
   let content_streams = ref [] in
   let fcs n = content_streams := n::!content_streams in
   let pairs =
@@ -203,7 +209,7 @@ let json_of_pdf parse_content no_stream_data pdf =
         (fun i pdfobj ->
           ps := (i, json_of_object pdf fcs no_stream_data pdfobj)::!ps)
         pdf;
-      trailerdict::!ps
+      parameters::trailerdict::!ps
   in
     if parse_content then
       iter (fun n -> Pdfcodec.decode_pdfstream_until_unknown pdf (P.lookup_obj pdf n)) !content_streams;
@@ -233,9 +239,11 @@ let json_of_pdf parse_content no_stream_data pdf =
       pairs_parsed)
 
 (* FIXME Proper streaming to output, rather than making a big string first. *)
-let to_output output parse_content no_stream_data pdf =
+let to_output o parse_content no_stream_data pdf =
   let b = Buffer.create 256 in
   let formatter = Format.formatter_of_buffer b in
     J.format formatter (json_of_pdf parse_content no_stream_data pdf);
     Format.pp_print_flush formatter ();
-    output.Pdfio.output_string (Buffer.contents b)
+    o.Pdfio.output_string (Buffer.contents b)
+
+let of_input i = Pdf.empty ()
diff --git a/cpdfJSON.mli b/cpdfJSON.mli
index 97eafa6..8a42e23 100644
--- a/cpdfJSON.mli
+++ b/cpdfJSON.mli
@@ -1 +1,2 @@
 val to_output : Pdfio.output -> bool -> bool -> Pdf.t -> unit
+val of_input : Pdfio.input -> Pdf.t
diff --git a/cpdfcommand.ml b/cpdfcommand.ml
index f4aa021..3d70e8f 100644
--- a/cpdfcommand.ml
+++ b/cpdfcommand.ml
@@ -1168,6 +1168,17 @@ let set_input s =
   args.original_filename <- s;
   args.inputs <- (InFile s, "all", "", "", ref false, None)::args.inputs
 
+let set_json_input s =
+  args.original_filename <- s;
+  let fh = open_in_bin s in
+  let pdf =
+    (* Close the channel even if parsing raises, then re-raise. *)
+    try CpdfJSON.of_input (Pdfio.input_of_channel fh) with
+      e -> close_in_noerr fh; raise e
+  in
+    close_in fh;
+    args.inputs <- (AlreadyInMemory pdf, "all", "", "", ref false, None)::args.inputs
+
 let set_input_dir s =
   let names = sort compare (leafnames_of_dir s) in
     args.inputs <-
@@ -2197,6 +2208,9 @@ and specs =
    ("-output-json-no-stream-data",
        Arg.Unit setjsonnostreamdata,
        " Skip stream data for brevity");
+   ("-j",
+       Arg.String set_json_input,
+       " Load a PDF JSON file");
    ("-ocg-list",
        Arg.Unit (setop OCGList),
        " List optional content groups");