-j support

This commit is contained in:
John Whitington 2021-10-01 12:16:55 +01:00
parent a6256c0331
commit b874221aca
4 changed files with 23 additions and 4 deletions

View File

@ -18,7 +18,7 @@ OCAMLLDFLAGS = -g
all : native-code native-code-library byte-code-library top htdoc all : native-code native-code-library byte-code-library top htdoc
clean :: clean ::
rm -rf doc foo foo2 out.pdf out2.pdf *.cmt *.cmti rm -rf doc foo foo2 out.pdf out2.pdf *.cmt *.cmti *.json
LIBINSTALL_FILES = cpdf.a cpdf.cma cpdf.cmxa \ LIBINSTALL_FILES = cpdf.a cpdf.cma cpdf.cmxa \
$(foreach x,$(MODS),$x.mli) $(foreach x,$(MODS),$x.cmi) \ $(foreach x,$(MODS),$x.mli) $(foreach x,$(MODS),$x.cmi) \

View File

@ -195,6 +195,12 @@ let json_of_pdf parse_content no_stream_data pdf =
let pdf = if parse_content then precombine_page_content pdf else pdf in let pdf = if parse_content then precombine_page_content pdf else pdf in
Pdf.remove_unreferenced pdf; Pdf.remove_unreferenced pdf;
let trailerdict = (0, json_of_object pdf (fun x -> ()) no_stream_data pdf.P.trailerdict) in let trailerdict = (0, json_of_object pdf (fun x -> ()) no_stream_data pdf.P.trailerdict) in
let parameters =
(-1, json_of_object pdf (fun x -> ()) false
(Pdf.Dictionary [("/CPDFJSONformatversion", Pdf.Integer 1);
("/CPDFJSONcontentparsed", Pdf.Boolean parse_content);
("/CPDFJSONstreamdataincluded", Pdf.Boolean (not no_stream_data))]))
in
let content_streams = ref [] in let content_streams = ref [] in
let fcs n = content_streams := n::!content_streams in let fcs n = content_streams := n::!content_streams in
let pairs = let pairs =
@ -203,7 +209,7 @@ let json_of_pdf parse_content no_stream_data pdf =
(fun i pdfobj -> (fun i pdfobj ->
ps := (i, json_of_object pdf fcs no_stream_data pdfobj)::!ps) ps := (i, json_of_object pdf fcs no_stream_data pdfobj)::!ps)
pdf; pdf;
trailerdict::!ps parameters::trailerdict::!ps
in in
if parse_content then if parse_content then
iter (fun n -> Pdfcodec.decode_pdfstream_until_unknown pdf (P.lookup_obj pdf n)) !content_streams; iter (fun n -> Pdfcodec.decode_pdfstream_until_unknown pdf (P.lookup_obj pdf n)) !content_streams;
@ -233,9 +239,11 @@ let json_of_pdf parse_content no_stream_data pdf =
pairs_parsed) pairs_parsed)
(* FIXME Proper streaming to output, rather than making a big string first. *) (* FIXME Proper streaming to output, rather than making a big string first. *)
let to_output output parse_content no_stream_data pdf = let to_output o parse_content no_stream_data pdf =
let b = Buffer.create 256 in let b = Buffer.create 256 in
let formatter = Format.formatter_of_buffer b in let formatter = Format.formatter_of_buffer b in
J.format formatter (json_of_pdf parse_content no_stream_data pdf); J.format formatter (json_of_pdf parse_content no_stream_data pdf);
Format.pp_print_flush formatter (); Format.pp_print_flush formatter ();
output.Pdfio.output_string (Buffer.contents b) o.Pdfio.output_string (Buffer.contents b)
(* Placeholder reader: the input argument is not consulted; the result is
   whatever Pdf.empty () produces. *)
let of_input _ = Pdf.empty ()

View File

@ -1 +1,2 @@
val to_output : Pdfio.output -> bool -> bool -> Pdf.t -> unit val to_output : Pdfio.output -> bool -> bool -> Pdf.t -> unit
(** [of_input i] yields a PDF document for the input [i].
    NOTE(review): presumably [i] carries the JSON representation written by
    [to_output] -- confirm against the implementation, which may not yet
    read [i] at all. *)
val of_input : Pdfio.input -> Pdf.t

View File

@ -1168,6 +1168,13 @@ let set_input s =
args.original_filename <- s; args.original_filename <- s;
args.inputs <- (InFile s, "all", "", "", ref false, None)::args.inputs args.inputs <- (InFile s, "all", "", "", ref false, None)::args.inputs
(* Handler for the -j option. Records [s] as args.original_filename, opens
   the file named [s] in binary mode, converts it to a PDF with
   CpdfJSON.of_input, and pushes the result onto the front of args.inputs as
   an AlreadyInMemory input (remaining tuple fields as in set_input).

   The channel is closed on both paths: if CpdfJSON.of_input raises, the
   channel is closed (ignoring any error from the close itself) and the
   original exception is re-raised. An exception from open_in_bin
   propagates unchanged. *)
let set_json_input s =
  args.original_filename <- s;
  let fh = open_in_bin s in
  let pdf =
    try CpdfJSON.of_input (Pdfio.input_of_channel fh) with
      e ->
        (* Do not leak the file descriptor when reading fails. *)
        close_in_noerr fh;
        raise e
  in
    close_in fh;
    args.inputs <- (AlreadyInMemory pdf, "all", "", "", ref false, None)::args.inputs
let set_input_dir s = let set_input_dir s =
let names = sort compare (leafnames_of_dir s) in let names = sort compare (leafnames_of_dir s) in
args.inputs <- args.inputs <-
@ -2197,6 +2204,9 @@ and specs =
("-output-json-no-stream-data", ("-output-json-no-stream-data",
Arg.Unit setjsonnostreamdata, Arg.Unit setjsonnostreamdata,
" Skip stream data for brevity"); " Skip stream data for brevity");
(* The help text starts with a space, like the neighbouring entries, so the
   whole description is treated as documentation rather than its first word
   being read as part of the option keyword. *)
("-j",
Arg.String set_json_input,
" Load a PDF JSON file");
("-ocg-list", ("-ocg-list",
Arg.Unit (setop OCGList), Arg.Unit (setop OCGList),
" List optional content groups"); " List optional content groups");