Add option to decompress streams in JSON output
This commit is contained in:
parent
33e2a86f55
commit
14edc5a5db
|
@ -450,6 +450,7 @@ type args =
|
||||||
mutable removeonly : string option;
|
mutable removeonly : string option;
|
||||||
mutable jsonparsecontentstreams : bool;
|
mutable jsonparsecontentstreams : bool;
|
||||||
mutable jsonnostreamdata : bool;
|
mutable jsonnostreamdata : bool;
|
||||||
|
mutable jsondecompressstreams : bool;
|
||||||
mutable ocgrenamefrom : string;
|
mutable ocgrenamefrom : string;
|
||||||
mutable ocgrenameto : string;
|
mutable ocgrenameto : string;
|
||||||
mutable dedup : bool;
|
mutable dedup : bool;
|
||||||
|
@ -554,6 +555,7 @@ let args =
|
||||||
removeonly = None;
|
removeonly = None;
|
||||||
jsonparsecontentstreams = false;
|
jsonparsecontentstreams = false;
|
||||||
jsonnostreamdata = false;
|
jsonnostreamdata = false;
|
||||||
|
jsondecompressstreams = false;
|
||||||
ocgrenamefrom = "";
|
ocgrenamefrom = "";
|
||||||
ocgrenameto = "";
|
ocgrenameto = "";
|
||||||
dedup = false;
|
dedup = false;
|
||||||
|
@ -643,6 +645,7 @@ let reset_arguments () =
|
||||||
args.removeonly <- None;
|
args.removeonly <- None;
|
||||||
args.jsonparsecontentstreams <- false;
|
args.jsonparsecontentstreams <- false;
|
||||||
args.jsonnostreamdata <- false;
|
args.jsonnostreamdata <- false;
|
||||||
|
args.jsondecompressstreams <- false;
|
||||||
args.ocgrenamefrom <- "";
|
args.ocgrenamefrom <- "";
|
||||||
args.ocgrenameto <- "";
|
args.ocgrenameto <- "";
|
||||||
args.dedup <- false;
|
args.dedup <- false;
|
||||||
|
@ -1499,6 +1502,9 @@ let setjsonparsecontentstreams () =
|
||||||
let setjsonnostreamdata () =
|
let setjsonnostreamdata () =
|
||||||
args.jsonnostreamdata <- true
|
args.jsonnostreamdata <- true
|
||||||
|
|
||||||
|
let setjsondecompressstreams () =
|
||||||
|
args.jsondecompressstreams <- true
|
||||||
|
|
||||||
let setocgrenamefrom s =
|
let setocgrenamefrom s =
|
||||||
args.ocgrenamefrom <- s
|
args.ocgrenamefrom <- s
|
||||||
|
|
||||||
|
@ -2194,6 +2200,9 @@ and specs =
|
||||||
("-output-json-no-stream-data",
|
("-output-json-no-stream-data",
|
||||||
Arg.Unit setjsonnostreamdata,
|
Arg.Unit setjsonnostreamdata,
|
||||||
" Skip stream data for brevity");
|
" Skip stream data for brevity");
|
||||||
|
("-output-json-decompress-streams",
|
||||||
|
Arg.Unit setjsondecompressstreams,
|
||||||
|
" Decompress streams in the JSON output");
|
||||||
("-j",
|
("-j",
|
||||||
Arg.String set_json_input,
|
Arg.String set_json_input,
|
||||||
"Load a PDF JSON file");
|
"Load a PDF JSON file");
|
||||||
|
@ -3395,10 +3404,20 @@ let write_json output pdf =
|
||||||
| NoOutputSpecified ->
|
| NoOutputSpecified ->
|
||||||
error "-output-json: no output name specified"
|
error "-output-json: no output name specified"
|
||||||
| Stdout ->
|
| Stdout ->
|
||||||
Cpdfjson.to_output (Pdfio.output_of_channel stdout) args.jsonparsecontentstreams args.jsonnostreamdata pdf
|
Cpdfjson.to_output
|
||||||
|
(Pdfio.output_of_channel stdout)
|
||||||
|
args.jsonparsecontentstreams
|
||||||
|
args.jsonnostreamdata
|
||||||
|
args.jsondecompressstreams
|
||||||
|
pdf
|
||||||
| File filename ->
|
| File filename ->
|
||||||
let f = open_out filename in
|
let f = open_out filename in
|
||||||
Cpdfjson.to_output (Pdfio.output_of_channel f) args.jsonparsecontentstreams args.jsonnostreamdata pdf;
|
Cpdfjson.to_output
|
||||||
|
(Pdfio.output_of_channel f)
|
||||||
|
args.jsonparsecontentstreams
|
||||||
|
args.jsonnostreamdata
|
||||||
|
args.jsondecompressstreams
|
||||||
|
pdf;
|
||||||
close_out f
|
close_out f
|
||||||
|
|
||||||
(* Main function *)
|
(* Main function *)
|
||||||
|
|
28
cpdfjson.ml
28
cpdfjson.ml
|
@ -299,8 +299,16 @@ let precombine_page_content pdf =
|
||||||
in
|
in
|
||||||
Pdfpage.change_pages true pdf pages'
|
Pdfpage.change_pages true pdf pages'
|
||||||
|
|
||||||
let json_of_pdf parse_content no_stream_data pdf =
|
let json_of_pdf parse_content no_stream_data decompress_streams pdf =
|
||||||
let pdf = if parse_content then precombine_page_content pdf else pdf in
|
let pdf = if parse_content then precombine_page_content pdf else pdf in
|
||||||
|
if decompress_streams then
|
||||||
|
Pdf.objiter
|
||||||
|
(fun n obj ->
|
||||||
|
Printf.eprintf "obj %i\n" n;
|
||||||
|
match obj with
|
||||||
|
| Pdf.Stream _ -> Printf.eprintf "decompressing...\n"; Pdfcodec.decode_pdfstream_until_unknown pdf obj
|
||||||
|
| _ -> ())
|
||||||
|
pdf;
|
||||||
Pdf.remove_unreferenced pdf;
|
Pdf.remove_unreferenced pdf;
|
||||||
let trailerdict = (0, json_of_object pdf (fun x -> ()) no_stream_data pdf.P.trailerdict) in
|
let trailerdict = (0, json_of_object pdf (fun x -> ()) no_stream_data pdf.P.trailerdict) in
|
||||||
let parameters =
|
let parameters =
|
||||||
|
@ -313,7 +321,10 @@ let json_of_pdf parse_content no_stream_data pdf =
|
||||||
]))
|
]))
|
||||||
in
|
in
|
||||||
let content_streams = ref [] in
|
let content_streams = ref [] in
|
||||||
let fcs n = content_streams := n::!content_streams in
|
let fcs n =
|
||||||
|
content_streams := n::!content_streams;
|
||||||
|
if parse_content then Pdfcodec.decode_pdfstream_until_unknown pdf (P.lookup_obj pdf n)
|
||||||
|
in
|
||||||
let pairs =
|
let pairs =
|
||||||
let ps = ref [] in
|
let ps = ref [] in
|
||||||
P.objiter
|
P.objiter
|
||||||
|
@ -322,8 +333,6 @@ let json_of_pdf parse_content no_stream_data pdf =
|
||||||
pdf;
|
pdf;
|
||||||
parameters::trailerdict::!ps
|
parameters::trailerdict::!ps
|
||||||
in
|
in
|
||||||
if parse_content then
|
|
||||||
iter (fun n -> Pdfcodec.decode_pdfstream_until_unknown pdf (P.lookup_obj pdf n)) !content_streams;
|
|
||||||
let pairs_parsed =
|
let pairs_parsed =
|
||||||
if not parse_content then pairs else
|
if not parse_content then pairs else
|
||||||
map
|
map
|
||||||
|
@ -350,8 +359,7 @@ let json_of_pdf parse_content no_stream_data pdf =
|
||||||
pairs_parsed)
|
pairs_parsed)
|
||||||
|
|
||||||
let pdf_of_json json =
|
let pdf_of_json json =
|
||||||
(*flprint (J.show json);
|
(*flprint (J.show json); flprint "\n";*)
|
||||||
flprint "\n";*)
|
|
||||||
let objs = match json with J.Array objs -> objs | _ -> error "bad json top level" in
|
let objs = match json with J.Array objs -> objs | _ -> error "bad json top level" in
|
||||||
let params = ref Pdf.Null in
|
let params = ref Pdf.Null in
|
||||||
let trailerdict = ref Pdf.Null in
|
let trailerdict = ref Pdf.Null in
|
||||||
|
@ -369,8 +377,7 @@ let pdf_of_json json =
|
||||||
| _ -> error "json bad obj")
|
| _ -> error "json bad obj")
|
||||||
objs
|
objs
|
||||||
in
|
in
|
||||||
(*List.
|
(*List. iter (fun (i, o) -> flprint (soi i); flprint "\n"; flprint (Pdfwrite.string_of_pdf o); flprint "\n") objects;*)
|
||||||
iter (fun (i, o) -> flprint (soi i); flprint "\n"; flprint (Pdfwrite.string_of_pdf o); flprint "\n") objects;*)
|
|
||||||
begin match Pdf.lookup_direct (Pdf.empty ()) "/CPDFJSONstreamdataincluded" !params with
|
begin match Pdf.lookup_direct (Pdf.empty ()) "/CPDFJSONstreamdataincluded" !params with
|
||||||
| Some (Pdf.Boolean false) -> error "no stream data; cannot reconstruct PDF"
|
| Some (Pdf.Boolean false) -> error "no stream data; cannot reconstruct PDF"
|
||||||
| _ -> ()
|
| _ -> ()
|
||||||
|
@ -408,12 +415,13 @@ let pdf_of_json json =
|
||||||
P.saved_encryption = None}
|
P.saved_encryption = None}
|
||||||
|
|
||||||
(* FIXME Proper streaming to output / from input, rather than making a big string first. *)
|
(* FIXME Proper streaming to output / from input, rather than making a big string first. *)
|
||||||
let to_output o parse_content no_stream_data pdf =
|
let to_output o parse_content no_stream_data decompress_streams pdf =
|
||||||
let b = Buffer.create 256 in
|
let b = Buffer.create 256 in
|
||||||
let formatter = Format.formatter_of_buffer b in
|
let formatter = Format.formatter_of_buffer b in
|
||||||
J.format formatter (json_of_pdf parse_content no_stream_data pdf);
|
J.format formatter (json_of_pdf parse_content no_stream_data decompress_streams pdf);
|
||||||
Format.pp_print_flush formatter ();
|
Format.pp_print_flush formatter ();
|
||||||
o.Pdfio.output_string (Buffer.contents b)
|
o.Pdfio.output_string (Buffer.contents b)
|
||||||
|
|
||||||
|
(* FIXME Proper streaming to output / from input, rather than making a big string first. *)
|
||||||
let of_input i =
|
let of_input i =
|
||||||
pdf_of_json (J.parse (Pdfio.string_of_bytes (Pdfio.bytes_of_input i 0 (i.Pdfio.in_channel_length))))
|
pdf_of_json (J.parse (Pdfio.string_of_bytes (Pdfio.bytes_of_input i 0 (i.Pdfio.in_channel_length))))
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
val to_output : Pdfio.output -> bool -> bool -> Pdf.t -> unit
|
val to_output : Pdfio.output -> bool -> bool -> bool -> Pdf.t -> unit
|
||||||
val of_input : Pdfio.input -> Pdf.t
|
val of_input : Pdfio.input -> Pdf.t
|
||||||
|
|
Loading…
Reference in New Issue