This commit is contained in:
John Whitington 2021-10-12 15:35:08 +01:00
parent a6ed214b21
commit ece1e9f280
3 changed files with 24 additions and 14 deletions

View File

@ -1,3 +1,4 @@
(* FIXME add -output-json-precombine-contents *)
(* cpdf command line tools *)
let demo = false
let noncomp = false
@ -3406,17 +3407,19 @@ let write_json output pdf =
| Stdout ->
Cpdfjson.to_output
(Pdfio.output_of_channel stdout)
args.jsonparsecontentstreams
args.jsonnostreamdata
args.jsondecompressstreams
~parse_content:args.jsonparsecontentstreams
~no_stream_data:args.jsonnostreamdata
~decompress_streams:args.jsondecompressstreams
~precombine_page_content:false (* FIXME add arg *)
pdf
| File filename ->
let f = open_out filename in
Cpdfjson.to_output
(Pdfio.output_of_channel f)
args.jsonparsecontentstreams
args.jsonnostreamdata
args.jsondecompressstreams
~parse_content:args.jsonparsecontentstreams
~no_stream_data:args.jsonnostreamdata
~decompress_streams:args.jsondecompressstreams
~precombine_page_content:false (* FIXME add arg *)
pdf;
close_out f

View File

@ -333,9 +333,10 @@ let parse_content_stream pdf resources bs =
`List (map (json_of_op pdf false) ops)
(* We need to make sure each page only has one page content stream. Otherwise,
if not split on op boundaries, each one would fail to parse on its own. *)
(* Future improvement. Don't blow up shared content streams. *)
let precombine_page_content pdf =
if not split on op boundaries, each one would fail to parse on its own. The
caller should really only do this on otherwise-failing files, since it could
blow up any shared content streams *)
let do_precombine_page_content pdf =
let pages' =
map
(fun page ->
@ -351,8 +352,14 @@ let precombine_page_content pdf =
in
Pdfpage.change_pages true pdf pages'
let json_of_pdf parse_content no_stream_data decompress_streams pdf =
let pdf = if parse_content then precombine_page_content pdf else pdf in
let json_of_pdf
~parse_content
~no_stream_data
~decompress_streams
~precombine_page_content
pdf
=
let pdf = if parse_content && precombine_page_content then do_precombine_page_content pdf else pdf in
if decompress_streams then
Pdf.objiter (fun _ obj -> Pdfcodec.decode_pdfstream_until_unknown pdf obj) pdf;
Pdf.remove_unreferenced pdf;
@ -405,8 +412,8 @@ let json_of_pdf parse_content no_stream_data decompress_streams pdf =
pairs_parsed)
(* FIXME Proper streaming to output *)
let to_output o parse_content no_stream_data decompress_streams pdf =
let json = json_of_pdf parse_content no_stream_data decompress_streams pdf in
let to_output o ~parse_content ~no_stream_data ~decompress_streams ~precombine_page_content pdf =
let json = json_of_pdf ~parse_content ~no_stream_data ~decompress_streams ~precombine_page_content pdf in
o.Pdfio.output_string (J.pretty_to_string json)
(* FIXME Proper streaming to output / from input, rather than making a big string first. *)

View File

@ -1,2 +1,2 @@
val to_output : Pdfio.output -> bool -> bool -> bool -> Pdf.t -> unit
val to_output : Pdfio.output -> parse_content:bool -> no_stream_data:bool -> decompress_streams:bool -> precombine_page_content:bool -> Pdf.t -> unit
val of_input : Pdfio.input -> Pdf.t