more
This commit is contained in:
parent
a6ed214b21
commit
ece1e9f280
|
@ -1,3 +1,4 @@
|
||||||
|
(* FIXME add -output-json-precombine-contents *)
|
||||||
(* cpdf command line tools *)
|
(* cpdf command line tools *)
|
||||||
let demo = false
|
let demo = false
|
||||||
let noncomp = false
|
let noncomp = false
|
||||||
|
@ -3406,17 +3407,19 @@ let write_json output pdf =
|
||||||
| Stdout ->
|
| Stdout ->
|
||||||
Cpdfjson.to_output
|
Cpdfjson.to_output
|
||||||
(Pdfio.output_of_channel stdout)
|
(Pdfio.output_of_channel stdout)
|
||||||
args.jsonparsecontentstreams
|
~parse_content:args.jsonparsecontentstreams
|
||||||
args.jsonnostreamdata
|
~no_stream_data:args.jsonnostreamdata
|
||||||
args.jsondecompressstreams
|
~decompress_streams:args.jsondecompressstreams
|
||||||
|
~precombine_page_content:false (* FIXME add arg *)
|
||||||
pdf
|
pdf
|
||||||
| File filename ->
|
| File filename ->
|
||||||
let f = open_out filename in
|
let f = open_out filename in
|
||||||
Cpdfjson.to_output
|
Cpdfjson.to_output
|
||||||
(Pdfio.output_of_channel f)
|
(Pdfio.output_of_channel f)
|
||||||
args.jsonparsecontentstreams
|
~parse_content:args.jsonparsecontentstreams
|
||||||
args.jsonnostreamdata
|
~no_stream_data:args.jsonnostreamdata
|
||||||
args.jsondecompressstreams
|
~decompress_streams:args.jsondecompressstreams
|
||||||
|
~precombine_page_content:false (* FIXME add arg *)
|
||||||
pdf;
|
pdf;
|
||||||
close_out f
|
close_out f
|
||||||
|
|
||||||
|
|
21
cpdfjson.ml
21
cpdfjson.ml
|
@ -333,9 +333,10 @@ let parse_content_stream pdf resources bs =
|
||||||
`List (map (json_of_op pdf false) ops)
|
`List (map (json_of_op pdf false) ops)
|
||||||
|
|
||||||
(* We need to make sure each page only has one page content stream. Otherwise,
|
(* We need to make sure each page only has one page content stream. Otherwise,
|
||||||
if not split on op boundaries, each one would fail to parse on its own. *)
|
if not split on op boundaries, each one would fail to parse on its own. The
|
||||||
(* Future improvement. Don't blow up shared content streams. *)
|
caller should really only do this on otherwise-failing files, since it could
|
||||||
let precombine_page_content pdf =
|
blow up any shared content streams *)
|
||||||
|
let do_precombine_page_content pdf =
|
||||||
let pages' =
|
let pages' =
|
||||||
map
|
map
|
||||||
(fun page ->
|
(fun page ->
|
||||||
|
@ -351,8 +352,14 @@ let precombine_page_content pdf =
|
||||||
in
|
in
|
||||||
Pdfpage.change_pages true pdf pages'
|
Pdfpage.change_pages true pdf pages'
|
||||||
|
|
||||||
let json_of_pdf parse_content no_stream_data decompress_streams pdf =
|
let json_of_pdf
|
||||||
let pdf = if parse_content then precombine_page_content pdf else pdf in
|
~parse_content
|
||||||
|
~no_stream_data
|
||||||
|
~decompress_streams
|
||||||
|
~precombine_page_content
|
||||||
|
pdf
|
||||||
|
=
|
||||||
|
let pdf = if parse_content && precombine_page_content then do_precombine_page_content pdf else pdf in
|
||||||
if decompress_streams then
|
if decompress_streams then
|
||||||
Pdf.objiter (fun _ obj -> Pdfcodec.decode_pdfstream_until_unknown pdf obj) pdf;
|
Pdf.objiter (fun _ obj -> Pdfcodec.decode_pdfstream_until_unknown pdf obj) pdf;
|
||||||
Pdf.remove_unreferenced pdf;
|
Pdf.remove_unreferenced pdf;
|
||||||
|
@ -405,8 +412,8 @@ let json_of_pdf parse_content no_stream_data decompress_streams pdf =
|
||||||
pairs_parsed)
|
pairs_parsed)
|
||||||
|
|
||||||
(* FIXME Proper streaming to output *)
|
(* FIXME Proper streaming to output *)
|
||||||
let to_output o parse_content no_stream_data decompress_streams pdf =
|
let to_output o ~parse_content ~no_stream_data ~decompress_streams ~precombine_page_content pdf =
|
||||||
let json = json_of_pdf parse_content no_stream_data decompress_streams pdf in
|
let json = json_of_pdf ~parse_content ~no_stream_data ~decompress_streams ~precombine_page_content pdf in
|
||||||
o.Pdfio.output_string (J.pretty_to_string json)
|
o.Pdfio.output_string (J.pretty_to_string json)
|
||||||
|
|
||||||
(* FIXME Proper streaming to output / from input, rather than making a big string first. *)
|
(* FIXME Proper streaming to output / from input, rather than making a big string first. *)
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
val to_output : Pdfio.output -> bool -> bool -> bool -> Pdf.t -> unit
|
val to_output : Pdfio.output -> parse_content:bool -> no_stream_data:bool -> decompress_streams:bool -> precombine_page_content:bool -> Pdf.t -> unit
|
||||||
val of_input : Pdfio.input -> Pdf.t
|
val of_input : Pdfio.input -> Pdf.t
|
||||||
|
|
Loading…
Reference in New Issue