This commit is contained in:
John Whitington 2021-10-14 15:54:09 +01:00
parent fff0e7e6e3
commit 65aec369c2
2 changed files with 22 additions and 8 deletions

View File

@ -122,7 +122,16 @@ and object_of_json = function
in in
P.Stream (ref (d', P.Got (Pdfio.bytes_of_string data))) P.Stream (ref (d', P.Got (Pdfio.bytes_of_string data)))
| `Assoc ["S", `List [dict; `List parsed_ops]] -> | `Assoc ["S", `List [dict; `List parsed_ops]] ->
begin match
Pdfops.stream_of_ops (List.map op_of_json parsed_ops) Pdfops.stream_of_ops (List.map op_of_json parsed_ops)
with
| P.Stream {contents = (_, Pdf.Got data)} ->
let d' =
P.add_dict_entry (object_of_json dict) "/Length" (P.Integer (Pdfio.bytes_size data))
in
P.Stream (ref (d', Pdf.Got data))
| _ -> assert false
end
| `Assoc elts -> P.Dictionary (map (fun (n, o) -> (n, object_of_json o)) elts) | `Assoc elts -> P.Dictionary (map (fun (n, o) -> (n, object_of_json o)) elts)
| _ -> error "not recognised in object_of_json" | _ -> error "not recognised in object_of_json"
@ -220,8 +229,12 @@ let rec json_of_object pdf fcs no_stream_data pcs = function
| P.Stream {contents = (P.Dictionary dict as d, _)} -> | P.Stream {contents = (P.Dictionary dict as d, _)} ->
begin match P.lookup_direct pdf "/Subtype" d with begin match P.lookup_direct pdf "/Subtype" d with
| Some (P.Name "/Form") -> fcs i | Some (P.Name "/Form") -> fcs i
| _ ->
begin match P.lookup_direct pdf "/Type" d with
| Some (P.Name "/Pattern") -> fcs i
| _ -> () | _ -> ()
end end
end
| _ -> () | _ -> ()
end; end;
`Int i `Int i
@ -330,9 +343,7 @@ let json_of_op pdf no_stream_data = function
`List [`String s; json_of_object pdf (fun _ -> ()) no_stream_data false obj; `String "DP"] `List [`String s; json_of_object pdf (fun _ -> ()) no_stream_data false obj; `String "DP"]
(* Convert the raw bytes [bs] of one content stream to JSON: a `List holding
 * one element per parsed operator, each produced by json_of_op with
 * no_stream_data = false. O.parse_stream is given [pdf] and [resources]
 * because it needs them for lexing inline images (looking up the
 * colourspace). *)
let parse_content_stream pdf resources bs =
  let to_json = json_of_op pdf false in
    `List (map to_json (O.parse_stream pdf resources [bs]))
@ -395,7 +406,6 @@ let json_of_pdf
if mem objnum !content_streams then if mem objnum !content_streams then
begin match obj with begin match obj with
| `Assoc ["S", `List [dict; `String _]] -> | `Assoc ["S", `List [dict; `String _]] ->
(* FIXME Proper resources here for reasons explained above? *)
let streamdata = let streamdata =
match P.lookup_obj pdf objnum with match P.lookup_obj pdf objnum with
| P.Stream {contents = (_, P.Got b)} -> b | P.Stream {contents = (_, P.Got b)} -> b
@ -422,9 +432,12 @@ let to_output o ~parse_content ~no_stream_data ~decompress_streams ~precombine_p
(* Read a PDF document from JSON held in the Pdfio input [i].
 *
 * If [i] carries an OCaml channel (i.Pdfio.caml_channel is Some ch), the JSON
 * is parsed directly from that channel. Otherwise the whole input, from
 * position 0 for i.Pdfio.in_channel_length bytes, is read into a string and
 * parsed from there. In both cases the JSON value is passed to pdf_of_json.
 *
 * Any exception raised while reading or converting is caught and passed, as
 * text from Printexc.to_string, to [error]. NOTE(review): [error] is defined
 * elsewhere in this file; presumably it raises this module's own error
 * exception -- confirm.
 *
 * This function must not have side effects beyond consuming [i]: an earlier
 * revision wrote every document read from a channel to "debug.pdf" in the
 * current directory. That was debugging residue and has been removed, so the
 * channel and non-channel paths now behave identically. *)
let of_input i =
  try
    match i.Pdfio.caml_channel with
    | Some ch ->
        pdf_of_json (J.from_channel ch)
    | None ->
        let content =
          Pdfio.string_of_bytes
            (Pdfio.bytes_of_input i 0 i.Pdfio.in_channel_length)
        in
          pdf_of_json (J.from_string content)
  with
    e -> error (Printexc.to_string e)

View File

@ -1,3 +1,4 @@
%Document the new JSON format
%Document -j to input JSON %Document -j to input JSON
%Document -decrypt-force %Document -decrypt-force
%Document -output-json-precombine-object-streams and friends %Document -output-json-precombine-object-streams and friends