From d4f0bd202d0434a5c25e3c2a4b3e86f6b0731107 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Fri, 1 Oct 2021 15:21:03 +0100 Subject: [PATCH] First successful JSON round trip --- cpdfJSON.ml | 79 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 14 deletions(-) diff --git a/cpdfJSON.ml b/cpdfJSON.ml index f092e6f..ea4c41a 100644 --- a/cpdfJSON.ml +++ b/cpdfJSON.ml @@ -1,3 +1,5 @@ +(*FIXME failwiths -> Pdf.PDFError or similar *) +(*FIXME flprintfs to eprintf *) open Pdfutil module J = Tjjson @@ -9,13 +11,26 @@ let soi = string_of_int let string_of_float _ = failwith "use sof" let string_of_int _ = failwith "use soi" +let rec object_of_json = function + | J.Null -> P.Null + | J.Bool b -> P.Boolean b + | J.Number n -> Pdf.Indirect (int_of_string n) + | J.String s -> P.String s + | J.Array objs -> P.Array (map object_of_json objs) + | J.Object ["I", J.Number i] -> P.Integer (int_of_string i) + | J.Object ["F", J.Number f] -> P.Real (float_of_string f) + | J.Object ["N", J.String n] -> P.Name n + | J.Object ["S", J.Array [dict; J.String data]] -> + P.Stream (ref (object_of_json dict, P.Got (Pdfio.bytes_of_string data))) + | J.Object elts -> P.Dictionary (map (fun (n, o) -> (n, object_of_json o)) elts) + let rec json_of_object pdf fcs no_stream_data = function - | P.Null -> J.String "null" + | P.Null -> J.Null | P.Boolean b -> J.Bool b - | P.Integer i -> J.Number (soi i) - | P.Real r -> J.Number (sof r) + | P.Integer i -> J.Object [("I", J.Number (soi i))] + | P.Real r -> J.Object [("F", J.Number (sof r))] | P.String s -> J.String s - | P.Name n -> J.String n + | P.Name n -> J.Object [("N", J.String n)] | P.Array objs -> J.Array (map (json_of_object pdf fcs no_stream_data) objs) | P.Dictionary elts -> iter @@ -41,8 +56,8 @@ let rec json_of_object pdf fcs no_stream_data = function match stream with P.Got b -> Pdfio.string_of_bytes b | P.ToGet _ -> "failure: toget" end in - json_of_object pdf fcs no_stream_data (P.Array [P.Dictionary dict; P.String str]) - | P.Stream _ -> J.String "error: stream with not-a-dictioary" + json_of_object pdf fcs no_stream_data (P.Dictionary [("S", P.Array [P.Dictionary dict; P.String str])]) + | P.Stream _ -> J.String "error: stream with not-a-dictionary" | P.Indirect i -> begin match P.lookup_obj pdf i with | P.Stream {contents = (P.Dictionary dict as d, _)} -> @@ -197,9 +212,12 @@ let json_of_pdf parse_content no_stream_data pdf = let trailerdict = (0, json_of_object pdf (fun x -> ()) no_stream_data pdf.P.trailerdict) in let parameters = (-1, json_of_object pdf (fun x -> ()) false - (Pdf.Dictionary [("/CPDFJSONformatversion", Pdf.Integer 1); + (Pdf.Dictionary [("/CPDFJSONformatversion", Pdf.Integer 2); ("/CPDFJSONcontentparsed", Pdf.Boolean parse_content); - ("/CPDFJSONstreamdataincluded", Pdf.Boolean (not no_stream_data))])) + ("/CPDFJSONstreamdataincluded", Pdf.Boolean (not no_stream_data)); + ("/CPDFJSONmajorpdfversion", Pdf.Integer pdf.Pdf.major); + ("/CPDFJSONminorpdfversion", Pdf.Integer pdf.Pdf.minor); + ])) in let content_streams = ref [] in let fcs n = content_streams := n::!content_streams in @@ -239,22 +257,55 @@ let json_of_pdf parse_content no_stream_data pdf = pairs_parsed) let pdf_of_json json = - let major = 0 in - let minor = 1 in - let root = 2 in - let objmap = P.pdfobjmap_empty () in + (*flprint (J.show json); + flprint "\n";*) + let objs = match json with J.Array objs -> objs | _ -> failwith "bad json top level" in + let params = ref Pdf.Null in + let trailerdict = ref Pdf.Null in + let objects = + option_map + (function + | J.Array [J.Number n; o] -> + let objnum = int_of_string n in + begin match objnum with + | -1 -> params := object_of_json o; None + | 0 -> trailerdict := object_of_json o; None + | n when n < 0 -> None + | n -> Some (n, object_of_json o) + end + | _ -> failwith "json bad obj") + objs + in + (*List.iter (fun (i, o) -> flprint (soi i); flprint "\n"; flprint (Pdfwrite.string_of_pdf o); flprint "\n") objects;*) + let major = + match Pdf.lookup_direct (Pdf.empty ()) "/CPDFJSONmajorpdfversion" !params with + Some (Pdf.Integer i) -> i | _ -> failwith "bad major version" + in + let minor = + match Pdf.lookup_direct (Pdf.empty ()) "/CPDFJSONminorpdfversion" !params with + Some (Pdf.Integer i) -> i | _ -> failwith "bad minor version" + in + (*flprint (Pdfwrite.string_of_pdf !trailerdict);*) + let root = + match !trailerdict with Pdf.Dictionary d -> + begin match lookup "/Root" d with + Some (Pdf.Indirect i) -> i | _ -> failwith "bad root" + end + | _ -> failwith "bad root 2" + in + let objmap = P.pdfobjmap_empty () in + List.iter (fun (k, v) -> Hashtbl.add objmap k (ref (P.Parsed v), 0)) objects; let objects = {P.maxobjnum = 0; P.parse = None; P.pdfobjects = objmap; P.object_stream_ids = Hashtbl.create 0} in - let trailerdict = P.Null in {P.major; P.minor; P.root; P.objects; - P.trailerdict; + P.trailerdict = !trailerdict; P.was_linearized = false; P.saved_encryption = None}