diff --git a/cpdfcommand.ml b/cpdfcommand.ml
index e03d455..074ffdb 100644
--- a/cpdfcommand.ml
+++ b/cpdfcommand.ml
@@ -4443,6 +4443,14 @@ let go () =
         write_pdf false (Cpdf.append_page_content s before args.fast range pdf)
   | Some OutputJSON ->
       let pdf = get_single_pdf args.op false in
+      (* Best effort: try to decode every stream before writing the JSON.
+         A failure is reported on stderr and the remaining streams are
+         still processed. *)
+      Pdf.iter_stream
+        (function stream ->
+           try Pdfcodec.decode_pdfstream_until_unknown pdf stream with
+             e -> Printf.eprintf "Decode failure: %s. Carrying on...\n" (Printexc.to_string e); ())
+        pdf;
       write_json args.out pdf
 
 let parse_argv () =
diff --git a/cpdfwriteJSON.ml b/cpdfwriteJSON.ml
index c427467..21628be 100644
--- a/cpdfwriteJSON.ml
+++ b/cpdfwriteJSON.ml
@@ -1,5 +1,6 @@
 module J = Tjjson
 module P = Pdf
+module O = Pdfops
 
 let rec json_of_object fcs = function
   | P.Null -> J.String "null"
@@ -20,7 +21,95 @@ let rec json_of_object fcs = function
   | P.Stream _ -> J.String "error: stream with not-a-dictioary"
   | P.Indirect i -> J.Number (string_of_int i)
 
-let parse_content_stream str = J.String ("PARSED: str")
+(* Convert one parsed content-stream operator to a JSON array. At present
+   the array holds only the operator's name; operands are matched but not
+   yet written out.
+   NOTE(review): constructors with a prime or quote in their name are
+   emitted under the constructor's spelling rather than the PDF operator
+   (presumably f*, B*, b*, W*, T* and the double-quote operator) - confirm
+   against Pdfops before anything consumes these names. *)
+let json_of_op = function
+  | O.Op_w w -> J.Array [J.String "w"]
+  | O.Op_J j -> J.Array [J.String "J"]
+  | O.Op_j j -> J.Array [J.String "j"]
+  | O.Op_M m -> J.Array [J.String "M"] (* must stay distinct from Op_m below *)
+  | O.Op_d (fl, y) -> J.Array [J.String "d"]
+  | O.Op_ri s -> J.Array [J.String "ri"]
+  | O.Op_i i -> J.Array [J.String "i"]
+  | O.Op_gs s -> J.Array [J.String "gs"]
+  | O.Op_q -> J.Array [J.String "q"]
+  | O.Op_Q -> J.Array [J.String "Q"]
+  | O.Op_cm t -> J.Array [J.String "cm"]
+  | O.Op_m (a, b) -> J.Array [J.String "m"]
+  | O.Op_l (a, b) -> J.Array [J.String "l"]
+  | O.Op_c (a, b, c, d, e, k) -> J.Array [J.String "c"]
+  | O.Op_v (a, b, c, d) -> J.Array [J.String "v"]
+  | O.Op_y (a, b, c, d) -> J.Array [J.String "y"]
+  | O.Op_h -> J.Array [J.String "h"]
+  | O.Op_re (a, b, c, d) -> J.Array [J.String "re"]
+  | O.Op_S -> J.Array [J.String "S"]
+  | O.Op_s -> J.Array [J.String "s"]
+  | O.Op_f -> J.Array [J.String "f"]
+  | O.Op_F -> J.Array [J.String "F"]
+  | O.Op_f' -> J.Array [J.String "f'"]
+  | O.Op_B -> J.Array [J.String "B"]
+  | O.Op_B' -> J.Array [J.String "B'"]
+  | O.Op_b -> J.Array [J.String "b"]
+  | O.Op_b' -> J.Array [J.String "b'"]
+  | O.Op_n -> J.Array [J.String "n"]
+  | O.Op_W -> J.Array [J.String "W"]
+  | O.Op_W' -> J.Array [J.String "W'"]
+  | O.Op_BT -> J.Array [J.String "BT"]
+  | O.Op_ET -> J.Array [J.String "ET"]
+  | O.Op_Tc c -> J.Array [J.String "Tc"]
+  | O.Op_Tw w -> J.Array [J.String "Tw"]
+  | O.Op_Tz z -> J.Array [J.String "Tz"]
+  | O.Op_TL l -> J.Array [J.String "TL"]
+  | O.Op_Tf (k, s) -> J.Array [J.String "Tf"]
+  | O.Op_Tr i -> J.Array [J.String "Tr"]
+  | O.Op_Ts k -> J.Array [J.String "Ts"]
+  | O.Op_Td (k, k') -> J.Array [J.String "Td"]
+  | O.Op_TD (k, k') -> J.Array [J.String "TD"]
+  | O.Op_Tm t -> J.Array [J.String "Tm"]
+  | O.Op_T' -> J.Array [J.String "T'"]
+  | O.Op_Tj s -> J.Array [J.String "Tj"]
+  | O.Op_TJ pdfobject -> J.Array [J.String "TJ"]
+  | O.Op_' s -> J.Array [J.String "'"]
+  | O.Op_'' (k, k', s) -> J.Array [J.String "''"]
+  | O.Op_d0 (k, k') -> J.Array [J.String "d0"]
+  | O.Op_d1 (a, b, c, d, e, k) -> J.Array [J.String "d1"]
+  | O.Op_CS s -> J.Array [J.String "CS"]
+  | O.Op_cs s -> J.Array [J.String "cs"]
+  | O.Op_SC fs -> J.Array [J.String "SC"]
+  | O.Op_sc fs -> J.Array [J.String "sc"]
+  | O.Op_SCN fs -> J.Array [J.String "SCN"]
+  | O.Op_scn fs -> J.Array [J.String "scn"]
+  | O.Op_SCNName (s, fs) -> J.Array [J.String "SCNName"]
+  | O.Op_scnName (s, fs) -> J.Array [J.String "scnName"]
+  | O.Op_G k -> J.Array [J.String "G"]
+  | O.Op_g k -> J.Array [J.String "g"]
+  | O.Op_RG (r, g, b) -> J.Array [J.String "RG"]
+  | O.Op_rg (r, g, b) -> J.Array [J.String "rg"]
+  | O.Op_K (c, m, y, k) -> J.Array [J.String "K"]
+  | O.Op_k (c, m, y, k) -> J.Array [J.String "k"]
+  | O.Op_sh s -> J.Array [J.String "sh"]
+  | O.InlineImage (dict, data) -> J.Array [J.String "InlineImage"]
+  | O.Op_Do s -> J.Array [J.String "Do"]
+  | O.Op_MP s -> J.Array [J.String "MP"]
+  | O.Op_DP (s, obj) -> J.Array [J.String "DP"]
+  | O.Op_BMC s -> J.Array [J.String "BMC"]
+  | O.Op_BDC (s, obj) -> J.Array [J.String "BDC"]
+  | O.Op_EMC -> J.Array [J.String "EMC"]
+  | O.Op_BX -> J.Array [J.String "BX"]
+  | O.Op_EX -> J.Array [J.String "EX"]
+  | O.Op_Unknown _ -> J.Array [J.String "Unknown"]
+
+(* Parse the raw bytes of a content stream into operators and return them
+   as a JSON array, one entry per operator. Parsing is done against an
+   empty document and an empty dictionary (presumably the resources). *)
+let parse_content_stream str =
+  let ops = Pdfops.parse_stream (Pdf.empty ()) (Pdf.Dictionary []) [Pdfio.bytes_of_string str] in
+  J.Array (List.map json_of_op ops)
 
 let json_of_pdf parse_content pdf =
   let trailerdict = (0, json_of_object (fun x -> ()) pdf.Pdf.trailerdict) in