Beginnings of output of JSON

This commit is contained in:
John Whitington 2020-01-30 14:10:30 +00:00
parent 60e028858c
commit b4f8500ef5
2 changed files with 85 additions and 1 deletions

View File

@ -4443,6 +4443,11 @@ let go () =
write_pdf false (Cpdf.append_page_content s before args.fast range pdf) write_pdf false (Cpdf.append_page_content s before args.fast range pdf)
| Some OutputJSON -> | Some OutputJSON ->
let pdf = get_single_pdf args.op false in let pdf = get_single_pdf args.op false in
Pdf.iter_stream
(function stream ->
try Pdfcodec.decode_pdfstream_until_unknown pdf stream with
e -> Printf.eprintf "Decode failure: %s. Carrying on...\n" (Printexc.to_string e); ())
pdf;
write_json args.out pdf write_json args.out pdf
let parse_argv () = let parse_argv () =

View File

@ -1,5 +1,6 @@
module J = Tjjson module J = Tjjson
module P = Pdf module P = Pdf
module O = Pdfops
let rec json_of_object fcs = function let rec json_of_object fcs = function
| P.Null -> J.String "null" | P.Null -> J.String "null"
@ -20,7 +21,85 @@ let rec json_of_object fcs = function
| P.Stream _ -> J.String "error: stream with not-a-dictioary" | P.Stream _ -> J.String "error: stream with not-a-dictioary"
| P.Indirect i -> J.Number (string_of_int i) | P.Indirect i -> J.Number (string_of_int i)
(** [json_of_op op] converts a single content-stream operator into JSON.

    The result is a one-element [J.Array] holding the operator's name as a
    [J.String]. Operands are not serialised yet: every payload carried by
    the constructor is ignored.

    [O.Op_M] is tagged ["M"], so that it can be told apart from [O.Op_m],
    which is tagged ["m"].

    NOTE(review): the primed constructors ([O.Op_f'], [O.Op_B'], [O.Op_b'],
    [O.Op_W'], [O.Op_T'], [O.Op_'']) are emitted using the constructor's
    apostrophe spelling (["f'"], ["''"], ...) rather than the spelling used
    in PDF content streams - confirm which one consumers should see. *)
let json_of_op op =
  (* Every operator is currently rendered as a singleton array [name]. *)
  let tag name = J.Array [J.String name] in
  match op with
  | O.Op_w _ -> tag "w"
  | O.Op_J _ -> tag "J"
  | O.Op_j _ -> tag "j"
  | O.Op_M _ -> tag "M"
  | O.Op_d _ -> tag "d"
  | O.Op_ri _ -> tag "ri"
  | O.Op_i _ -> tag "i"
  | O.Op_gs _ -> tag "gs"
  | O.Op_q -> tag "q"
  | O.Op_Q -> tag "Q"
  | O.Op_cm _ -> tag "cm"
  | O.Op_m _ -> tag "m"
  | O.Op_l _ -> tag "l"
  | O.Op_c _ -> tag "c"
  | O.Op_v _ -> tag "v"
  | O.Op_y _ -> tag "y"
  | O.Op_h -> tag "h"
  | O.Op_re _ -> tag "re"
  | O.Op_S -> tag "S"
  | O.Op_s -> tag "s"
  | O.Op_f -> tag "f"
  | O.Op_F -> tag "F"
  | O.Op_f' -> tag "f'"
  | O.Op_B -> tag "B"
  | O.Op_B' -> tag "B'"
  | O.Op_b -> tag "b"
  | O.Op_b' -> tag "b'"
  | O.Op_n -> tag "n"
  | O.Op_W -> tag "W"
  | O.Op_W' -> tag "W'"
  | O.Op_BT -> tag "BT"
  | O.Op_ET -> tag "ET"
  | O.Op_Tc _ -> tag "Tc"
  | O.Op_Tw _ -> tag "Tw"
  | O.Op_Tz _ -> tag "Tz"
  | O.Op_TL _ -> tag "TL"
  | O.Op_Tf _ -> tag "Tf"
  | O.Op_Tr _ -> tag "Tr"
  | O.Op_Ts _ -> tag "Ts"
  | O.Op_Td _ -> tag "Td"
  | O.Op_TD _ -> tag "TD"
  | O.Op_Tm _ -> tag "Tm"
  | O.Op_T' -> tag "T'"
  | O.Op_Tj _ -> tag "Tj"
  | O.Op_TJ _ -> tag "TJ"
  | O.Op_' _ -> tag "'"
  | O.Op_'' _ -> tag "''"
  | O.Op_d0 _ -> tag "d0"
  | O.Op_d1 _ -> tag "d1"
  | O.Op_CS _ -> tag "CS"
  | O.Op_cs _ -> tag "cs"
  | O.Op_SC _ -> tag "SC"
  | O.Op_sc _ -> tag "sc"
  | O.Op_SCN _ -> tag "SCN"
  | O.Op_scn _ -> tag "scn"
  | O.Op_SCNName _ -> tag "SCNName"
  | O.Op_scnName _ -> tag "scnName"
  | O.Op_G _ -> tag "G"
  | O.Op_g _ -> tag "g"
  | O.Op_RG _ -> tag "RG"
  | O.Op_rg _ -> tag "rg"
  | O.Op_K _ -> tag "K"
  | O.Op_k _ -> tag "k"
  | O.Op_sh _ -> tag "sh"
  | O.InlineImage _ -> tag "InlineImage"
  | O.Op_Do _ -> tag "Do"
  | O.Op_MP _ -> tag "MP"
  | O.Op_DP _ -> tag "DP"
  | O.Op_BMC _ -> tag "BMC"
  | O.Op_BDC _ -> tag "BDC"
  | O.Op_EMC -> tag "EMC"
  | O.Op_BX -> tag "BX"
  | O.Op_EX -> tag "EX"
  | O.Op_Unknown _ -> tag "Unknown"
(** [parse_content_stream str] parses [str] as a content stream and returns
    a [J.Array] with one [json_of_op] entry per parsed operator, in order.

    Parsing is done by [Pdfops.parse_stream], which is handed a fresh empty
    document ([Pdf.empty ()]) and an empty dictionary as its first two
    arguments. *)
let parse_content_stream str =
  let blank_pdf = Pdf.empty () in
  let no_entries = Pdf.Dictionary [] in
  let chunks = [Pdfio.bytes_of_string str] in
  Pdfops.parse_stream blank_pdf no_entries chunks
  |> List.map json_of_op
  |> fun items -> J.Array items
let json_of_pdf parse_content pdf = let json_of_pdf parse_content pdf =
let trailerdict = (0, json_of_object (fun x -> ()) pdf.Pdf.trailerdict) in let trailerdict = (0, json_of_object (fun x -> ()) pdf.Pdf.trailerdict) in