Wrote JSON outputter
This commit is contained in:
parent
b9b3d5477e
commit
60e028858c
2
Changes
2
Changes
|
@ -2,6 +2,8 @@ Version 2.4 (to come)
|
||||||
|
|
||||||
o Vendored in tiny_json from Yoshihiro Imai via Jan Furuse
|
o Vendored in tiny_json from Yoshihiro Imai via Jan Furuse
|
||||||
o New -labels-progress option to improve page labels interface
|
o New -labels-progress option to improve page labels interface
|
||||||
|
o New options -output-json and -output-json-parse-content streams to export PDF
|
||||||
|
files in JSON format
|
||||||
|
|
||||||
Version 2.3 (patchlevel 1, December 2019)
|
Version 2.3 (patchlevel 1, December 2019)
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,61 @@
|
||||||
module J = Tjjson
|
module J = Tjjson
|
||||||
|
module P = Pdf
|
||||||
|
|
||||||
let test = J.Array [J.Number "100"; J.String "foo"]
|
let rec json_of_object fcs = function
|
||||||
|
| P.Null -> J.String "null"
|
||||||
|
| P.Boolean b -> J.Bool b
|
||||||
|
| P.Integer i -> J.Number (string_of_int i)
|
||||||
|
| P.Real r -> J.Number (string_of_float r)
|
||||||
|
| P.String s -> J.String s
|
||||||
|
| P.Name n -> J.String n
|
||||||
|
| P.Array objs -> J.Array (List.map (json_of_object fcs) objs)
|
||||||
|
| P.Dictionary elts ->
|
||||||
|
(* Detect contents stream object numbers for second pass *)
|
||||||
|
List.iter (function ("/Contents", P.Indirect i) -> fcs i | _ -> ()) elts;
|
||||||
|
J.Object (List.map (fun (k, v) -> (k, json_of_object fcs v)) elts)
|
||||||
|
| P.Stream {contents = (Pdf.Dictionary dict, stream)} as thestream ->
|
||||||
|
Pdf.getstream thestream;
|
||||||
|
let str = match stream with Got b -> Pdfio.string_of_bytes b | ToGet _ -> "failure: toget" in
|
||||||
|
json_of_object fcs (P.Array [P.Dictionary dict; P.String str])
|
||||||
|
| P.Stream _ -> J.String "error: stream with not-a-dictioary"
|
||||||
|
| P.Indirect i -> J.Number (string_of_int i)
|
||||||
|
|
||||||
|
let parse_content_stream str = J.String ("PARSED: str")
|
||||||
|
|
||||||
|
let json_of_pdf parse_content pdf =
|
||||||
|
let trailerdict = (0, json_of_object (fun x -> ()) pdf.Pdf.trailerdict) in
|
||||||
|
let content_streams = ref [] in
|
||||||
|
let pairs =
|
||||||
|
let ps = ref [] in
|
||||||
|
Pdf.objiter
|
||||||
|
(fun i pdfobj ->
|
||||||
|
ps := (i, json_of_object (fun n -> content_streams := n::!content_streams) pdfobj)::!ps)
|
||||||
|
pdf;
|
||||||
|
trailerdict::!ps
|
||||||
|
in
|
||||||
|
let pairs_parsed =
|
||||||
|
if not parse_content then pairs else
|
||||||
|
List.map
|
||||||
|
(fun (objnum, obj) ->
|
||||||
|
if Pdfutil.mem objnum !content_streams then
|
||||||
|
begin match obj with
|
||||||
|
| J.Array [dict; J.String streamdata] ->
|
||||||
|
(objnum, J.Array [dict; parse_content_stream streamdata])
|
||||||
|
| _ -> failwith "json_of_pdf: stream parsing inconsistency"
|
||||||
|
end
|
||||||
|
else
|
||||||
|
(objnum, obj))
|
||||||
|
pairs
|
||||||
|
in
|
||||||
|
J.Array
|
||||||
|
(List.map
|
||||||
|
(fun (objnum, jsonobj) -> J.Array [J.String (string_of_int objnum); jsonobj])
|
||||||
|
pairs_parsed)
|
||||||
|
|
||||||
let write fh parse_content pdf =
|
let write fh parse_content pdf =
|
||||||
let b = Buffer.create 256 in
|
let b = Buffer.create 256 in
|
||||||
let formatter = Format.formatter_of_buffer b in
|
let formatter = Format.formatter_of_buffer b in
|
||||||
Tjjson.format formatter test;
|
Tjjson.format formatter (json_of_pdf parse_content pdf);
|
||||||
Format.pp_print_flush formatter ();
|
Format.pp_print_flush formatter ();
|
||||||
output_string fh (Buffer.contents b)
|
output_string fh (Buffer.contents b)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue