cpdf-source/cpdfwriteJSON.ml

63 lines
2.3 KiB
OCaml
Raw Normal View History

2020-01-30 11:42:24 +01:00
module J = Tjjson
2020-01-30 14:10:03 +01:00
module P = Pdf
2020-01-30 11:42:24 +01:00
2020-01-30 14:10:03 +01:00
let rec json_of_object fcs = function
| P.Null -> J.String "null"
| P.Boolean b -> J.Bool b
| P.Integer i -> J.Number (string_of_int i)
| P.Real r -> J.Number (string_of_float r)
| P.String s -> J.String s
| P.Name n -> J.String n
| P.Array objs -> J.Array (List.map (json_of_object fcs) objs)
| P.Dictionary elts ->
(* Detect contents stream object numbers for second pass *)
List.iter (function ("/Contents", P.Indirect i) -> fcs i | _ -> ()) elts;
J.Object (List.map (fun (k, v) -> (k, json_of_object fcs v)) elts)
| P.Stream {contents = (Pdf.Dictionary dict, stream)} as thestream ->
Pdf.getstream thestream;
let str = match stream with Got b -> Pdfio.string_of_bytes b | ToGet _ -> "failure: toget" in
json_of_object fcs (P.Array [P.Dictionary dict; P.String str])
| P.Stream _ -> J.String "error: stream with not-a-dictioary"
| P.Indirect i -> J.Number (string_of_int i)
let parse_content_stream str = J.String ("PARSED: str")
let json_of_pdf parse_content pdf =
let trailerdict = (0, json_of_object (fun x -> ()) pdf.Pdf.trailerdict) in
let content_streams = ref [] in
let pairs =
let ps = ref [] in
Pdf.objiter
(fun i pdfobj ->
ps := (i, json_of_object (fun n -> content_streams := n::!content_streams) pdfobj)::!ps)
pdf;
trailerdict::!ps
in
let pairs_parsed =
if not parse_content then pairs else
List.map
(fun (objnum, obj) ->
if Pdfutil.mem objnum !content_streams then
begin match obj with
| J.Array [dict; J.String streamdata] ->
(objnum, J.Array [dict; parse_content_stream streamdata])
| _ -> failwith "json_of_pdf: stream parsing inconsistency"
end
else
(objnum, obj))
pairs
in
J.Array
(List.map
(fun (objnum, jsonobj) -> J.Array [J.String (string_of_int objnum); jsonobj])
pairs_parsed)
2020-01-30 11:42:24 +01:00
let write fh parse_content pdf =
let b = Buffer.create 256 in
let formatter = Format.formatter_of_buffer b in
2020-01-30 14:10:03 +01:00
Tjjson.format formatter (json_of_pdf parse_content pdf);
2020-01-30 11:42:24 +01:00
Format.pp_print_flush formatter ();
output_string fh (Buffer.contents b)