First rough JSON outputter
This commit is contained in:
parent
b09235e28b
commit
4b801006e8
|
@ -120,33 +120,43 @@ let json_of_op = function
|
||||||
| O.Op_BMC s -> J.Array [J.String "BMC"]
|
| O.Op_BMC s -> J.Array [J.String "BMC"]
|
||||||
| O.Op_Unknown _ ->J.Array [J.String "Unknown"] *)
|
| O.Op_Unknown _ ->J.Array [J.String "Unknown"] *)
|
||||||
|
|
||||||
let parse_content_stream str =
|
(* parse_stream needs pdf and resources. These are for lexing of inline images,
|
||||||
let ops = Pdfops.parse_stream (Pdf.empty ()) (Pdf.Dictionary []) [Pdfio.bytes_of_string str] in
|
* looking up the colourspace. We do not need to worry about inherited
|
||||||
|
* resources, though? For now, don't worry about inherited resources: check in
|
||||||
|
* PDF standard. *)
|
||||||
|
let parse_content_stream pdf resources bs =
|
||||||
|
let ops = Pdfops.parse_stream pdf resources [bs] in
|
||||||
J.Array (List.map json_of_op ops)
|
J.Array (List.map json_of_op ops)
|
||||||
|
|
||||||
let json_of_pdf parse_content pdf =
|
let json_of_pdf parse_content pdf =
|
||||||
let trailerdict = (0, json_of_object (fun x -> ()) pdf.Pdf.trailerdict) in
|
let trailerdict = (0, json_of_object (fun x -> ()) pdf.Pdf.trailerdict) in
|
||||||
let content_streams = ref [] in
|
let content_streams = ref [] in
|
||||||
|
let fcs n = content_streams := n::!content_streams in
|
||||||
let pairs =
|
let pairs =
|
||||||
let ps = ref [] in
|
let ps = ref [] in
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun i pdfobj ->
|
(fun i pdfobj ->
|
||||||
ps := (i, json_of_object (fun n -> content_streams := n::!content_streams) pdfobj)::!ps)
|
ps := (i, json_of_object fcs pdfobj)::!ps)
|
||||||
pdf;
|
pdf;
|
||||||
trailerdict::!ps
|
trailerdict::!ps
|
||||||
in
|
in
|
||||||
List.iter (Printf.printf "Found content stream %i\n") !content_streams;
|
List.iter (Printf.printf "Found content stream %i\n") !content_streams;
|
||||||
List.iter
|
List.iter (fun n -> Pdfcodec.decode_pdfstream_until_unknown pdf (Pdf.lookup_obj pdf n)) !content_streams;
|
||||||
(fun n -> try Pdfcodec.decode_pdfstream_until_unknown pdf (Pdf.lookup_obj pdf n) with _ -> ())
|
(* Debug PDF to file here *)
|
||||||
!content_streams;
|
|
||||||
let pairs_parsed =
|
let pairs_parsed =
|
||||||
if not parse_content then pairs else
|
if not parse_content then pairs else
|
||||||
List.map
|
List.map
|
||||||
(fun (objnum, obj) ->
|
(fun (objnum, obj) ->
|
||||||
if Pdfutil.mem objnum !content_streams then
|
if Pdfutil.mem objnum !content_streams then
|
||||||
begin match obj with
|
begin match obj with
|
||||||
| J.Array [dict; J.String streamdata] ->
|
| J.Array [dict; J.String _] ->
|
||||||
(objnum, J.Array [dict; parse_content_stream streamdata])
|
(* FIXME Proper resources here for reasons explained above *)
|
||||||
|
let streamdata =
|
||||||
|
match Pdf.lookup_obj pdf objnum with
|
||||||
|
| Stream {contents = (_, Got b)} -> b
|
||||||
|
| _ -> failwith "JSON: stream not decoded"
|
||||||
|
in
|
||||||
|
(objnum, J.Array [dict; parse_content_stream pdf (Pdf.Dictionary []) streamdata])
|
||||||
| _ -> failwith "json_of_pdf: stream parsing inconsistency"
|
| _ -> failwith "json_of_pdf: stream parsing inconsistency"
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
|
|
Loading…
Reference in New Issue