Form xobject JSON parsing
This commit is contained in:
parent
c5dd9f3c2a
commit
36ad2c6eb1
|
@ -1,3 +1,4 @@
|
||||||
|
%Document \n in bookmark files
|
||||||
\documentclass{book}
|
\documentclass{book}
|
||||||
\usepackage{palatino}
|
\usepackage{palatino}
|
||||||
\usepackage{microtype}
|
\usepackage{microtype}
|
||||||
|
|
|
@ -7,14 +7,14 @@ let soi = string_of_int
|
||||||
let string_of_float _ = failwith "use sof"
|
let string_of_float _ = failwith "use sof"
|
||||||
let string_of_int _ = failwith "use soi"
|
let string_of_int _ = failwith "use soi"
|
||||||
|
|
||||||
let rec json_of_object fcs no_stream_data = function
|
let rec json_of_object pdf fcs no_stream_data = function
|
||||||
| P.Null -> J.String "null"
|
| P.Null -> J.String "null"
|
||||||
| P.Boolean b -> J.Bool b
|
| P.Boolean b -> J.Bool b
|
||||||
| P.Integer i -> J.Number (soi i)
|
| P.Integer i -> J.Number (soi i)
|
||||||
| P.Real r -> J.Number (sof r)
|
| P.Real r -> J.Number (sof r)
|
||||||
| P.String s -> J.String s
|
| P.String s -> J.String s
|
||||||
| P.Name n -> J.String n
|
| P.Name n -> J.String n
|
||||||
| P.Array objs -> J.Array (List.map (json_of_object fcs no_stream_data) objs)
|
| P.Array objs -> J.Array (List.map (json_of_object pdf fcs no_stream_data) objs)
|
||||||
| P.Dictionary elts ->
|
| P.Dictionary elts ->
|
||||||
List.iter
|
List.iter
|
||||||
(function
|
(function
|
||||||
|
@ -22,18 +22,27 @@ let rec json_of_object fcs no_stream_data = function
|
||||||
| ("/Contents", P.Array elts) -> List.iter (function P.Indirect i -> fcs i | _ -> ()) elts
|
| ("/Contents", P.Array elts) -> List.iter (function P.Indirect i -> fcs i | _ -> ()) elts
|
||||||
| _ -> ())
|
| _ -> ())
|
||||||
elts;
|
elts;
|
||||||
J.Object (List.map (fun (k, v) -> (k, json_of_object fcs no_stream_data v)) elts)
|
J.Object (List.map (fun (k, v) -> (k, json_of_object pdf fcs no_stream_data v)) elts)
|
||||||
| P.Stream {contents = (Pdf.Dictionary dict, stream)} as thestream ->
|
| P.Stream {contents = (Pdf.Dictionary dict, stream)} as thestream ->
|
||||||
Pdf.getstream thestream;
|
Pdf.getstream thestream;
|
||||||
let str =
|
let str =
|
||||||
if no_stream_data then "<<stream data elided>>" else
|
if no_stream_data then "<<stream data elided>>" else
|
||||||
match stream with Pdf.Got b -> Pdfio.string_of_bytes b | Pdf.ToGet _ -> "failure: toget"
|
match stream with Pdf.Got b -> Pdfio.string_of_bytes b | Pdf.ToGet _ -> "failure: toget"
|
||||||
in
|
in
|
||||||
json_of_object fcs no_stream_data (P.Array [P.Dictionary dict; P.String str])
|
json_of_object pdf fcs no_stream_data (P.Array [P.Dictionary dict; P.String str])
|
||||||
| P.Stream _ -> J.String "error: stream with not-a-dictioary"
|
| P.Stream _ -> J.String "error: stream with not-a-dictioary"
|
||||||
| P.Indirect i -> J.Number (soi i)
|
| P.Indirect i ->
|
||||||
|
begin match Pdf.lookup_obj pdf i with
|
||||||
|
| P.Stream {contents = (Pdf.Dictionary dict as d, _)} ->
|
||||||
|
begin match Pdf.lookup_direct pdf "/Subtype" d with
|
||||||
|
| Some (Pdf.Name "/Form") -> fcs i
|
||||||
|
| _ -> ()
|
||||||
|
end
|
||||||
|
| _ -> ()
|
||||||
|
end;
|
||||||
|
J.Number (soi i)
|
||||||
|
|
||||||
let json_of_op no_stream_data = function
|
let json_of_op pdf no_stream_data = function
|
||||||
| O.Op_S -> J.Array [J.String "S"]
|
| O.Op_S -> J.Array [J.String "S"]
|
||||||
| O.Op_s -> J.Array [J.String "s"]
|
| O.Op_s -> J.Array [J.String "s"]
|
||||||
| O.Op_f -> J.Array [J.String "f"]
|
| O.Op_f -> J.Array [J.String "f"]
|
||||||
|
@ -114,7 +123,7 @@ let json_of_op no_stream_data = function
|
||||||
J.Number (sof t.Pdftransform.f);
|
J.Number (sof t.Pdftransform.f);
|
||||||
J.String "Tm"]
|
J.String "Tm"]
|
||||||
| O.Op_Tj s -> J.Array [J.String s; J.String "Tj"]
|
| O.Op_Tj s -> J.Array [J.String s; J.String "Tj"]
|
||||||
| O.Op_TJ pdfobject -> J.Array [json_of_object (fun _ -> ()) no_stream_data pdfobject; J.String "TJ"]
|
| O.Op_TJ pdfobject -> J.Array [json_of_object pdf (fun _ -> ()) no_stream_data pdfobject; J.String "TJ"]
|
||||||
| O.Op_' s -> J.Array [J.String s; J.String "'"]
|
| O.Op_' s -> J.Array [J.String s; J.String "'"]
|
||||||
| O.Op_'' (k, k', s) -> J.Array [J.Number (sof k); J.Number (sof k'); J.String s; J.String "''"]
|
| O.Op_'' (k, k', s) -> J.Array [J.Number (sof k); J.Number (sof k'); J.String s; J.String "''"]
|
||||||
| O.Op_d0 (k, k') -> J.Array [J.Number (sof k); J.Number (sof k'); J.String "d0"]
|
| O.Op_d0 (k, k') -> J.Array [J.Number (sof k); J.Number (sof k'); J.String "d0"]
|
||||||
|
@ -139,8 +148,8 @@ let json_of_op no_stream_data = function
|
||||||
J.Array (List.map (fun x -> J.Number (sof x)) fs @ [J.String s; J.String "SCNName"])
|
J.Array (List.map (fun x -> J.Number (sof x)) fs @ [J.String s; J.String "SCNName"])
|
||||||
| O.Op_scnName (s, fs) ->
|
| O.Op_scnName (s, fs) ->
|
||||||
J.Array (List.map (fun x -> J.Number (sof x)) fs @ [J.String s; J.String "scnName"])
|
J.Array (List.map (fun x -> J.Number (sof x)) fs @ [J.String s; J.String "scnName"])
|
||||||
| O.InlineImage (dict, data) -> J.Array [json_of_object (fun _ -> ()) no_stream_data dict; J.String (Pdfio.string_of_bytes data)]
|
| O.InlineImage (dict, data) -> J.Array [json_of_object pdf (fun _ -> ()) no_stream_data dict; J.String (Pdfio.string_of_bytes data)]
|
||||||
| O.Op_DP (s, obj) -> J.Array [J.String s; json_of_object (fun _ -> ()) no_stream_data obj; J.String "DP"]
|
| O.Op_DP (s, obj) -> J.Array [J.String s; json_of_object pdf (fun _ -> ()) no_stream_data obj; J.String "DP"]
|
||||||
|
|
||||||
(* parse_stream needs pdf and resources. These are for lexing of inline images,
|
(* parse_stream needs pdf and resources. These are for lexing of inline images,
|
||||||
* looking up the colourspace. We do not need to worry about inherited
|
* looking up the colourspace. We do not need to worry about inherited
|
||||||
|
@ -148,17 +157,17 @@ let json_of_op no_stream_data = function
|
||||||
* PDF standard. *)
|
* PDF standard. *)
|
||||||
let parse_content_stream pdf resources bs =
|
let parse_content_stream pdf resources bs =
|
||||||
let ops = Pdfops.parse_stream pdf resources [bs] in
|
let ops = Pdfops.parse_stream pdf resources [bs] in
|
||||||
J.Array (List.map (json_of_op false) ops)
|
J.Array (List.map (json_of_op pdf false) ops)
|
||||||
|
|
||||||
let json_of_pdf parse_content no_stream_data pdf =
|
let json_of_pdf parse_content no_stream_data pdf =
|
||||||
let trailerdict = (0, json_of_object (fun x -> ()) no_stream_data pdf.Pdf.trailerdict) in
|
let trailerdict = (0, json_of_object pdf (fun x -> ()) no_stream_data pdf.Pdf.trailerdict) in
|
||||||
let content_streams = ref [] in
|
let content_streams = ref [] in
|
||||||
let fcs n = content_streams := n::!content_streams in
|
let fcs n = content_streams := n::!content_streams in
|
||||||
let pairs =
|
let pairs =
|
||||||
let ps = ref [] in
|
let ps = ref [] in
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun i pdfobj ->
|
(fun i pdfobj ->
|
||||||
ps := (i, json_of_object fcs no_stream_data pdfobj)::!ps)
|
ps := (i, json_of_object pdf fcs no_stream_data pdfobj)::!ps)
|
||||||
pdf;
|
pdf;
|
||||||
trailerdict::!ps
|
trailerdict::!ps
|
||||||
in
|
in
|
||||||
|
|
Loading…
Reference in New Issue