Obet -draw-struct-trees properly
This commit is contained in:
parent
9ed77b6950
commit
eeb64d48df
44
cpdfdraw.ml
44
cpdfdraw.ml
|
@ -207,9 +207,9 @@ type structdata =
|
||||||
|
|
||||||
let structdata = ref []
|
let structdata = ref []
|
||||||
|
|
||||||
let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = function
|
let rec ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num page = function
|
||||||
| Qq ops ->
|
| Qq ops ->
|
||||||
[Pdfops.Op_q] @ ops_of_drawops dryrun pdf endpage filename bates batespad num page ops @ [Pdfops.Op_Q]
|
[Pdfops.Op_q] @ ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops @ [Pdfops.Op_Q]
|
||||||
| Matrix m -> [Pdfops.Op_cm m]
|
| Matrix m -> [Pdfops.Op_cm m]
|
||||||
| Rect (x, y, w, h) -> [Pdfops.Op_re (x, y, w, h)]
|
| Rect (x, y, w, h) -> [Pdfops.Op_re (x, y, w, h)]
|
||||||
| Bezier (a, b, c, d, e, f) -> [Pdfops.Op_c (a, b, c, d, e, f)]
|
| Bezier (a, b, c, d, e, f) -> [Pdfops.Op_c (a, b, c, d, e, f)]
|
||||||
|
@ -245,7 +245,7 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func
|
||||||
| SetMiterLimit m -> [Pdfops.Op_M m]
|
| SetMiterLimit m -> [Pdfops.Op_M m]
|
||||||
| SetDashPattern (x, y) -> [Pdfops.Op_d (x, y)]
|
| SetDashPattern (x, y) -> [Pdfops.Op_d (x, y)]
|
||||||
| FormXObject (a, b, c, d, n, ops) ->
|
| FormXObject (a, b, c, d, n, ops) ->
|
||||||
create_form_xobject dryrun a b c d pdf endpage filename bates batespad num page n ops;
|
create_form_xobject struct_tree dryrun a b c d pdf endpage filename bates batespad num page n ops;
|
||||||
[]
|
[]
|
||||||
| Use n ->
|
| Use n ->
|
||||||
let pdfname = try fst (Hashtbl.find (res ()).form_xobjects n) with _ -> error ("Form XObject not found: " ^ n) in
|
let pdfname = try fst (Hashtbl.find (res ()).form_xobjects n) with _ -> error ("Form XObject not found: " ^ n) in
|
||||||
|
@ -256,7 +256,9 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func
|
||||||
if not dryrun then structdata := StDataMCID ("/Figure", m, t)::!structdata;
|
if not dryrun then structdata := StDataMCID ("/Figure", m, t)::!structdata;
|
||||||
let pdfname = try fst (Hashtbl.find (res ()).images s) with _ -> error ("Image not found: " ^ s) in
|
let pdfname = try fst (Hashtbl.find (res ()).images s) with _ -> error ("Image not found: " ^ s) in
|
||||||
(res ()).page_names <- pdfname::(res ()).page_names;
|
(res ()).page_names <- pdfname::(res ()).page_names;
|
||||||
[Pdfops.Op_BDC ("/Figure", Pdf.Dictionary ["/MCID", Pdf.Integer m]); Pdfops.Op_Do pdfname; Pdfops.Op_EMC]
|
(if struct_tree then [Pdfops.Op_BDC ("/Figure", Pdf.Dictionary ["/MCID", Pdf.Integer m])] else [])
|
||||||
|
@ [Pdfops.Op_Do pdfname]
|
||||||
|
@ (if struct_tree then [Pdfops.Op_EMC] else [])
|
||||||
| ImageXObject (s, obj) ->
|
| ImageXObject (s, obj) ->
|
||||||
Hashtbl.replace (res ()).images s (fresh_name "/I", Pdf.addobj pdf obj);
|
Hashtbl.replace (res ()).images s (fresh_name "/I", Pdf.addobj pdf obj);
|
||||||
[]
|
[]
|
||||||
|
@ -313,11 +315,11 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func
|
||||||
| TextSection ops ->
|
| TextSection ops ->
|
||||||
let m = mcid () in
|
let m = mcid () in
|
||||||
if not dryrun then structdata := StDataMCID ("/P", m, None)::!structdata;
|
if not dryrun then structdata := StDataMCID ("/P", m, None)::!structdata;
|
||||||
[Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m]);
|
(if struct_tree then [Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m])] else [])
|
||||||
Pdfops.Op_BT]
|
@ [Pdfops.Op_BT]
|
||||||
@ ops_of_drawops dryrun pdf endpage filename bates batespad num page ops @
|
@ ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops
|
||||||
[Pdfops.Op_ET;
|
@ [Pdfops.Op_ET]
|
||||||
Pdfops.Op_EMC]
|
@ (if struct_tree then [Pdfops.Op_EMC] else [])
|
||||||
| Text s ->
|
| Text s ->
|
||||||
if dryrun then iter (fun c -> Hashtbl.replace (res ()).current_fontpack_codepoints c ()) (Pdftext.codepoints_of_utf8 s);
|
if dryrun then iter (fun c -> Hashtbl.replace (res ()).current_fontpack_codepoints c ()) (Pdftext.codepoints_of_utf8 s);
|
||||||
runs_of_utf8 s
|
runs_of_utf8 s
|
||||||
|
@ -333,14 +335,14 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func
|
||||||
| Rise f -> [Pdfops.Op_Ts f]
|
| Rise f -> [Pdfops.Op_Ts f]
|
||||||
| Newline -> [Pdfops.Op_T']
|
| Newline -> [Pdfops.Op_T']
|
||||||
|
|
||||||
and ops_of_drawops dryrun pdf endpage filename bates batespad num page drawops =
|
and ops_of_drawops draw_struct dryrun pdf endpage filename bates batespad num page drawops =
|
||||||
flatten (map (ops_of_drawop dryrun pdf endpage filename bates batespad num page) drawops)
|
flatten (map (ops_of_drawop draw_struct dryrun pdf endpage filename bates batespad num page) drawops)
|
||||||
|
|
||||||
and create_form_xobject dryrun a b c d pdf endpage filename bates batespad num page n ops =
|
and create_form_xobject struct_tree dryrun a b c d pdf endpage filename bates batespad num page n ops =
|
||||||
respush ();
|
respush ();
|
||||||
reset_state ();
|
reset_state ();
|
||||||
let data =
|
let data =
|
||||||
Pdfio.bytes_of_string (Pdfops.string_of_ops (ops_of_drawops dryrun pdf endpage filename bates batespad num page ops))
|
Pdfio.bytes_of_string (Pdfops.string_of_ops (ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops))
|
||||||
in
|
in
|
||||||
let obj =
|
let obj =
|
||||||
Pdf.Stream
|
Pdf.Stream
|
||||||
|
@ -420,14 +422,14 @@ let add_artifacts ops =
|
||||||
in
|
in
|
||||||
loop [] ops
|
loop [] ops
|
||||||
|
|
||||||
let draw_single ~fast ~underneath ~filename ~bates ~batespad range pdf drawops =
|
let draw_single ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf drawops =
|
||||||
(res ()).num <- max (res ()).num (minimum_resource_number pdf range);
|
(res ()).num <- max (res ()).num (minimum_resource_number pdf range);
|
||||||
let endpage = Pdfpage.endpage pdf in
|
let endpage = Pdfpage.endpage pdf in
|
||||||
let pages = Pdfpage.pages_of_pagetree pdf in
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
let ops =
|
let ops =
|
||||||
if contains_specials drawops
|
if contains_specials drawops
|
||||||
then None
|
then None
|
||||||
else Some (ops_of_drawops false pdf endpage filename bates batespad 0 (hd pages) drawops)
|
else Some (ops_of_drawops struct_tree false pdf endpage filename bates batespad 0 (hd pages) drawops)
|
||||||
in
|
in
|
||||||
let ss =
|
let ss =
|
||||||
map2
|
map2
|
||||||
|
@ -436,7 +438,7 @@ let draw_single ~fast ~underneath ~filename ~bates ~batespad range pdf drawops =
|
||||||
then
|
then
|
||||||
(match ops with
|
(match ops with
|
||||||
| Some x -> x
|
| Some x -> x
|
||||||
| None -> ops_of_drawops false pdf endpage filename bates batespad n p drawops)
|
| None -> ops_of_drawops struct_tree false pdf endpage filename bates batespad n p drawops)
|
||||||
else [])
|
else [])
|
||||||
(ilist 1 endpage)
|
(ilist 1 endpage)
|
||||||
pages
|
pages
|
||||||
|
@ -445,7 +447,7 @@ let draw_single ~fast ~underneath ~filename ~bates ~batespad range pdf drawops =
|
||||||
map3
|
map3
|
||||||
(fun n p ops ->
|
(fun n p ops ->
|
||||||
if not (mem n range) then p else
|
if not (mem n range) then p else
|
||||||
let ops = add_artifacts ops in
|
let ops = if struct_tree then add_artifacts ops else ops in
|
||||||
let page = {p with Pdfpage.resources = update_resources pdf p.Pdfpage.resources} in
|
let page = {p with Pdfpage.resources = update_resources pdf p.Pdfpage.resources} in
|
||||||
(if underneath then Pdfpage.prepend_operators else Pdfpage.postpend_operators) pdf ops ~fast page)
|
(if underneath then Pdfpage.prepend_operators else Pdfpage.postpend_operators) pdf ops ~fast page)
|
||||||
(ilist 1 endpage)
|
(ilist 1 endpage)
|
||||||
|
@ -455,7 +457,7 @@ let draw_single ~fast ~underneath ~filename ~bates ~batespad range pdf drawops =
|
||||||
Pdfpage.change_pages true pdf pages
|
Pdfpage.change_pages true pdf pages
|
||||||
|
|
||||||
(* Do a dry run of all the drawing to collect subset information. *)
|
(* Do a dry run of all the drawing to collect subset information. *)
|
||||||
let dryrun ~filename ~bates ~batespad range pdf chunks =
|
let dryrun ~struct_tree ~filename ~bates ~batespad range pdf chunks =
|
||||||
let endpage = Pdfpage.endpage pdf in
|
let endpage = Pdfpage.endpage pdf in
|
||||||
let pages = Pdfpage.pages_of_pagetree pdf in
|
let pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
let r = save_whole_stack () in
|
let r = save_whole_stack () in
|
||||||
|
@ -463,7 +465,7 @@ let dryrun ~filename ~bates ~batespad range pdf chunks =
|
||||||
let pagenum = ref (hd range) in
|
let pagenum = ref (hd range) in
|
||||||
iter
|
iter
|
||||||
(fun chunk ->
|
(fun chunk ->
|
||||||
ignore (ops_of_drawops true pdf endpage filename bates batespad !pagenum (hd pages) chunk);
|
ignore (ops_of_drawops struct_tree true pdf endpage filename bates batespad !pagenum (hd pages) chunk);
|
||||||
match range with
|
match range with
|
||||||
| [x] when endpage > x -> pagenum := x + 1
|
| [x] when endpage > x -> pagenum := x + 1
|
||||||
| _ -> pagenum := endpage + 1)
|
| _ -> pagenum := endpage + 1)
|
||||||
|
@ -554,14 +556,14 @@ let draw ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf dra
|
||||||
(* Double up a trailing NewPage so it actually does something... *)
|
(* Double up a trailing NewPage so it actually does something... *)
|
||||||
let drawops = match rev drawops with NewPage::t -> rev (NewPage::NewPage::t) | _ -> drawops in
|
let drawops = match rev drawops with NewPage::t -> rev (NewPage::NewPage::t) | _ -> drawops in
|
||||||
let chunks = ref (split_around (eq NewPage) drawops) in
|
let chunks = ref (split_around (eq NewPage) drawops) in
|
||||||
dryrun ~filename ~bates ~batespad !range !pdf !chunks;
|
dryrun ~struct_tree ~filename ~bates ~batespad !range !pdf !chunks;
|
||||||
mcpage := 0;
|
mcpage := 0;
|
||||||
while !chunks <> [] do
|
while !chunks <> [] do
|
||||||
mcidr := -1;
|
mcidr := -1;
|
||||||
mcpage += 1;
|
mcpage += 1;
|
||||||
structdata =| StDataPage !mcpage;
|
structdata =| StDataPage !mcpage;
|
||||||
reset_state ();
|
reset_state ();
|
||||||
if hd !chunks <> [] then pdf := draw_single ~fast ~underneath ~filename ~bates ~batespad !range !pdf (hd !chunks);
|
if hd !chunks <> [] then pdf := draw_single ~struct_tree ~fast ~underneath ~filename ~bates ~batespad !range !pdf (hd !chunks);
|
||||||
chunks := tl !chunks;
|
chunks := tl !chunks;
|
||||||
if !chunks <> [] then begin
|
if !chunks <> [] then begin
|
||||||
(* If the range is just a single page, and there is a next page, move to it. Otherwise,
|
(* If the range is just a single page, and there is a next page, move to it. Otherwise,
|
||||||
|
|
Loading…
Reference in New Issue