Making a basic structure tree when drawing
This commit is contained in:
parent
7ff9c41e0e
commit
c70e960c66
69
cpdfdraw.ml
69
cpdfdraw.ml
|
@ -195,9 +195,15 @@ let update_resources pdf old_resources =
|
||||||
|
|
||||||
let mcidr = ref ~-1
|
let mcidr = ref ~-1
|
||||||
let mcid () = (incr mcidr; !mcidr)
|
let mcid () = (incr mcidr; !mcidr)
|
||||||
|
let mcpage = ref ~-1
|
||||||
|
|
||||||
|
(* The structure data, as it is created, in flat form. Later on, this will be
|
||||||
|
reconstructed into a structure tree. *)
|
||||||
type structdata =
|
type structdata =
|
||||||
| MCID of int
|
| StDataBeginTree of string
|
||||||
|
| StDataEndTree
|
||||||
|
| StDataMCID of string * int
|
||||||
|
| StDataPage of int
|
||||||
|
|
||||||
let structdata = ref []
|
let structdata = ref []
|
||||||
|
|
||||||
|
@ -304,7 +310,7 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func
|
||||||
[]
|
[]
|
||||||
| TextSection ops ->
|
| TextSection ops ->
|
||||||
let m = mcid () in
|
let m = mcid () in
|
||||||
if not dryrun then structdata := MCID m::!structdata;
|
if not dryrun then structdata := StDataMCID ("/P", m)::!structdata;
|
||||||
[Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m]);
|
[Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m]);
|
||||||
Pdfops.Op_BT]
|
Pdfops.Op_BT]
|
||||||
@ ops_of_drawops dryrun pdf endpage filename bates batespad num page ops @
|
@ ops_of_drawops dryrun pdf endpage filename bates batespad num page ops @
|
||||||
|
@ -437,14 +443,61 @@ let dryrun ~filename ~bates ~batespad range pdf chunks =
|
||||||
restore_whole_stack r;
|
restore_whole_stack r;
|
||||||
fontpacks := saved_fontpacks
|
fontpacks := saved_fontpacks
|
||||||
|
|
||||||
(* Build a tree from the MCIDs and sturcture tree instructions gathered, and
|
type st =
|
||||||
add it to the PDF. *)
|
StMCID of int
|
||||||
|
| StItem of {kind : string; pageobjnum : int; children : st list}
|
||||||
|
|
||||||
|
(* Build a tree from the MCIDs and structure tree instructions gathered *)
|
||||||
let make_structure_tree pdf items =
|
let make_structure_tree pdf items =
|
||||||
|
(* Make map of page numbers to pageobjnums, and create a reference to keep track. *)
|
||||||
|
let pagenum = ref 0 in
|
||||||
|
let items_out = ref [] in
|
||||||
|
let pageobjnums =
|
||||||
|
let objnums = Pdf.page_reference_numbers pdf in
|
||||||
|
combine (indx objnums) objnums
|
||||||
|
in
|
||||||
|
(* Process the items, making the st list tree data structure *)
|
||||||
|
let process = function
|
||||||
|
| StDataMCID (n, mcid) ->
|
||||||
|
Printf.printf "Looking for page %i\n" !pagenum;
|
||||||
|
items_out =| StItem {kind = n; pageobjnum = unopt (lookup !pagenum pageobjnums); children = [StMCID mcid]}
|
||||||
|
| StDataPage n ->
|
||||||
|
Printf.printf "Setting page number to %i\n" n;
|
||||||
|
pagenum := n
|
||||||
|
| _ -> ()
|
||||||
|
in
|
||||||
|
iter process items;
|
||||||
|
!items_out
|
||||||
|
|
||||||
|
(* Write such a structure tree to a PDF. We have to make the objects and build
|
||||||
|
the root and its /K. For now, we just have a root which contains everything
|
||||||
|
else on one level. Later we will use StDataBeginTree / StDataEndTree to make
|
||||||
|
more tree stuff. *)
|
||||||
|
|
||||||
|
(* Add the parent tree and parent pointers to the structure tree. *)
|
||||||
|
let add_parent_tree pdf =
|
||||||
()
|
()
|
||||||
|
|
||||||
|
let write_structure_tree pdf st =
|
||||||
|
let items =
|
||||||
|
map
|
||||||
|
(function StItem {kind; pageobjnum; children} ->
|
||||||
|
Pdf.Dictionary [("/S", Pdf.Name kind);
|
||||||
|
("/P", Pdf.Indirect pageobjnum);
|
||||||
|
("/K", Pdf.Array (map (function StMCID x -> Pdf.Integer x | _ -> assert false) children))]
|
||||||
|
| _ -> assert false
|
||||||
|
)
|
||||||
|
st
|
||||||
|
in
|
||||||
|
let st =
|
||||||
|
Pdf.Dictionary [("/Type", Pdf.Name "/StructTreeRoot");
|
||||||
|
("/K", Pdf.Array items)]
|
||||||
|
in
|
||||||
|
Pdf.replace_chain pdf ["/Root"] ("/StructTreeRoot", st);
|
||||||
|
add_parent_tree pdf
|
||||||
|
|
||||||
let draw ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf drawops =
|
let draw ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf drawops =
|
||||||
(*Printf.printf "%s\n" (string_of_drawops drawops);*)
|
(*Printf.printf "%s\n" (string_of_drawops drawops);*)
|
||||||
mcidr := -1;
|
|
||||||
resstack := [empty_res ()];
|
resstack := [empty_res ()];
|
||||||
Hashtbl.clear !fontpacks;
|
Hashtbl.clear !fontpacks;
|
||||||
(res ()).time <- Cpdfstrftime.current_time ();
|
(res ()).time <- Cpdfstrftime.current_time ();
|
||||||
|
@ -454,7 +507,11 @@ let draw ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf dra
|
||||||
let drawops = match rev drawops with NewPage::t -> rev (NewPage::NewPage::t) | _ -> drawops in
|
let drawops = match rev drawops with NewPage::t -> rev (NewPage::NewPage::t) | _ -> drawops in
|
||||||
let chunks = ref (split_around (eq NewPage) drawops) in
|
let chunks = ref (split_around (eq NewPage) drawops) in
|
||||||
dryrun ~filename ~bates ~batespad !range !pdf !chunks;
|
dryrun ~filename ~bates ~batespad !range !pdf !chunks;
|
||||||
|
mcidr := -1;
|
||||||
|
mcpage := 0;
|
||||||
while !chunks <> [] do
|
while !chunks <> [] do
|
||||||
|
mcpage += 1;
|
||||||
|
structdata =| StDataPage !mcpage;
|
||||||
reset_state ();
|
reset_state ();
|
||||||
if hd !chunks <> [] then pdf := draw_single ~fast ~underneath ~filename ~bates ~batespad !range !pdf (hd !chunks);
|
if hd !chunks <> [] then pdf := draw_single ~fast ~underneath ~filename ~bates ~batespad !range !pdf (hd !chunks);
|
||||||
chunks := tl !chunks;
|
chunks := tl !chunks;
|
||||||
|
@ -469,5 +526,5 @@ let draw ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf dra
|
||||||
range := [endpage + 1]
|
range := [endpage + 1]
|
||||||
end
|
end
|
||||||
done;
|
done;
|
||||||
make_structure_tree pdf !structdata;
|
write_structure_tree !pdf (make_structure_tree !pdf (rev !structdata));
|
||||||
!pdf
|
!pdf
|
||||||
|
|
Loading…
Reference in New Issue