Making a basic structure tree when drawing

This commit is contained in:
John Whitington 2024-09-09 16:04:48 +01:00
parent 7ff9c41e0e
commit c70e960c66
1 changed files with 63 additions and 6 deletions

View File

@ -195,9 +195,15 @@ let update_resources pdf old_resources =
let mcidr = ref ~-1 let mcidr = ref ~-1
let mcid () = (incr mcidr; !mcidr) let mcid () = (incr mcidr; !mcidr)
let mcpage = ref ~-1
(* The structure data, as it is created, in flat form. Later on, this will be
reconstructed into a structure tree. *)
type structdata = type structdata =
| MCID of int | StDataBeginTree of string
| StDataEndTree
| StDataMCID of string * int
| StDataPage of int
let structdata = ref [] let structdata = ref []
@ -304,7 +310,7 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func
[] []
| TextSection ops -> | TextSection ops ->
let m = mcid () in let m = mcid () in
if not dryrun then structdata := MCID m::!structdata; if not dryrun then structdata := StDataMCID ("/P", m)::!structdata;
[Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m]); [Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m]);
Pdfops.Op_BT] Pdfops.Op_BT]
@ ops_of_drawops dryrun pdf endpage filename bates batespad num page ops @ @ ops_of_drawops dryrun pdf endpage filename bates batespad num page ops @
@ -437,14 +443,61 @@ let dryrun ~filename ~bates ~batespad range pdf chunks =
restore_whole_stack r; restore_whole_stack r;
fontpacks := saved_fontpacks fontpacks := saved_fontpacks
(* Build a tree from the MCIDs and sturcture tree instructions gathered, and type st =
add it to the PDF. *) StMCID of int
| StItem of {kind : string; pageobjnum : int; children : st list}
(* Build a tree from the MCIDs and structure tree instructions gathered *)
let make_structure_tree pdf items = let make_structure_tree pdf items =
(* Make map of page numbers to pageobjnums, and create a reference to keep track. *)
let pagenum = ref 0 in
let items_out = ref [] in
let pageobjnums =
let objnums = Pdf.page_reference_numbers pdf in
combine (indx objnums) objnums
in
(* Process the items, making the st list tree data structure *)
let process = function
| StDataMCID (n, mcid) ->
Printf.printf "Looking for page %i\n" !pagenum;
items_out =| StItem {kind = n; pageobjnum = unopt (lookup !pagenum pageobjnums); children = [StMCID mcid]}
| StDataPage n ->
Printf.printf "Setting page number to %i\n" n;
pagenum := n
| _ -> ()
in
iter process items;
!items_out
(* Write such a structure tree to a PDF. We have to make the objects and build
the root and its /K. For now, we just have a root which contains everything
else on one level. Later we will use StDataBeginTree / StDataEndTree to make
more tree stuff. *)
(* Add the parent tree and parent pointers to the structure tree. *)
let add_parent_tree pdf =
() ()
let write_structure_tree pdf st =
let items =
map
(function StItem {kind; pageobjnum; children} ->
Pdf.Dictionary [("/S", Pdf.Name kind);
("/P", Pdf.Indirect pageobjnum);
("/K", Pdf.Array (map (function StMCID x -> Pdf.Integer x | _ -> assert false) children))]
| _ -> assert false
)
st
in
let st =
Pdf.Dictionary [("/Type", Pdf.Name "/StructTreeRoot");
("/K", Pdf.Array items)]
in
Pdf.replace_chain pdf ["/Root"] ("/StructTreeRoot", st);
add_parent_tree pdf
let draw ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf drawops = let draw ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf drawops =
(*Printf.printf "%s\n" (string_of_drawops drawops);*) (*Printf.printf "%s\n" (string_of_drawops drawops);*)
mcidr := -1;
resstack := [empty_res ()]; resstack := [empty_res ()];
Hashtbl.clear !fontpacks; Hashtbl.clear !fontpacks;
(res ()).time <- Cpdfstrftime.current_time (); (res ()).time <- Cpdfstrftime.current_time ();
@ -454,7 +507,11 @@ let draw ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf dra
let drawops = match rev drawops with NewPage::t -> rev (NewPage::NewPage::t) | _ -> drawops in let drawops = match rev drawops with NewPage::t -> rev (NewPage::NewPage::t) | _ -> drawops in
let chunks = ref (split_around (eq NewPage) drawops) in let chunks = ref (split_around (eq NewPage) drawops) in
dryrun ~filename ~bates ~batespad !range !pdf !chunks; dryrun ~filename ~bates ~batespad !range !pdf !chunks;
mcidr := -1;
mcpage := 0;
while !chunks <> [] do while !chunks <> [] do
mcpage += 1;
structdata =| StDataPage !mcpage;
reset_state (); reset_state ();
if hd !chunks <> [] then pdf := draw_single ~fast ~underneath ~filename ~bates ~batespad !range !pdf (hd !chunks); if hd !chunks <> [] then pdf := draw_single ~fast ~underneath ~filename ~bates ~batespad !range !pdf (hd !chunks);
chunks := tl !chunks; chunks := tl !chunks;
@ -469,5 +526,5 @@ let draw ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf dra
range := [endpage + 1] range := [endpage + 1]
end end
done; done;
make_structure_tree pdf !structdata; write_structure_tree !pdf (make_structure_tree !pdf (rev !structdata));
!pdf !pdf