Gate -table-of-contents for -process-struct-tree

This commit is contained in:
John Whitington
2025-03-07 17:00:17 +00:00
parent 0a1be154ad
commit f94fb702ec

View File

@ -151,6 +151,7 @@ let add_to_parent_tree pdf p =
CropBox. Font size of title twice body font size. Null page labels added for CropBox. Font size of title twice body font size. Null page labels added for
TOC, others bumped up and so preserved. *) TOC, others bumped up and so preserved. *)
let typeset_table_of_contents ~font ~fontsize ~title ~bookmark ~dotleader ~process_struct_tree pdf = let typeset_table_of_contents ~font ~fontsize ~title ~bookmark ~dotleader ~process_struct_tree pdf =
let optional l = if process_struct_tree then l else [] in
Hashtbl.clear width_table_cache; Hashtbl.clear width_table_cache;
let marks = Pdfmarks.read_bookmarks pdf in let marks = Pdfmarks.read_bookmarks pdf in
if marks = [] then (Pdfe.log "No bookmarks, not making table of contents\n"; pdf) else if marks = [] then (Pdfe.log "No bookmarks, not making table of contents\n"; pdf) else
@ -203,22 +204,22 @@ let typeset_table_of_contents ~font ~fontsize ~title ~bookmark ~dotleader ~proce
else [Cpdftype.HGlue space] else [Cpdftype.HGlue space]
in in
[Cpdftype.BeginDest (mark.Pdfmarks.target, Some mark.Pdfmarks.text); Cpdftype.HGlue indent] [Cpdftype.BeginDest (mark.Pdfmarks.target, Some mark.Pdfmarks.text); Cpdftype.HGlue indent]
@ [Cpdftype.Tag ("Link", 0)] @ textruns @ [Cpdftype.EndTag] @ optional [(Cpdftype.Tag ("Link", 0))] @ textruns @ optional [Cpdftype.EndTag]
@ leader @ leader
@ [Cpdftype.Tag ("Link", 0)] @ labelruns @ [Cpdftype.EndTag] @ optional [Cpdftype.Tag ("Link", 0)] @ labelruns @ optional [Cpdftype.EndTag]
@ [Cpdftype.EndDest; Cpdftype.NewLine]) @ [Cpdftype.EndDest; Cpdftype.NewLine])
(Pdfmarks.read_bookmarks pdf) (Pdfmarks.read_bookmarks pdf)
in in
let toc_pages, toc_tags = let toc_pages, toc_tags =
let title = let title =
let glue = Cpdftype.VGlue (fontsize *. 2.) in let glue = Cpdftype.VGlue (fontsize *. 2.) in
[Cpdftype.Tag ("P", 0)] optional [Cpdftype.Tag ("P", 0)]
@ flatten @ flatten
(map (map
(fun l -> l @ [Cpdftype.NewLine]) (fun l -> l @ [Cpdftype.NewLine])
(map (of_utf8 fontpack (fontsize *. 2.)) (map implode (split_toc_title (explode title))))) (map (of_utf8 fontpack (fontsize *. 2.)) (map implode (split_toc_title (explode title)))))
@ @
[Cpdftype.EndTag; glue] optional [Cpdftype.EndTag] @ [glue]
in in
let lm, rm, tm, bm = let lm, rm, tm, bm =
match firstpage_cropbox with match firstpage_cropbox with
@ -254,85 +255,88 @@ let typeset_table_of_contents ~font ~fontsize ~title ~bookmark ~dotleader ~proce
Pdfpagelabels.startpage = 1; Pdfpagelabels.startpage = 1;
Pdfpagelabels.startvalue = 1} Pdfpagelabels.startvalue = 1}
in in
let struct_tree_root = if process_struct_tree then
match Pdf.lookup_immediate "/StructTreeRoot" (Pdf.lookup_obj pdf pdf.Pdf.root) with begin
| Some (Pdf.Indirect i) -> i let struct_tree_root =
| _ -> 0 (* Will never be written, because we only write if there is an existing tree. To revisit. *) match Pdf.lookup_immediate "/StructTreeRoot" (Pdf.lookup_obj pdf pdf.Pdf.root) with
in | Some (Pdf.Indirect i) -> i
let p_struct_elem_first_page = | _ -> 0 (* Will never be written, because we only write if there is an existing tree. To revisit. *)
Pdf.addobj pdf in
(Pdf.Dictionary [("/S", Pdf.Name "/P"); let p_struct_elem_first_page =
("/Pg", Pdf.Indirect (hd toc_pageobjnums)); Pdf.addobj pdf
("/K", Pdf.Array [Pdf.Integer 0]); (Pdf.Dictionary [("/S", Pdf.Name "/P");
("/P", Pdf.Indirect struct_tree_root)]) ("/Pg", Pdf.Indirect (hd toc_pageobjnums));
in ("/K", Pdf.Array [Pdf.Integer 0]);
let mcid = ref 1 in ("/P", Pdf.Indirect struct_tree_root)])
let link_struct_elems_for_each_page = in
map2 let mcid = ref 1 in
(fun page pageobjnum -> let link_struct_elems_for_each_page =
let annot_objnums = map2
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with (fun page pageobjnum ->
| Some (Pdf.Array a) -> map (function Pdf.Indirect i -> i | _ -> 0) a let annot_objnums =
| _ -> [] match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
in | Some (Pdf.Array a) -> map (function Pdf.Indirect i -> i | _ -> 0) a
let r = map | _ -> []
(fun annot_i -> in
let r = let r = map
let objr = Pdf.addobj pdf (Pdf.Dictionary [("/Type", Pdf.Name "/OBJR"); ("/Obj", Pdf.Indirect annot_i)]) in (fun annot_i ->
Pdf.addobj pdf let r =
(Pdf.Dictionary [("/S", Pdf.Name "/Link"); let objr = Pdf.addobj pdf (Pdf.Dictionary [("/Type", Pdf.Name "/OBJR"); ("/Obj", Pdf.Indirect annot_i)]) in
("/K", Pdf.Array [Pdf.Integer !mcid; Pdf.Indirect objr]); Pdf.addobj pdf
("/P", Pdf.Indirect struct_tree_root); (Pdf.Dictionary [("/S", Pdf.Name "/Link");
("/Pg", Pdf.Indirect pageobjnum)]) ("/K", Pdf.Array [Pdf.Integer !mcid; Pdf.Indirect objr]);
in ("/P", Pdf.Indirect struct_tree_root);
incr mcid; r) ("/Pg", Pdf.Indirect pageobjnum)])
annot_objnums in
in incr mcid; r)
mcid := 0; r) annot_objnums
toc_pages in
toc_pageobjnums mcid := 0; r)
in toc_pages
let prepending_structitems = toc_pageobjnums
map (fun x -> Pdf.Indirect x) (p_struct_elem_first_page::flatten link_struct_elems_for_each_page) in
in let prepending_structitems =
(* Add the key and value structure item (any p, and that page's links) to the parent tree for each TOC page *) map (fun x -> Pdf.Indirect x) (p_struct_elem_first_page::flatten link_struct_elems_for_each_page)
let toc_structure_items_per_page = in
match link_struct_elems_for_each_page with (* Add the key and value structure item (any p, and that page's links) to the parent tree for each TOC page *)
| h::t -> (p_struct_elem_first_page::h)::t let toc_structure_items_per_page =
| [] -> [] match link_struct_elems_for_each_page with
in | h::t -> (p_struct_elem_first_page::h)::t
iter2 | [] -> []
(fun o ns -> in
let page = Pdf.lookup_obj pdf o in iter2
(* For each annotation, add a structparent entry too. *) (fun o ns ->
let annot_objnums = let page = Pdf.lookup_obj pdf o in
match Pdf.lookup_direct pdf "/Annots" page with (* For each annotation, add a structparent entry too. *)
| Some (Pdf.Array a) -> map (function Pdf.Indirect i -> i | _ -> 0) a let annot_objnums =
| _ -> [] match Pdf.lookup_direct pdf "/Annots" page with
in | Some (Pdf.Array a) -> map (function Pdf.Indirect i -> i | _ -> 0) a
(* Remove the Title P from first page list *) | _ -> []
let ns2 = if length ns > length annot_objnums then tl ns else ns in in
iter2 (* Remove the Title P from first page list *)
(fun annot_objnum n -> let ns2 = if length ns > length annot_objnums then tl ns else ns in
let annot = Pdf.lookup_obj pdf annot_objnum in iter2
let sp_num = add_to_parent_tree pdf (Pdf.Indirect n) in (fun annot_objnum n ->
let new_annot = Pdf.add_dict_entry annot "/StructParent" (Pdf.Integer sp_num) in let annot = Pdf.lookup_obj pdf annot_objnum in
Pdf.addobj_given_num pdf (annot_objnum, new_annot)) let sp_num = add_to_parent_tree pdf (Pdf.Indirect n) in
annot_objnums let new_annot = Pdf.add_dict_entry annot "/StructParent" (Pdf.Integer sp_num) in
ns2; Pdf.addobj_given_num pdf (annot_objnum, new_annot))
let ptn = add_to_parent_tree pdf (Pdf.Array (map (fun x -> Pdf.Indirect x) ns)) in annot_objnums
Pdf.addobj_given_num pdf (o, Pdf.add_dict_entry page "/StructParents" (Pdf.Integer ptn))) ns2;
toc_pageobjnums let ptn = add_to_parent_tree pdf (Pdf.Array (map (fun x -> Pdf.Indirect x) ns)) in
toc_structure_items_per_page; Pdf.addobj_given_num pdf (o, Pdf.add_dict_entry page "/StructParents" (Pdf.Integer ptn)))
remove_parent_tree_next_key pdf; toc_pageobjnums
begin match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/K"] with toc_structure_items_per_page;
| Some (Pdf.Array a) -> remove_parent_tree_next_key pdf;
Pdf.replace_chain pdf ["/Root"; "/StructTreeRoot"; "/K"] (Pdf.Array (prepending_structitems @ a)) begin match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/K"] with
| Some (Pdf.Dictionary d) -> | Some (Pdf.Array a) ->
Pdf.replace_chain pdf ["/Root"; "/StructTreeRoot"; "/K"] (Pdf.Array (prepending_structitems @ [Pdf.Dictionary d])) Pdf.replace_chain pdf ["/Root"; "/StructTreeRoot"; "/K"] (Pdf.Array (prepending_structitems @ a))
| _ -> | Some (Pdf.Dictionary d) ->
() (* None found. In future, may fabricate. For now, no. *) Pdf.replace_chain pdf ["/Root"; "/StructTreeRoot"; "/K"] (Pdf.Array (prepending_structitems @ [Pdf.Dictionary d]))
end; | _ ->
() (* None found. In future, may fabricate. For now, no. *)
end
end;
let labels' = label::map (fun l -> {l with Pdfpagelabels.startpage = l.Pdfpagelabels.startpage + toc_pages_len}) labels in let labels' = label::map (fun l -> {l with Pdfpagelabels.startpage = l.Pdfpagelabels.startpage + toc_pages_len}) labels in
Pdfpagelabels.write pdf labels'; Pdfpagelabels.write pdf labels';
if bookmark then if bookmark then