First stab at %Bookmark<n>

This commit is contained in:
John Whitington 2024-11-11 16:47:49 +00:00
parent 3ae4a373b5
commit 1491f30094
4 changed files with 38 additions and 4 deletions

View File

@ -15,6 +15,7 @@ o Show OpenAction in -info
o Show more form information in -info o Show more form information in -info
o Show XFA in -info o Show XFA in -info
o Allow JSON / PDF syntax in dict processing and object exploration o Allow JSON / PDF syntax in dict processing and object exploration
o %Bookmark and friends when stamping text
Fixes: Fixes:

View File

@ -177,7 +177,24 @@ let pagelabel pdf num =
num num
(Pdfpagelabels.complete (Pdfpagelabels.read pdf)) (Pdfpagelabels.complete (Pdfpagelabels.read pdf))
let replace_pairs pdf endpage extract_text_font_size filename bates batespad num page = (* Return UTF8 of current bookmark at given level at start of page. No bookmark
available = empty string. *)
let bookmark marks fastrefnums level pdf num =
  (* [bookmark marks fastrefnums level pdf num] is the UTF8 text of the
     bookmark at nesting level [level] which is current on page [num], or the
     empty string if there is none. [marks] is the document's bookmark list
     and [fastrefnums] the lookup table handed to
     [Pdfpage.pagenumber_of_target]. Assumes [marks] is in page order -- TODO
     confirm against [Pdfmarks.read_bookmarks]. *)
  let before, _ =
    (* 1. Pick all marks up to and including those on the needed page. *)
    cleavewhile
      (fun mark ->
         Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target <= num)
      marks
  in
  (* 2. Walk backwards from the most recent mark. Deeper marks (larger level
     number) are skipped, so a section following a chapter does not hide the
     chapter. A mark at a higher level (smaller level number) ends the search
     with no result: this prevents sections in an earlier chapter showing up
     as bookmarks in a later chapter if no section has yet been introduced in
     that chapter. The first mark met at exactly [level] is the current one. *)
  let rec current = function
    | [] -> ""
    | mark::earlier ->
        if mark.Pdfmarks.level < level then ""
        else if mark.Pdfmarks.level = level then
          Pdftext.utf8_of_pdfdocstring mark.Pdfmarks.text
        else current earlier
  in
    current (rev before)
let replace_pairs marks fastrefnums pdf endpage extract_text_font_size filename bates batespad num page =
[ [
"%PageDiv2", (fun () -> string_of_int ((num + 1) / 2)); "%PageDiv2", (fun () -> string_of_int ((num + 1) / 2));
"%Page", (fun () -> string_of_int num); "%Page", (fun () -> string_of_int num);
@ -187,6 +204,11 @@ let replace_pairs pdf endpage extract_text_font_size filename bates batespad num
"%Label", (fun () -> pagelabel pdf num); "%Label", (fun () -> pagelabel pdf num);
"%EndPage", (fun () -> string_of_int endpage); "%EndPage", (fun () -> string_of_int endpage);
"%EndLabel", (fun () -> pagelabel pdf endpage); "%EndLabel", (fun () -> pagelabel pdf endpage);
"%Bookmark0", (fun () -> bookmark marks fastrefnums 0 pdf num);
"%Bookmark1", (fun () -> bookmark marks fastrefnums 1 pdf num);
"%Bookmark2", (fun () -> bookmark marks fastrefnums 2 pdf num);
"%Bookmark3", (fun () -> bookmark marks fastrefnums 3 pdf num);
"%Bookmark4", (fun () -> bookmark marks fastrefnums 4 pdf num);
"%ExtractedText", (fun () -> Cpdfextracttext.extract_page_text extract_text_font_size pdf num page); "%ExtractedText", (fun () -> Cpdfextracttext.extract_page_text extract_text_font_size pdf num page);
"%Bates", "%Bates",
(fun () -> (fun () ->
@ -199,10 +221,13 @@ let replace_pairs pdf endpage extract_text_font_size filename bates batespad num
else implode (many '0' (w - String.length numstring)) ^ numstring))] else implode (many '0' (w - String.length numstring)) ^ numstring))]
let expand_lines text time pdf endpage extract_text_font_size filename bates batespad num page lines = let expand_lines text time pdf endpage extract_text_font_size filename bates batespad num page lines =
let refnums = Pdf.page_reference_numbers pdf in
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
let marks = Pdfmarks.read_bookmarks pdf in
let expanded_lines = let expanded_lines =
map map
(function text -> (function text ->
process_text time text (replace_pairs pdf endpage extract_text_font_size filename bates batespad num page)) process_text time text (replace_pairs marks fastrefnums pdf endpage extract_text_font_size filename bates batespad num page))
lines lines
in in
(* process URLs for justification too *) (* process URLs for justification too *)
@ -291,7 +316,10 @@ let addtext
(indx0 (fst fontpack)) (indx0 (fst fontpack))
in in
let ops, urls, x, y, hoffset, voffset, text, joffset = let ops, urls, x, y, hoffset, voffset, text, joffset =
let text = process_text time text (replace_pairs pdf endpage extract_text_font_size filename bates batespad num page) in let refnums = Pdf.page_reference_numbers pdf in
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
let marks = Pdfmarks.read_bookmarks pdf in
let text = process_text time text (replace_pairs marks fastrefnums pdf endpage extract_text_font_size filename bates batespad num page) in
let text, urls = get_urls_line text in let text, urls = get_urls_line text in
let lines = map (fun text -> if raw || fontpack <> None then text else charcodes_of_utf8 (Pdftext.read_font pdf fontpdfobj) text) lines in let lines = map (fun text -> if raw || fontpack <> None then text else charcodes_of_utf8 (Pdftext.read_font pdf fontpdfobj) text) lines in
let expanded_lines = expand_lines text time pdf endpage extract_text_font_size filename bates batespad num page lines in let expanded_lines = expand_lines text time pdf endpage extract_text_font_size filename bates batespad num page lines in

View File

@ -61,6 +61,8 @@ val addrectangle :
(**/**) (**/**)
val replace_pairs : val replace_pairs :
Pdfmarks.t list ->
(int, int) Hashtbl.t ->
Pdf.t -> Pdf.t ->
int -> int ->
float option -> float option ->

View File

@ -164,8 +164,11 @@ let reset_state () =
(res ()).page_names <- []*) (res ()).page_names <- []*)
let process_specials pdf endpage filename bates batespad num page s = let process_specials pdf endpage filename bates batespad num page s =
let refnums = Pdf.page_reference_numbers pdf in
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
let marks = Pdfmarks.read_bookmarks pdf in
let pairs = let pairs =
Cpdfaddtext.replace_pairs pdf endpage None filename bates batespad num page Cpdfaddtext.replace_pairs marks fastrefnums pdf endpage None filename bates batespad num page
in in
Cpdfaddtext.process_text (res ()).time s pairs Cpdfaddtext.process_text (res ()).time s pairs