First stab at %Bookmark<n>
This commit is contained in:
parent
3ae4a373b5
commit
1491f30094
1
Changes
1
Changes
|
@ -15,6 +15,7 @@ o Show OpenAction in -info
|
|||
o Show more form information in -info
|
||||
o Show XFA in -info
|
||||
o Allow JSON / PDF syntax in dict processing and object exploration
|
||||
o %Bookmark and friends when stamping text
|
||||
|
||||
Fixes:
|
||||
|
||||
|
|
|
@ -177,7 +177,24 @@ let pagelabel pdf num =
|
|||
num
|
||||
(Pdfpagelabels.complete (Pdfpagelabels.read pdf))
|
||||
|
||||
let replace_pairs pdf endpage extract_text_font_size filename bates batespad num page =
|
||||
(* [bookmark marks fastrefnums level pdf num] returns, as UTF8, the text of the
   bookmark at outline level [level] which is current on page [num]. If no
   such bookmark is available, the empty string is returned.

   "Current" means the most recent mark at exactly [level] whose target page
   is at or before [num], provided no mark at a higher level (numerically
   smaller [level]) has appeared since. This prevents a section of an earlier
   chapter showing up as the bookmark in a later chapter when no section has
   yet been introduced in that chapter.

   NOTE(review): assumes [marks] is in document order, so that the marks on
   pages up to [num] form a prefix of the list -- confirm against
   Pdfmarks.read_bookmarks. *)
let bookmark marks fastrefnums level pdf num =
  let page_of mark =
    Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target
  in
  (* 1. Pick all marks up to and including those on the needed page. *)
  let before, _ = cleavewhile (fun mark -> page_of mark <= num) marks in
  (* 2. Walk backwards from the most recent mark:
        - a mark at a higher level (smaller number) ends the search, since
          nothing before it belongs to the current chapter/section;
        - a mark at exactly [level] is the answer;
        - a mark at a deeper level is skipped. Stopping on these (as a plain
          [level = level] prefix test would) wrongly yields the empty string
          whenever a sub-bookmark follows the mark we want. *)
  let rec current = function
    | [] -> ""
    | mark :: earlier ->
        if mark.Pdfmarks.level < level then ""
        else if mark.Pdfmarks.level = level then
          Pdftext.utf8_of_pdfdocstring mark.Pdfmarks.text
        else current earlier
  in
  current (rev before)
|
||||
|
||||
let replace_pairs marks fastrefnums pdf endpage extract_text_font_size filename bates batespad num page =
|
||||
[
|
||||
"%PageDiv2", (fun () -> string_of_int ((num + 1) / 2));
|
||||
"%Page", (fun () -> string_of_int num);
|
||||
|
@ -187,6 +204,11 @@ let replace_pairs pdf endpage extract_text_font_size filename bates batespad num
|
|||
"%Label", (fun () -> pagelabel pdf num);
|
||||
"%EndPage", (fun () -> string_of_int endpage);
|
||||
"%EndLabel", (fun () -> pagelabel pdf endpage);
|
||||
"%Bookmark0", (fun () -> bookmark marks fastrefnums 0 pdf num);
|
||||
"%Bookmark1", (fun () -> bookmark marks fastrefnums 1 pdf num);
|
||||
"%Bookmark2", (fun () -> bookmark marks fastrefnums 2 pdf num);
|
||||
"%Bookmark3", (fun () -> bookmark marks fastrefnums 3 pdf num);
|
||||
"%Bookmark4", (fun () -> bookmark marks fastrefnums 4 pdf num);
|
||||
"%ExtractedText", (fun () -> Cpdfextracttext.extract_page_text extract_text_font_size pdf num page);
|
||||
"%Bates",
|
||||
(fun () ->
|
||||
|
@ -199,10 +221,13 @@ let replace_pairs pdf endpage extract_text_font_size filename bates batespad num
|
|||
else implode (many '0' (w - String.length numstring)) ^ numstring))]
|
||||
|
||||
let expand_lines text time pdf endpage extract_text_font_size filename bates batespad num page lines =
|
||||
let refnums = Pdf.page_reference_numbers pdf in
|
||||
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
||||
let marks = Pdfmarks.read_bookmarks pdf in
|
||||
let expanded_lines =
|
||||
map
|
||||
(function text ->
|
||||
process_text time text (replace_pairs pdf endpage extract_text_font_size filename bates batespad num page))
|
||||
process_text time text (replace_pairs marks fastrefnums pdf endpage extract_text_font_size filename bates batespad num page))
|
||||
lines
|
||||
in
|
||||
(* process URLs for justification too *)
|
||||
|
@ -291,7 +316,10 @@ let addtext
|
|||
(indx0 (fst fontpack))
|
||||
in
|
||||
let ops, urls, x, y, hoffset, voffset, text, joffset =
|
||||
let text = process_text time text (replace_pairs pdf endpage extract_text_font_size filename bates batespad num page) in
|
||||
let refnums = Pdf.page_reference_numbers pdf in
|
||||
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
||||
let marks = Pdfmarks.read_bookmarks pdf in
|
||||
let text = process_text time text (replace_pairs marks fastrefnums pdf endpage extract_text_font_size filename bates batespad num page) in
|
||||
let text, urls = get_urls_line text in
|
||||
let lines = map (fun text -> if raw || fontpack <> None then text else charcodes_of_utf8 (Pdftext.read_font pdf fontpdfobj) text) lines in
|
||||
let expanded_lines = expand_lines text time pdf endpage extract_text_font_size filename bates batespad num page lines in
|
||||
|
|
|
@ -61,6 +61,8 @@ val addrectangle :
|
|||
|
||||
(**/**)
|
||||
val replace_pairs :
|
||||
Pdfmarks.t list ->
|
||||
(int, int) Hashtbl.t ->
|
||||
Pdf.t ->
|
||||
int ->
|
||||
float option ->
|
||||
|
|
|
@ -164,8 +164,11 @@ let reset_state () =
|
|||
(res ()).page_names <- []*)
|
||||
|
||||
let process_specials pdf endpage filename bates batespad num page s =
|
||||
let refnums = Pdf.page_reference_numbers pdf in
|
||||
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
||||
let marks = Pdfmarks.read_bookmarks pdf in
|
||||
let pairs =
|
||||
Cpdfaddtext.replace_pairs pdf endpage None filename bates batespad num page
|
||||
Cpdfaddtext.replace_pairs marks fastrefnums pdf endpage None filename bates batespad num page
|
||||
in
|
||||
Cpdfaddtext.process_text (res ()).time s pairs
|
||||
|
||||
|
|
Loading…
Reference in New Issue