First stab at %Bookmark<n>
This commit is contained in:
parent
3ae4a373b5
commit
1491f30094
1
Changes
1
Changes
|
@ -15,6 +15,7 @@ o Show OpenAction in -info
|
||||||
o Show more form information in -info
|
o Show more form information in -info
|
||||||
o Show XFA in -info
|
o Show XFA in -info
|
||||||
o Allow JSON / PDF syntax in dict processing and object exploration
|
o Allow JSON / PDF syntax in dict processing and object exploration
|
||||||
|
o %Bookmark and friends when stamping text
|
||||||
|
|
||||||
Fixes:
|
Fixes:
|
||||||
|
|
||||||
|
|
|
@ -177,7 +177,24 @@ let pagelabel pdf num =
|
||||||
num
|
num
|
||||||
(Pdfpagelabels.complete (Pdfpagelabels.read pdf))
|
(Pdfpagelabels.complete (Pdfpagelabels.read pdf))
|
||||||
|
|
||||||
let replace_pairs pdf endpage extract_text_font_size filename bates batespad num page =
|
(* Return the UTF8 text of the bookmark at the given level which is current on
   page [num]: the most recent mark at exactly [level], on or before that page,
   with no mark nearer the top of the tree after it. No bookmark available =
   empty string.

   marks        the document's bookmarks (callers pass the result of
                Pdfmarks.read_bookmarks). Assumed to be in page order, since
                cleavewhile stops at the first mark beyond [num] -- TODO
                confirm.
   fastrefnums  table from page reference number to page index, handed to
                Pdfpage.pagenumber_of_target.
   level        bookmark level wanted (0 for %Bookmark0, 1 for %Bookmark1...).
                Assumes a smaller number is nearer the top of the bookmark
                tree -- confirm against Pdfmarks.
   pdf          the document.
   num          the page number. *)
let bookmark marks fastrefnums level pdf num =
  let before, _ =
    (* 1. Pick all marks up to and including those on the needed page. *)
    cleavewhile
      (fun mark ->
         Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target <= num)
      marks
  in
  (* 2. Walk backwards from the most recent mark. Marks nested deeper than
     [level] are skipped, so a section heading does not hide its chapter. The
     first mark at [level] is the current one. Meeting a mark nearer the top
     of the tree first means no bookmark at [level] has yet been introduced
     under it, so sections of an earlier chapter do not show up in a later
     chapter. *)
  let rec current = function
    | [] -> ""
    | mark::earlier ->
        if mark.Pdfmarks.level > level then current earlier
        else if mark.Pdfmarks.level = level then
          Pdftext.utf8_of_pdfdocstring mark.Pdfmarks.text
        else ""
  in
    current (rev before)
|
||||||
|
|
||||||
|
let replace_pairs marks fastrefnums pdf endpage extract_text_font_size filename bates batespad num page =
|
||||||
[
|
[
|
||||||
"%PageDiv2", (fun () -> string_of_int ((num + 1) / 2));
|
"%PageDiv2", (fun () -> string_of_int ((num + 1) / 2));
|
||||||
"%Page", (fun () -> string_of_int num);
|
"%Page", (fun () -> string_of_int num);
|
||||||
|
@ -187,6 +204,11 @@ let replace_pairs pdf endpage extract_text_font_size filename bates batespad num
|
||||||
"%Label", (fun () -> pagelabel pdf num);
|
"%Label", (fun () -> pagelabel pdf num);
|
||||||
"%EndPage", (fun () -> string_of_int endpage);
|
"%EndPage", (fun () -> string_of_int endpage);
|
||||||
"%EndLabel", (fun () -> pagelabel pdf endpage);
|
"%EndLabel", (fun () -> pagelabel pdf endpage);
|
||||||
|
"%Bookmark0", (fun () -> bookmark marks fastrefnums 0 pdf num);
|
||||||
|
"%Bookmark1", (fun () -> bookmark marks fastrefnums 1 pdf num);
|
||||||
|
"%Bookmark2", (fun () -> bookmark marks fastrefnums 2 pdf num);
|
||||||
|
"%Bookmark3", (fun () -> bookmark marks fastrefnums 3 pdf num);
|
||||||
|
"%Bookmark4", (fun () -> bookmark marks fastrefnums 4 pdf num);
|
||||||
"%ExtractedText", (fun () -> Cpdfextracttext.extract_page_text extract_text_font_size pdf num page);
|
"%ExtractedText", (fun () -> Cpdfextracttext.extract_page_text extract_text_font_size pdf num page);
|
||||||
"%Bates",
|
"%Bates",
|
||||||
(fun () ->
|
(fun () ->
|
||||||
|
@ -199,10 +221,13 @@ let replace_pairs pdf endpage extract_text_font_size filename bates batespad num
|
||||||
else implode (many '0' (w - String.length numstring)) ^ numstring))]
|
else implode (many '0' (w - String.length numstring)) ^ numstring))]
|
||||||
|
|
||||||
let expand_lines text time pdf endpage extract_text_font_size filename bates batespad num page lines =
|
let expand_lines text time pdf endpage extract_text_font_size filename bates batespad num page lines =
|
||||||
|
let refnums = Pdf.page_reference_numbers pdf in
|
||||||
|
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
||||||
|
let marks = Pdfmarks.read_bookmarks pdf in
|
||||||
let expanded_lines =
|
let expanded_lines =
|
||||||
map
|
map
|
||||||
(function text ->
|
(function text ->
|
||||||
process_text time text (replace_pairs pdf endpage extract_text_font_size filename bates batespad num page))
|
process_text time text (replace_pairs marks fastrefnums pdf endpage extract_text_font_size filename bates batespad num page))
|
||||||
lines
|
lines
|
||||||
in
|
in
|
||||||
(* process URLs for justification too *)
|
(* process URLs for justification too *)
|
||||||
|
@ -291,7 +316,10 @@ let addtext
|
||||||
(indx0 (fst fontpack))
|
(indx0 (fst fontpack))
|
||||||
in
|
in
|
||||||
let ops, urls, x, y, hoffset, voffset, text, joffset =
|
let ops, urls, x, y, hoffset, voffset, text, joffset =
|
||||||
let text = process_text time text (replace_pairs pdf endpage extract_text_font_size filename bates batespad num page) in
|
let refnums = Pdf.page_reference_numbers pdf in
|
||||||
|
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
||||||
|
let marks = Pdfmarks.read_bookmarks pdf in
|
||||||
|
let text = process_text time text (replace_pairs marks fastrefnums pdf endpage extract_text_font_size filename bates batespad num page) in
|
||||||
let text, urls = get_urls_line text in
|
let text, urls = get_urls_line text in
|
||||||
let lines = map (fun text -> if raw || fontpack <> None then text else charcodes_of_utf8 (Pdftext.read_font pdf fontpdfobj) text) lines in
|
let lines = map (fun text -> if raw || fontpack <> None then text else charcodes_of_utf8 (Pdftext.read_font pdf fontpdfobj) text) lines in
|
||||||
let expanded_lines = expand_lines text time pdf endpage extract_text_font_size filename bates batespad num page lines in
|
let expanded_lines = expand_lines text time pdf endpage extract_text_font_size filename bates batespad num page lines in
|
||||||
|
|
|
@ -61,6 +61,8 @@ val addrectangle :
|
||||||
|
|
||||||
(**/**)
|
(**/**)
|
||||||
val replace_pairs :
|
val replace_pairs :
|
||||||
|
Pdfmarks.t list ->
|
||||||
|
(int, int) Hashtbl.t ->
|
||||||
Pdf.t ->
|
Pdf.t ->
|
||||||
int ->
|
int ->
|
||||||
float option ->
|
float option ->
|
||||||
|
|
|
@ -164,8 +164,11 @@ let reset_state () =
|
||||||
(res ()).page_names <- []*)
|
(res ()).page_names <- []*)
|
||||||
|
|
||||||
let process_specials pdf endpage filename bates batespad num page s =
|
let process_specials pdf endpage filename bates batespad num page s =
|
||||||
|
let refnums = Pdf.page_reference_numbers pdf in
|
||||||
|
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
||||||
|
let marks = Pdfmarks.read_bookmarks pdf in
|
||||||
let pairs =
|
let pairs =
|
||||||
Cpdfaddtext.replace_pairs pdf endpage None filename bates batespad num page
|
Cpdfaddtext.replace_pairs marks fastrefnums pdf endpage None filename bates batespad num page
|
||||||
in
|
in
|
||||||
Cpdfaddtext.process_text (res ()).time s pairs
|
Cpdfaddtext.process_text (res ()).time s pairs
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue