-table-of-contents now precomputes codepoints

This commit is contained in:
John Whitington 2023-07-08 22:02:10 +01:00
parent 17ddf4a766
commit 8be6e6507c
1 changed files with 34 additions and 18 deletions

View File

@ -15,16 +15,14 @@ let rec real_newline = function
| [] -> [] | [] -> []
(* Cpdftype codepoints from a font and UTF8 *) (* Cpdftype codepoints from a font and UTF8 *)
let of_utf8 used f t = let of_utf8 f t =
let codepoints = Pdftext.codepoints_of_utf8 t in Pdftext.codepoints_of_utf8 t
iter (fun u -> Hashtbl.replace used u ()) codepoints;
codepoints
|> option_map (Pdftext.charcode_extractor_of_font_real f) |> option_map (Pdftext.charcode_extractor_of_font_real f)
|> map char_of_int |> map char_of_int
(* Cpdftype codepoints from a font and PDFDocEncoding string *) (* Cpdftype codepoints from a font and PDFDocEncoding string *)
let of_pdfdocencoding used f t = let of_pdfdocencoding f t =
of_utf8 used f (Pdftext.utf8_of_pdfdocstring t) of_utf8 f (Pdftext.utf8_of_pdfdocstring t)
(* Remove characters until it is below the length. Then remove three more and (* Remove characters until it is below the length. Then remove three more and
add dots for an ellipsis *) add dots for an ellipsis *)
@ -37,12 +35,37 @@ let shorten_text widths l t =
let short = shorten_text_inner widths l t in let short = shorten_text_inner widths l t in
if short = t then t else short @ ['.'; '.'; '.'] if short = t then t else short @ ['.'; '.'; '.']
(* Calculate the used codepoints: the Unicode codepoints which may need to be
   typeset in the table of contents, returned as the keys of a hash table
   (every value is unit).

   Collected from:
   - the title, read as UTF8;
   - the text of each bookmark, converted with Pdftext.utf8_of_pdfdocstring;
   - the page label text for the target of each bookmark, falling back to the
     decimal page number when Pdfpagelabels raises Not_found;
   - the full stop, unconditionally. shorten_text appends three dots to any
     entry it truncates, and those dots are added as character codes after
     conversion, so they would otherwise be missing whenever no title,
     bookmark or label happens to contain one.

   A label is collected for every mark, including marks for which the
   typesetter draws an empty label, so the result can be a superset of the
   codepoints finally drawn. *)
let used pdf fastrefnums labels title marks =
  let codepoints = null_hash () in
  let add c = Hashtbl.replace codepoints c () in
  let addtext t =
    iter add (Pdftext.codepoints_of_utf8 (Pdftext.utf8_of_pdfdocstring t))
  in
    (* Needed for the ellipsis added by shorten_text. *)
    add (int_of_char '.');
    iter add (Pdftext.codepoints_of_utf8 title);
    iter
      (fun m ->
        addtext m.Pdfmarks.text;
        let pnum = Pdfpage.pagenumber_of_target ~fastrefnums pdf m.Pdfmarks.target in
        let labeltext =
          try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with
            Not_found -> string_of_int pnum
        in
          addtext labeltext)
      marks;
    codepoints
(* Typeset a table of contents with given font, font size and title. Mediabox (* Typeset a table of contents with given font, font size and title. Mediabox
(and CropBox) copied from first page of existing PDF. Margin of 10% inside (and CropBox) copied from first page of existing PDF. Margin of 10% inside
CropBox. Font size of title twice body font size. Null page labels added for CropBox. Font size of title twice body font size. Null page labels added for
TOC, others bumped up and so preserved. *) TOC, others bumped up and so preserved. *)
let typeset_table_of_contents ~font ~fontsize ~title ~bookmark pdf = let typeset_table_of_contents ~font ~fontsize ~title ~bookmark pdf =
let codepoints = [] (* FIXME *) in let marks = Pdfmarks.read_bookmarks pdf in
if marks = [] then (Pdfe.log "No bookmarks, not making table of contents\n"; pdf) else
let labels = Pdfpagelabels.read pdf in
let refnums = Pdf.page_reference_numbers pdf in
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
let codepoints = map fst (list_of_hashtbl (used pdf fastrefnums labels title marks)) in
let font = let font =
match font with match font with
| Cpdfembed.PreMadeFontPack t -> hd (fst t) | Cpdfembed.PreMadeFontPack t -> hd (fst t)
@ -50,8 +73,6 @@ let typeset_table_of_contents ~font ~fontsize ~title ~bookmark pdf =
hd (fst (Cpdfembed.embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding)) hd (fst (Cpdfembed.embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding))
| Cpdfembed.ExistingNamedFont -> raise (Pdf.PDFError "Cannot use existing font with -table-of-contents") | Cpdfembed.ExistingNamedFont -> raise (Pdf.PDFError "Cannot use existing font with -table-of-contents")
in in
let marks = Pdfmarks.read_bookmarks pdf in
if marks = [] then (Pdfe.log "No bookmarks, not making table of contents\n"; pdf) else
let f, fs = (font, fontsize) in let f, fs = (font, fontsize) in
let _, bfs as big = (font, fontsize *. 2.) in let _, bfs as big = (font, fontsize *. 2.) in
let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in
@ -72,22 +93,18 @@ let typeset_table_of_contents ~font ~fontsize ~title ~bookmark pdf =
| Some (xmin, _, xmax, _) -> xmax -. xmin | Some (xmin, _, xmax, _) -> xmax -. xmin
| None -> width | None -> width
in in
let labels = Pdfpagelabels.read pdf in
let used = null_hash () in
let lines = let lines =
let refnums = Pdf.page_reference_numbers pdf in
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
map map
(fun mark -> (fun mark ->
let indent = float mark.Pdfmarks.level *. fontsize *. 2. in let indent = float mark.Pdfmarks.level *. fontsize *. 2. in
let text = of_pdfdocencoding used f mark.Pdfmarks.text in let text = of_pdfdocencoding f mark.Pdfmarks.text in
let label = let label =
if mark.Pdfmarks.target = NullDestination then of_pdfdocencoding used f " " else if mark.Pdfmarks.target = NullDestination then of_pdfdocencoding f "" else
let pde = let pde =
let pnum = Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target in let pnum = Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target in
try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> string_of_int pnum try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> string_of_int pnum
in in
of_pdfdocencoding used f pde of_pdfdocencoding f pde
in in
let widths = Cpdftype.font_widths f fontsize in let widths = Cpdftype.font_widths f fontsize in
let textgap = width -. margin *. 2. -. indent -. Cpdftype.width_of_string widths label in let textgap = width -. margin *. 2. -. indent -. Cpdftype.width_of_string widths label in
@ -109,7 +126,7 @@ let typeset_table_of_contents ~font ~fontsize ~title ~bookmark pdf =
flatten flatten
(map (map
(fun l -> [Cpdftype.Text l; Cpdftype.NewLine]) (fun l -> [Cpdftype.Text l; Cpdftype.NewLine])
(split_toc_title (of_utf8 used f title))) (split_toc_title (of_utf8 f title)))
@ [glue] @ [glue]
in in
let lm, rm, tm, bm = let lm, rm, tm, bm =
@ -118,7 +135,6 @@ let typeset_table_of_contents ~font ~fontsize ~title ~bookmark pdf =
| Some (cminx, cminy, cmaxx, cmaxy) -> | Some (cminx, cminy, cmaxx, cmaxy) ->
(cminx +. margin, (pmaxx -. cmaxx) +. margin, cminy +. margin, (pmaxy -. cmaxy) +. margin) (cminx +. margin, (pmaxx -. cmaxx) +. margin, cminy +. margin, (pmaxy -. cmaxy) +. margin)
in in
Cpdftype.typeset lm rm tm bm firstpage_papersize pdf Cpdftype.typeset lm rm tm bm firstpage_papersize pdf
([Cpdftype.Font (font, bfs); Cpdftype.BeginDocument] @ title @ ([Cpdftype.Font (font, bfs); Cpdftype.BeginDocument] @ title @
[Cpdftype.Font (font, fs)] @ flatten lines) [Cpdftype.Font (font, fs)] @ flatten lines)