From 8be6e6507c260fec15c104f0a6df2e34d5bd3532 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Sat, 8 Jul 2023 22:02:10 +0100 Subject: [PATCH] -table-of-contents now precomputes codepoints --- cpdftoc.ml | 52 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/cpdftoc.ml b/cpdftoc.ml index e749a26..cfbf444 100644 --- a/cpdftoc.ml +++ b/cpdftoc.ml @@ -15,16 +15,14 @@ let rec real_newline = function | [] -> [] (* Cpdftype codepoints from a font and UTF8 *) -let of_utf8 used f t = - let codepoints = Pdftext.codepoints_of_utf8 t in - iter (fun u -> Hashtbl.replace used u ()) codepoints; - codepoints +let of_utf8 f t = + Pdftext.codepoints_of_utf8 t |> option_map (Pdftext.charcode_extractor_of_font_real f) |> map char_of_int (* Cpdftype codepoints from a font and PDFDocEndoding string *) -let of_pdfdocencoding used f t = - of_utf8 used f (Pdftext.utf8_of_pdfdocstring t) +let of_pdfdocencoding f t = + of_utf8 f (Pdftext.utf8_of_pdfdocstring t) (* Remove characters until it is below the length. Then remove three more and add dots for an ellipsis *) @@ -37,12 +35,37 @@ let shorten_text widths l t = let short = shorten_text_inner widths l t in if short = t then t else short @ ['.'; '.'; '.'] +(* Calculate the used codepoints *) +let used pdf fastrefnums labels title marks = + let codepoints = null_hash () in + let addtext t = + iter + (fun c -> Hashtbl.replace codepoints c ()) + (Pdftext.codepoints_of_utf8 (Pdftext.utf8_of_pdfdocstring t)) + in + iter (fun c -> Hashtbl.replace codepoints c ()) (Pdftext.codepoints_of_utf8 title); + iter + (fun m -> + addtext m.Pdfmarks.text; + let pnum = Pdfpage.pagenumber_of_target ~fastrefnums pdf m.Pdfmarks.target in + let labeltext = + try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> string_of_int pnum + in + addtext labeltext) + marks; + codepoints + (* Typeset a table of contents with given font, font size and title. Mediabox (and CropBox) copied from first page of existing PDF. Margin of 10% inside CropBox. Font size of title twice body font size. Null page labels added for TOC, others bumped up and so preserved. *) let typeset_table_of_contents ~font ~fontsize ~title ~bookmark pdf = - let codepoints = [] (* FIXME *) in + let marks = Pdfmarks.read_bookmarks pdf in + if marks = [] then (Pdfe.log "No bookmarks, not making table of contents\n"; pdf) else + let labels = Pdfpagelabels.read pdf in + let refnums = Pdf.page_reference_numbers pdf in + let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in + let codepoints = map fst (list_of_hashtbl (used pdf fastrefnums labels title marks)) in let font = match font with | Cpdfembed.PreMadeFontPack t -> hd (fst t) @@ -50,8 +73,6 @@ let typeset_table_of_contents ~font ~fontsize ~title ~bookmark pdf = hd (fst (Cpdfembed.embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding)) | Cpdfembed.ExistingNamedFont -> raise (Pdf.PDFError "Cannot use existing font with -table-of-contents") in - let marks = Pdfmarks.read_bookmarks pdf in - if marks = [] then (Pdfe.log "No bookmarks, not making table of contents\n"; pdf) else let f, fs = (font, fontsize) in let _, bfs as big = (font, fontsize *. 2.) in let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in @@ -72,22 +93,18 @@ let typeset_table_of_contents ~font ~fontsize ~title ~bookmark pdf = | Some (xmin, _, xmax, _) -> xmax -. xmin | None -> width in - let labels = Pdfpagelabels.read pdf in - let used = null_hash () in let lines = - let refnums = Pdf.page_reference_numbers pdf in - let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in map (fun mark -> let indent = float mark.Pdfmarks.level *. fontsize *. 2. in - let text = of_pdfdocencoding used f mark.Pdfmarks.text in + let text = of_pdfdocencoding f mark.Pdfmarks.text in let label = - if mark.Pdfmarks.target = NullDestination then of_pdfdocencoding used f " " else + if mark.Pdfmarks.target = NullDestination then of_pdfdocencoding f "" else let pde = let pnum = Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target in try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> string_of_int pnum in - of_pdfdocencoding used f pde + of_pdfdocencoding f pde in let widths = Cpdftype.font_widths f fontsize in let textgap = width -. margin *. 2. -. indent -. Cpdftype.width_of_string widths label in @@ -109,7 +126,7 @@ let typeset_table_of_contents ~font ~fontsize ~title ~bookmark pdf = flatten (map (fun l -> [Cpdftype.Text l; Cpdftype.NewLine]) - (split_toc_title (of_utf8 used f title))) + (split_toc_title (of_utf8 f title))) @ [glue] in let lm, rm, tm, bm = @@ -118,7 +135,6 @@ let typeset_table_of_contents ~font ~fontsize ~title ~bookmark pdf = | Some (cminx, cminy, cmaxx, cmaxy) -> (cminx +. margin, (pmaxx -. cmaxx) +. margin, cminy +. margin, (pmaxy -. cmaxy) +. margin) in - Cpdftype.typeset lm rm tm bm firstpage_papersize pdf ([Cpdftype.Font (font, bfs); Cpdftype.BeginDocument] @ title @ [Cpdftype.Font (font, fs)] @ flatten lines)