From bb5fae55d2aaa75cad35e8d7f2fccbe04e9eff45 Mon Sep 17 00:00:00 2001
From: John Whitington <john@coherentgraphics.co.uk>
Date: Thu, 2 Dec 2021 12:04:14 -0800
Subject: [PATCH] Font-based extractors, Standard 14 widths, TOC page labels

---
 cpdf.ml        | 14 +++++---------
 cpdfcommand.ml | 36 +++++++++++++++++++++++-------------
 cpdffont.ml    |  2 +-
 cpdftype.ml    | 16 +++++++++-------
 4 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/cpdf.ml b/cpdf.ml
index 08cdf28..c8449a1 100644
--- a/cpdf.ml
+++ b/cpdf.ml
@@ -1125,8 +1125,8 @@ let print_fonts pdf range =
 
 (* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever
    is in the font (for existing fonts). *)
-let charcodes_of_utf8 pdf font s =
-  let extractor = Pdftext.charcode_extractor_of_font ~debug:false pdf font in
+let charcodes_of_utf8 font s =
+  let extractor = Pdftext.charcode_extractor_of_font_real ~debug:false font in
   let codepoints = Pdftext.codepoints_of_utf8 s in
     let charcodes =
       option_map
@@ -1141,12 +1141,8 @@ let charcodes_of_utf8 pdf font s =
 (* Process codepoints back to UTF8, assuming it came from UTF8 to start with *)
 let utf8_of_winansi s =
   let text_extractor =
-    Pdftext.text_extractor_of_font
-      (Pdf.empty ())
-      (Pdf.Dictionary
-        [("/BaseFont", Pdf.Name "/TimesRoman");
-         ("/Subtype", Pdf.Name "/Type1");
-         ("/Encoding", Pdf.Name "/WinAnsiEncoding")]) 
+    Pdftext.text_extractor_of_font_real
+      (Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding))
   in
     let codepoints = Pdftext.codepoints_of_text text_extractor s in
       Pdftext.utf8_of_codepoints codepoints
@@ -1601,7 +1597,7 @@ let
             end
         | _ -> failwith "addtext: font dictionary not present"
   in
-  let text = if raw then text else charcodes_of_utf8 pdf fontpdfobj text in
+  let text = if raw then text else charcodes_of_utf8 (Pdftext.read_font pdf fontpdfobj) text in
     let lines = map unescape_string (split_at_newline text) in
       let pdf = ref pdf in
         let voffset =
diff --git a/cpdfcommand.ml b/cpdfcommand.ml
index 228abf8..c58bfb5 100644
--- a/cpdfcommand.ml
+++ b/cpdfcommand.ml
@@ -2919,13 +2919,10 @@ let collate (names, pdfs, ranges) =
     split3 (rev !nis)
 
 let of_utf8 (f, fontsize) t =
-  let pdf = Pdf.empty () in
-  let fontdict = Pdftext.write_font pdf f in
-  let extractor = Pdftext.charcode_extractor_of_font pdf (Pdf.Indirect fontdict) in
-       Pdftext.codepoints_of_utf8 t
-    |> option_map extractor
-    |> map char_of_int
-    |> implode
+     Pdftext.codepoints_of_utf8 t
+  |> option_map (Pdftext.charcode_extractor_of_font_real f)
+  |> map char_of_int
+  |> implode
 
 let of_pdfdocencoding (f, fontsize) t =
   of_utf8 (f, fontsize) (Pdftext.utf8_of_pdfdocstring t)
@@ -2948,9 +2945,13 @@ let rec of_utf8_with_newlines t =
       if c <> "" then items := Text (explode c)::!items;
     rev !items
 
+(* FIXME: margins, hyphenation of too-long words, and efficiency still need work. *)
 let typeset text =
   let pdf = Pdf.empty () in
-  let f = (Pdftext.StandardFont (Pdftext.Courier, Pdftext.WinAnsiEncoding), 12.) in
+  let f = 
+    (begin match args.font with StandardFont sf -> Pdftext.StandardFont (sf, Pdftext.WinAnsiEncoding) | _ -> failwith "typeset bad font" end,
+     args.fontsize)
+  in
   let pages =
     Cpdftype.typeset
       20. 20. 20. 20. Pdfpaper.a4 pdf ([Cpdftype.Font f] @ of_utf8_with_newlines (string_of_bytes text))
@@ -2973,13 +2974,22 @@ let typeset_table_of_contents ~font pdf =
       Pdfpaper.make Pdfunits.PdfPoint width height
   in
   let lines =
+    let refnums = Pdf.page_reference_numbers pdf in
+    let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
     map
       (fun mark ->
-         [Cpdftype.BeginDest mark.Pdfmarks.target;
-          Cpdftype.HGlue {Cpdftype.glen = float mark.Pdfmarks.level *. args.fontsize *. 2.; Cpdftype.gstretch = 0.};
-          Cpdftype.Text (explode (of_pdfdocencoding f mark.Pdfmarks.text));
-          Cpdftype.EndDest;
-          Cpdftype.NewLine])
+         let label =
+           let labels = Pdfpagelabels.read pdf in
+           let pnum = Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target in
+             try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> string_of_int pnum
+         in
+           [Cpdftype.BeginDest mark.Pdfmarks.target;
+            Cpdftype.HGlue {Cpdftype.glen = float mark.Pdfmarks.level *. args.fontsize *. 2.; Cpdftype.gstretch = 0.};
+            Cpdftype.Text (explode (of_pdfdocencoding f mark.Pdfmarks.text ^ " " ^ of_pdfdocencoding f label));
+            (*Cpdftype.Text [' '];
+            Cpdftype.Text (explode (of_pdfdocencoding f label));*)
+            Cpdftype.EndDest;
+            Cpdftype.NewLine])
       (Pdfmarks.read_bookmarks pdf)
   in
   let toc_pages =
diff --git a/cpdffont.ml b/cpdffont.ml
index cd064fa..936decc 100644
--- a/cpdffont.ml
+++ b/cpdffont.ml
@@ -156,7 +156,7 @@ let print_font_table pdf fontname pagenumber =
             | Pdftext.SimpleFont {Pdftext.fontdescriptor = Some {Pdftext.charset = Some cs}} -> Some cs
             | _ -> None
           in
-          let extractor = Pdftext.text_extractor_of_font pdf font in
+          let extractor = Pdftext.text_extractor_of_font_real pdftextfont in
           let unicodedata = Cpdfunicodedata.unicodedata () in
           let unicodetable = Hashtbl.create 16000 in
            iter
diff --git a/cpdftype.ml b/cpdftype.ml
index 8ad7f81..48e7963 100644
--- a/cpdftype.ml
+++ b/cpdftype.ml
@@ -1,11 +1,6 @@
 (* A typesetter for cpdf. A list of elements is manipulated zero or more times
    to lay it out, paginate it, and so on. It is then typeset to produce a list
    of pages *)
-
-(* FIXME We need to make Pdfstandard14 width calculations much more efficient
-   by caching so that we are not making a table up for each character! *)
-(* FIXME We need to reintroduce kerning in Pdfstandard14. *)
-(* FIXME Fix up charcode / text extractors to take fonts not fontdicts *)
 open Pdfutil
 
 (* Glue *)
@@ -55,8 +50,15 @@ let initial_state () =
    dest = None}
 
 let font_widths f fontsize =
-  let w = fontsize *. (600. /. 1000.) in
-    Array.make 256 w
+  let stdfont =
+    match f with Pdftext.StandardFont (sf, _) -> sf | _ -> failwith "not a standard font"
+  in
+    Array.init
+      256
+      (fun x ->
+           fontsize
+        *. float_of_int (Pdfstandard14.textwidth false Pdftext.WinAnsiEncoding stdfont (string_of_char (char_of_int x)))
+        /. 1000.)
 
 let width_of_string ws s =
   let w = ref 0. in