more

2025-06-05 22:09:39 +02:00 · 2022-10-04 20:35:05 +01:00
parent ca2f5f1621
commit 1f6afaccd8
3 changed files with 43 additions and 8 deletions
--- a/cpdfembed.ml
+++ b/cpdfembed.ml
@@ -6,9 +6,7 @@ open Pdfutil
  b) See which of them are in the glyph list;
  c) See which of those are in (StdEncoding|MacRomanEncoding|WinAnsiEncoding), and get their codes;
  d) Build a font to do just those;
-  e) We put missing glyph or similar for any character not in the encoding
  (* FUTURE *)
-  1) Actually subset the font to save size
  2) Allow characters not in the standard encodings by builing one or more secondary subsets *)

 (*let () =
@@ -50,7 +48,7 @@ let embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding =
  let accepted_unicodepoints =
    calc_accepted_unicodepoints encoding_table glyphlist_table codepoints
  in
-  let f = Cpdftruetype.parse ~subset:accepted_unicodepoints fontfile ~encoding in
+  let f = hd (Cpdftruetype.parse ~subset:accepted_unicodepoints fontfile ~encoding) in
  let name_1 = basename () in
  let module TT = Cpdftruetype in
  let fontfile =
--- a/cpdftruetype.ml
+++ b/cpdftruetype.ml
@@ -19,10 +19,46 @@ type t =
   firstchar : int;
   lastchar : int;
   widths : int array;
-   subset : Pdfio.bytes}
+   subset : Pdfio.bytes;
+   tounicode : Pdfio.bytes option}

 let dbg = ref false (* text-based debug *)

+let tounicode_preamble = (* opening section of the ToUnicode CMap text, up to and including the one-byte <00>..<FF> codespace range *)
+"/CIDInit /ProcSet findresource begin\n\
+12 dict begin\n\
+begincmap\n\
+/CIDSystemInfo <<\n\
+  /Registry (Adobe)\n\
+  /Ordering (UCS)\n\
+  /Supplement 0\n\
+>> def\n\
+/CMapName /Adobe-Identity-UCS def\n\
+/CMapType 2 def\n\
+1 begincodespacerange\n\
+<00><FF>\n\
+endcodespacerange\n" (* each backslash-newline continuation drops the line break and the leading blanks of the next source line, so the indented dictionary entries are emitted flush-left *)
+
+let tounicode_postamble = (* closing section of the CMap text; its leading endbfrange terminates the block opened by tounicode_map *)
+"endbfrange\n\
+endcmap\n\
+CMapName currentdict /CMap defineresource pop\n\
+end\n\
+end\n"
+
+(* bfrange section of the ToUnicode CMap: character code s + i maps to the i-th code point in us. The closing endbfrange comes from tounicode_postamble. *)
+let tounicode_map (s : int) (us : int list) =
+  let b = Buffer.create 1024 in
+  Printf.bprintf b "%i beginbfrange\n" (length us);
+  (* One range per line: the entries used to be written with no separator, running into each other and into endbfrange. *)
+  List.iteri (fun i u -> Printf.bprintf b "<%02x><%02x><%04x>\n" (s + i) (s + i) u) us;
+  (* NOTE(review): code points above 0xFFFF would need UTF-16BE surrogate pairs here, and the CMap format limits a bfrange
+     block to 100 entries; neither case is handled - confirm callers stay within both limits. *)
+  Buffer.contents b
+
+let tounicode s us = (* complete ToUnicode CMap, as bytes, for character codes s, s + 1, ... paired with the code points in us *)
+  bytes_of_string (String.concat "" [tounicode_preamble; tounicode_map s us; tounicode_postamble])
+
 let required_tables =
  ["head"; "hhea"; "loca"; "cmap"; "maxp"; "cvt "; "glyf"; "prep"; "hmtx"; "fpgm"]

@@ -441,6 +477,6 @@ let parse ?(subset=[]) data ~encoding =
            let b = mk_b (i32toi locaoffset) in
            let loca = read_loca_table indexToLocFormat numGlyphs b in
            let subset = remove_unneeded_tables major minor !tables indexToLocFormat subset encoding !glyphcodes loca data in
-              {flags; minx; miny; maxx; maxy; italicangle; ascent; descent;
+              [{flags; minx; miny; maxx; maxy; italicangle; ascent; descent;
              capheight; stemv; xheight; avgwidth; maxwidth; firstchar; lastchar;
-              widths; subset}
+              widths; subset; tounicode = None}]
--- a/cpdftruetype.mli
+++ b/cpdftruetype.mli
@@ -16,9 +16,10 @@ type t =
   firstchar : int;
   lastchar : int;
   widths : int array;
-   subset : Pdfio.bytes}
+   subset : Pdfio.bytes;
+   tounicode : Pdfio.bytes option}

 (* Parse the font, given the list of Unicode codepoints required for the subset
   and optionally their PDF codepoint too. Returns the information required for
   embedding this font in a PDF. *)
-val parse : ?subset:int list -> Pdfio.bytes -> encoding:Pdftext.encoding -> t
+val parse : ?subset:int list -> Pdfio.bytes -> encoding:Pdftext.encoding -> t list