This commit is contained in:
John Whitington 2022-10-04 20:35:05 +01:00
parent ca2f5f1621
commit 1f6afaccd8
3 changed files with 43 additions and 8 deletions

View File

@ -6,9 +6,7 @@ open Pdfutil
b) See which of them are in the glyph list;
c) See which of those are in (StdEncoding|MacRomanEncoding|WinAnsiEncoding), and get their codes;
d) Build a font to do just those;
e) We put missing glyph or similar for any character not in the encoding
(* FUTURE *)
1) Actually subset the font to save size
2) Allow characters not in the standard encodings by building one or more secondary subsets *)
(*let () =
@ -50,7 +48,7 @@ let embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding =
let accepted_unicodepoints =
calc_accepted_unicodepoints encoding_table glyphlist_table codepoints
in
let f = Cpdftruetype.parse ~subset:accepted_unicodepoints fontfile ~encoding in
let f = hd (Cpdftruetype.parse ~subset:accepted_unicodepoints fontfile ~encoding) in
let name_1 = basename () in
let module TT = Cpdftruetype in
let fontfile =

View File

@ -19,10 +19,46 @@ type t =
firstchar : int;
lastchar : int;
widths : int array;
subset : Pdfio.bytes}
subset : Pdfio.bytes;
tounicode : Pdfio.bytes option}
(* Mutable debug switch, initially off. Presumably consulted elsewhere in this
   module to enable text-based debug output; its uses are outside this view. *)
let dbg = ref false (* text-based debug *)
(* Fixed header of the generated ToUnicode CMap: resource setup, the
   CIDSystemInfo dictionary, the CMap name and type, and a single one-byte
   codespace range <00>..<FF>. Every line, including the last, is terminated
   by a newline. *)
let tounicode_preamble =
  String.concat "\n"
    ["/CIDInit /ProcSet findresource begin";
     "12 dict begin";
     "begincmap";
     "/CIDSystemInfo <<";
     "/Registry (Adobe)";
     "/Ordering (UCS)";
     "/Supplement 0";
     ">> def";
     "/CMapName /Adobe-Identity-UCS def";
     "/CMapType 2 def";
     "1 begincodespacerange";
     "<00><FF>";
     "endcodespacerange";
     ""]
(* Fixed trailer of the generated ToUnicode CMap. It starts by closing the
   bfrange section left open by the mapping text that precedes it, then ends
   the CMap, registers it as a resource and pops the two dictionaries opened
   by the preamble. Every line is newline-terminated. *)
let tounicode_postamble =
  String.concat "\n"
    ["endbfrange";
     "endcmap";
     "CMapName currentdict /CMap defineresource pop";
     "end";
     "end";
     ""]
(* [tounicode_map s us] builds the bfrange part of a ToUnicode CMap. The i-th
   element of [us] (a Unicode codepoint) is mapped from the one-byte character
   code [s + i], one entry per line, in the form <code><code><utf16be>.

   - Each entry is terminated by a newline, so entries do not run together and
     the "endbfrange" that follows starts on its own line.
   - Codepoints above U+FFFF are written as a UTF-16BE surrogate pair rather
     than as an over-long hex string.
   - Sections are limited to 100 entries, the maximum the CMap format allows
     per beginbfrange/endbfrange pair; longer lists are split into several
     sections.

   The final section is deliberately left open: the caller must append
   "endbfrange" (tounicode_postamble begins with it). An empty [us] yields
   just "0 beginbfrange\n". *)
let tounicode_map (s : int) (us : int list) =
  let max_entries = 100 in
  let b = Buffer.create 1024 in
  (* Hex digits of the UTF-16BE encoding of a single codepoint. *)
  let utf16be_hex u =
    if u < 0x10000 then Printf.sprintf "%04x" u
    else
      let v = u - 0x10000 in
      Printf.sprintf "%04x%04x" (0xD800 lor (v lsr 10)) (0xDC00 lor (v land 0x3FF))
  in
  let total = List.length us in
  Buffer.add_string b
    (Printf.sprintf "%i beginbfrange\n" (min total max_entries));
  List.iteri
    (fun i u ->
       (* Close the full section and open the next one every 100 entries. *)
       if i > 0 && i mod max_entries = 0 then
         Buffer.add_string b
           (Printf.sprintf "endbfrange\n%i beginbfrange\n"
              (min (total - i) max_entries));
       let code = s + i in
       Buffer.add_string b
         (Printf.sprintf "<%02x><%02x><%s>\n" code code (utf16be_hex u)))
    us;
  Buffer.contents b
(* [tounicode s us] assembles a complete ToUnicode CMap: the fixed preamble,
   the mapping section produced by [tounicode_map s us], and the fixed
   postamble, concatenated in that order and converted with
   [bytes_of_string]. *)
let tounicode s us =
  let cmap_text =
    String.concat ""
      [tounicode_preamble; tounicode_map s us; tounicode_postamble]
  in
  bytes_of_string cmap_text
(* Four-character TrueType table tags. Presumably the set of tables retained
   when writing the subset font -- the use site is outside this view. Note
   that "cvt " carries a trailing space, since tags are always four bytes. *)
let required_tables =
  [ "head";
    "hhea";
    "loca";
    "cmap";
    "maxp";
    "cvt ";
    "glyf";
    "prep";
    "hmtx";
    "fpgm" ]
@ -441,6 +477,6 @@ let parse ?(subset=[]) data ~encoding =
let b = mk_b (i32toi locaoffset) in
let loca = read_loca_table indexToLocFormat numGlyphs b in
let subset = remove_unneeded_tables major minor !tables indexToLocFormat subset encoding !glyphcodes loca data in
{flags; minx; miny; maxx; maxy; italicangle; ascent; descent;
[{flags; minx; miny; maxx; maxy; italicangle; ascent; descent;
capheight; stemv; xheight; avgwidth; maxwidth; firstchar; lastchar;
widths; subset}
widths; subset; tounicode = None}]

View File

@ -16,9 +16,10 @@ type t =
firstchar : int;
lastchar : int;
widths : int array;
subset : Pdfio.bytes}
subset : Pdfio.bytes;
tounicode : Pdfio.bytes option}
(* Parse the font, given the list of Unicode codepoints required for the subset
and optionally their PDF codepoint too. Returns the information required for
embedding this font in a PDF. *)
val parse : ?subset:int list -> Pdfio.bytes -> encoding:Pdftext.encoding -> t
val parse : ?subset:int list -> Pdfio.bytes -> encoding:Pdftext.encoding -> t list