This commit is contained in:
John Whitington 2022-10-04 20:35:05 +01:00
parent ca2f5f1621
commit 1f6afaccd8
3 changed files with 43 additions and 8 deletions

View File

@ -6,9 +6,7 @@ open Pdfutil
b) See which of them are in the glyph list; b) See which of them are in the glyph list;
c) See which of those are in (StdEncoding|MacRomanEncoding|WinAnsiEncoding), and get their codes; c) See which of those are in (StdEncoding|MacRomanEncoding|WinAnsiEncoding), and get their codes;
d) Build a font to do just those; d) Build a font to do just those;
e) We put missing glyph or similar for any character not in the encoding
(* FUTURE *) (* FUTURE *)
1) Actually subset the font to save size
2) Allow characters not in the standard encodings by building one or more secondary subsets *) 2) Allow characters not in the standard encodings by building one or more secondary subsets *)
(*let () = (*let () =
@ -50,7 +48,7 @@ let embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding =
let accepted_unicodepoints = let accepted_unicodepoints =
calc_accepted_unicodepoints encoding_table glyphlist_table codepoints calc_accepted_unicodepoints encoding_table glyphlist_table codepoints
in in
let f = Cpdftruetype.parse ~subset:accepted_unicodepoints fontfile ~encoding in let f = hd (Cpdftruetype.parse ~subset:accepted_unicodepoints fontfile ~encoding) in
let name_1 = basename () in let name_1 = basename () in
let module TT = Cpdftruetype in let module TT = Cpdftruetype in
let fontfile = let fontfile =

View File

@ -19,10 +19,46 @@ type t =
firstchar : int; firstchar : int;
lastchar : int; lastchar : int;
widths : int array; widths : int array;
subset : Pdfio.bytes} subset : Pdfio.bytes;
tounicode : Pdfio.bytes option}
let dbg = ref false (* text-based debug *) let dbg = ref false (* text-based debug *)
(* Fixed opening text of a ToUnicode CMap program. It names the
   Adobe / UCS / 0 CIDSystemInfo, the Adobe-Identity-UCS CMap of type 2, and
   declares one code space range running from <00> to <FF>. Every line,
   including the last, ends with a newline. *)
let tounicode_preamble =
  String.concat "\n"
    ["/CIDInit /ProcSet findresource begin";
     "12 dict begin";
     "begincmap";
     "/CIDSystemInfo <<";
     "/Registry (Adobe)";
     "/Ordering (UCS)";
     "/Supplement 0";
     ">> def";
     "/CMapName /Adobe-Identity-UCS def";
     "/CMapType 2 def";
     "1 begincodespacerange";
     "<00><FF>";
     "endcodespacerange";
     ""]
(* Fixed closing text of a ToUnicode CMap program. It starts with the
   endbfrange keyword, so whatever precedes it must leave a bfrange block
   open; it then ends the cmap, defines the CMap resource and closes the two
   dictionaries opened by the preamble. Every line ends with a newline. *)
let tounicode_postamble =
  String.concat "\n"
    ["endbfrange";
     "endcmap";
     "CMapName currentdict /CMap defineresource pop";
     "end";
     "end";
     ""]
(* Build the bfrange section of a ToUnicode CMap.

   [s] is the character code of the first entry; [us] lists the Unicode code
   points for codes [s], [s + 1], [s + 2] and so on. Each entry is written on
   its own line as a one-code range: <code><code><utf16be>.

   The last block is deliberately left open: the caller appends the closing
   endbfrange keyword (tounicode_postamble begins with it). An empty [us]
   therefore yields only the 0 beginbfrange header line.

   Blocks are capped at 100 entries, the per-block limit in the Adobe CMap
   specification; longer lists are split into several consecutive blocks. *)
let tounicode_map (s : int) (us : int list) =
  let max_block = 100 in
  let block_size remaining =
    if remaining > max_block then max_block else remaining
  in
  (* Destination strings are UTF-16BE: four hex digits for a code point in
     the Basic Multilingual Plane, a surrogate pair (eight digits) above it. *)
  let utf16be_hex u =
    if u <= 0xFFFF then
      Printf.sprintf "%04x" u
    else
      let v = u - 0x10000 in
      Printf.sprintf "%04x%04x" (0xD800 lor (v lsr 10)) (0xDC00 lor (v land 0x3FF))
  in
  let total = List.length us in
  let b = Buffer.create 1024 in
  Buffer.add_string b (Printf.sprintf "%i beginbfrange\n" (block_size total));
  List.iteri
    (fun i u ->
       (* Every 100 entries, close the current block and open the next. *)
       if i > 0 && i mod max_block = 0 then
         Buffer.add_string b
           (Printf.sprintf "endbfrange\n%i beginbfrange\n" (block_size (total - i)));
       let code = s + i in
       Buffer.add_string b
         (Printf.sprintf "<%02x><%02x><%s>\n" code code (utf16be_hex u)))
    us;
  Buffer.contents b
(* Assemble a complete ToUnicode CMap program for the codes starting at [s]
   and the code points [us]: the fixed preamble, the bfrange section built by
   tounicode_map, then the fixed postamble. The text is passed through
   bytes_of_string before being returned. *)
let tounicode s us =
  let cmap_text =
    String.concat "" [tounicode_preamble; tounicode_map s us; tounicode_postamble]
  in
  bytes_of_string cmap_text
let required_tables = let required_tables =
["head"; "hhea"; "loca"; "cmap"; "maxp"; "cvt "; "glyf"; "prep"; "hmtx"; "fpgm"] ["head"; "hhea"; "loca"; "cmap"; "maxp"; "cvt "; "glyf"; "prep"; "hmtx"; "fpgm"]
@ -441,6 +477,6 @@ let parse ?(subset=[]) data ~encoding =
let b = mk_b (i32toi locaoffset) in let b = mk_b (i32toi locaoffset) in
let loca = read_loca_table indexToLocFormat numGlyphs b in let loca = read_loca_table indexToLocFormat numGlyphs b in
let subset = remove_unneeded_tables major minor !tables indexToLocFormat subset encoding !glyphcodes loca data in let subset = remove_unneeded_tables major minor !tables indexToLocFormat subset encoding !glyphcodes loca data in
{flags; minx; miny; maxx; maxy; italicangle; ascent; descent; [{flags; minx; miny; maxx; maxy; italicangle; ascent; descent;
capheight; stemv; xheight; avgwidth; maxwidth; firstchar; lastchar; capheight; stemv; xheight; avgwidth; maxwidth; firstchar; lastchar;
widths; subset} widths; subset; tounicode = None}]

View File

@ -16,9 +16,10 @@ type t =
firstchar : int; firstchar : int;
lastchar : int; lastchar : int;
widths : int array; widths : int array;
subset : Pdfio.bytes} subset : Pdfio.bytes;
tounicode : Pdfio.bytes option}
(* Parse the font, given the list of Unicode codepoints required for the subset (* Parse the font, given the list of Unicode codepoints required for the subset
and optionally their PDF codepoint too. Returns the information required for and optionally their PDF codepoint too. Returns the information required for
embedding this font in a PDF. *) embedding this font in a PDF. *)
val parse : ?subset:int list -> Pdfio.bytes -> encoding:Pdftext.encoding -> t val parse : ?subset:int list -> Pdfio.bytes -> encoding:Pdftext.encoding -> t list