Review revision of the ToUnicode patch. Text before the first "diff --git"
line is commentary and is skipped by git apply / patch.
Changes to the added code in cpdftruetype.ml:
  - codespacerange now gives both bounds (<00> <FF>); it previously had only <00>;
  - each bfrange entry is terminated by a newline;
  - comments added to the new definitions (hunk line counts adjusted to match).
Line breaks and indentation were restored by hand; apply with
"git apply --ignore-whitespace" if the context does not match exactly.

diff --git a/cpdfembed.ml b/cpdfembed.ml
index 4b9dd4a..21d7525 100644
--- a/cpdfembed.ml
+++ b/cpdfembed.ml
@@ -6,9 +6,7 @@ open Pdfutil
    b) See which of them are in the glyph list;
    c) See which of those are in (StdEncoding|MacRomanEncoding|WinAnsiEncoding), and get their codes;
    d) Build a font to do just those;
-   e) We put missing glyph or similar for any character not in the encoding
    (* FUTURE *)
-   1) Actually subset the font to save size
    2) Allow characters not in the standard encodings by builing one or more secondary subsets *)
 
 (*let () =
@@ -50,7 +48,7 @@ let embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding =
   let accepted_unicodepoints =
     calc_accepted_unicodepoints encoding_table glyphlist_table codepoints
   in
-  let f = Cpdftruetype.parse ~subset:accepted_unicodepoints fontfile ~encoding in
+  let f = hd (Cpdftruetype.parse ~subset:accepted_unicodepoints fontfile ~encoding) in
   let name_1 = basename () in
   let module TT = Cpdftruetype in
   let fontfile =
diff --git a/cpdftruetype.ml b/cpdftruetype.ml
index 96b274e..c76d128 100644
--- a/cpdftruetype.ml
+++ b/cpdftruetype.ml
@@ -19,10 +19,53 @@ type t =
    firstchar : int;
    lastchar : int;
    widths : int array;
-   subset : Pdfio.bytes}
+   subset : Pdfio.bytes;
+   tounicode : Pdfio.bytes option}
 
 let dbg = ref false (* text-based debug *)
 
+(* Fixed prologue of a ToUnicode CMap. The codespace range <00> <FF> covers
+   the one-byte character codes written by [tounicode_map]. *)
+let tounicode_preamble =
+"/CIDInit /ProcSet findresource begin\n\
+12 dict begin\n\
+begincmap\n\
+/CIDSystemInfo <<\n\
+  /Registry (Adobe)\n\
+  /Ordering (UCS)\n\
+  /Supplement 0\n\
+>> def\n\
+/CMapName /Adobe-Identity-UCS def\n\
+/CMapType 2 def\n\
+1 begincodespacerange\n\
+<00> <FF>\n\
+endcodespacerange\n"
+
+(* Fixed epilogue: closes the bfrange section opened by [tounicode_map]. *)
+let tounicode_postamble =
+"endbfrange\n\
+endcmap\n\
+CMapName currentdict /CMap defineresource pop\n\
+end\n\
+end\n"
+
+(* [tounicode_map s us] writes one bfrange entry per element of [us], mapping
+   consecutive character codes starting at [s] to those Unicode codepoints.
+   Assumes each codepoint is <= 0xFFFF: no surrogate pairs are generated. *)
+let tounicode_map (s : int) (us : int list) =
+  let b = Buffer.create 1024 in
+  let s = ref s in
+  Buffer.add_string b (Printf.sprintf "%i beginbfrange\n" (length us));
+  iter
+    (fun u -> Buffer.add_string b (Printf.sprintf "<%02x><%02x><%04x>\n" !s !s u);
+              s := !s + 1)
+    us;
+  Buffer.contents b
+
+(* [tounicode s us] concatenates prologue, bfrange map and epilogue as bytes. *)
+let tounicode s us =
+  bytes_of_string (tounicode_preamble ^ tounicode_map s us ^ tounicode_postamble)
+
 let required_tables =
   ["head"; "hhea"; "loca"; "cmap"; "maxp"; "cvt "; "glyf"; "prep"; "hmtx"; "fpgm"]
 
@@ -441,6 +484,6 @@ let parse ?(subset=[]) data ~encoding =
   let b = mk_b (i32toi locaoffset) in
   let loca = read_loca_table indexToLocFormat numGlyphs b in
   let subset = remove_unneeded_tables major minor !tables indexToLocFormat subset encoding !glyphcodes loca data in
-    {flags; minx; miny; maxx; maxy; italicangle; ascent; descent;
+    [{flags; minx; miny; maxx; maxy; italicangle; ascent; descent;
      capheight; stemv; xheight; avgwidth; maxwidth; firstchar; lastchar;
-     widths; subset}
+     widths; subset; tounicode = None}]
diff --git a/cpdftruetype.mli b/cpdftruetype.mli
index 6ad448a..79139f8 100644
--- a/cpdftruetype.mli
+++ b/cpdftruetype.mli
@@ -16,9 +16,10 @@ type t =
    firstchar : int;
    lastchar : int;
    widths : int array;
-   subset : Pdfio.bytes}
+   subset : Pdfio.bytes;
+   tounicode : Pdfio.bytes option}
 
 (* Parse the font, given the list of Unicode codepoints required for the subset
 and optionally their PDF codepoint too. Returns the information required for
 embedding this font in a PDF. *)
-val parse : ?subset:int list -> Pdfio.bytes -> encoding:Pdftext.encoding -> t
+val parse : ?subset:int list -> Pdfio.bytes -> encoding:Pdftext.encoding -> t list