cpdf-source/cpdftruetype.ml

677 lines
29 KiB
OCaml
Raw Normal View History

2023-07-05 16:06:18 +02:00
(* Make one or more subsets from a TrueType font *)
2022-09-11 15:52:08 +02:00
open Pdfutil
open Pdfio
2023-07-06 17:26:33 +02:00
(* FIXME Proper widths for .notdef, and warn on .notdef being produced *)
(* FIXME Add suport for composite glyphs *)
2023-07-06 17:37:16 +02:00
(* FIXME Get rid of double-calling 1) make font then 2) collect chars then 3) subset it i.e the subset = [] stuff *)
2023-07-07 15:34:51 +02:00
(* FIXME Make it work with -draw *)
(* FIXME Base on bytes not bits *)
(* FIXME proper error handling for missing file in cpdfcommand.embed_font *)
2023-07-07 16:00:23 +02:00
(* FIXME Make sure -embed-std14 works for all commands *)
(* FIXME Clarify and document or deprecate -no-embed-fonts *)
2023-07-06 17:26:33 +02:00
let dbg = ref false
2023-06-12 15:53:19 +02:00
2023-07-05 16:06:18 +02:00
let _ =
Pdfe.logger := (fun s -> print_string s; flush stdout)
2023-07-04 17:52:25 +02:00
2022-09-11 15:52:08 +02:00
type t =
{flags : int;
minx : int;
miny : int;
maxx : int;
maxy : int;
italicangle : int;
ascent : int;
descent : int;
capheight : int;
stemv : int;
xheight : int;
avgwidth : int;
maxwidth : int;
firstchar : int;
lastchar : int;
widths : int array;
2022-10-10 15:51:58 +02:00
subset_fontfile : Pdfio.bytes;
subset : int list;
2022-10-10 17:40:57 +02:00
tounicode : (int, string) Hashtbl.t option}
2022-09-11 15:52:08 +02:00
2023-06-13 17:48:21 +02:00
let debug_t t =
2023-06-22 14:50:30 +02:00
let hex u =
let b = Buffer.create 32 in
String.iter (fun x -> Buffer.add_string b (Printf.sprintf "%02X" (int_of_char x))) u;
Buffer.contents b
in
Printf.printf "firstchar: %i\n" t.firstchar;
Printf.printf "lastchar: %i\n" t.lastchar;
Printf.printf "widths:"; Array.iter (Printf.printf " %i") t.widths; Printf.printf "\n";
2023-07-07 15:34:51 +02:00
Printf.printf "fontfile of length %i\n" (bytes_size t.subset_fontfile);
2023-06-22 14:50:30 +02:00
Printf.printf "subset:"; iter (Printf.printf " U+%04X") t.subset; Printf.printf "\n";
Printf.printf "tounicode:\n";
begin match t.tounicode with
| None -> Printf.printf "None";
| Some table -> iter (fun (k, v) -> Printf.printf "%i --> U+%s\n" k (hex v)) (sort compare (list_of_hashtbl table))
end;
Printf.printf "\n"
2023-06-13 17:48:21 +02:00
2022-09-28 14:04:20 +02:00
let required_tables =
2022-10-04 16:34:22 +02:00
["head"; "hhea"; "loca"; "cmap"; "maxp"; "cvt "; "glyf"; "prep"; "hmtx"; "fpgm"]
2022-09-28 14:04:20 +02:00
2022-09-11 15:52:08 +02:00
(* 32-bit signed fixed-point number (16.16) returned as two ints *)
let read_fixed b =
let a = getval_31 b 16 in
let b = getval_31 b 16 in
a, b
(* 16-bit unsigned integer *)
let read_ushort b = getval_31 b 16
(* 32-bit unsigned integer *)
let read_ulong b = getval_32 b 32
(* Signed byte *)
let read_byte b = getval_31 b 8
(* Signed short *)
let read_short b = sign_extend 16 (getval_31 b 16)
(* f2dot14 - 2 bit signed integer part, 14 bit unsigned fraction *)
let read_f2dot14 b =
let v = read_ushort b in
float_of_int (sign_extend 2 (v lsr 14)) +. (float_of_int (v land 0x3FFF) /. 16384.)
(* discard n bytes *)
let discard_bytes b n =
for x = 1 to n do ignore (getval_31 b 8) done
let padding n =
if n mod 4 = 0 then 0 else 4 - n mod 4
let padding32 n =
i32ofi (padding (i32toi n))
2022-09-26 20:12:44 +02:00
let pdf_unit unitsPerEm x =
2022-09-26 20:14:48 +02:00
int_of_float (float_of_int x *. 1000. /. float_of_int unitsPerEm +. 0.5)
2022-09-26 20:12:44 +02:00
2022-09-11 15:52:08 +02:00
let string_of_tag t =
Printf.sprintf "%c%c%c%c"
(char_of_int (i32toi (Int32.shift_right t 24)))
(char_of_int (i32toi (Int32.logand 0x000000FFl (Int32.shift_right t 16))))
(char_of_int (i32toi (Int32.logand 0x000000FFl (Int32.shift_right t 8))))
(char_of_int (i32toi (Int32.logand 0x000000FFl t)))
let read_format_6_encoding_table b =
let firstCode = read_ushort b in
let entryCount = read_ushort b in
let t = null_hash () in
for x = firstCode to firstCode + entryCount - 1 do
Hashtbl.add t x (read_ushort b)
done;
t
2022-09-11 15:52:08 +02:00
let read_magic_formula b glyphIndexArrayStart seg segCount ro c sc =
b.input.seek_in (glyphIndexArrayStart + (seg - segCount + ro / 2 + (c - sc)) * 2);
b.bit <- 0;
b.bitsread <- 0;
b.currbyte <- 0;
read_short b
2022-09-11 15:52:08 +02:00
let read_format_4_encoding_table b =
let t = null_hash () in
let segCountX2 = read_ushort b in
let segCount = segCountX2 / 2 in
let _ (* searchRange *) = read_ushort b in
let _ (* entrySelector *) = read_ushort b in
let _ (* rangeShift *) = read_ushort b in
2022-09-11 15:52:08 +02:00
let endCodes = Array.init segCount (fun _ -> read_ushort b) in
let _ (* reservedPad *) = read_ushort b in
let startCodes = Array.init segCount (fun _ -> read_ushort b) in
let idDelta = Array.init segCount (fun _ -> read_ushort b) in
let idRangeOffset = Array.init segCount (fun _ -> read_ushort b) in
let glyphIndexArrayStart = b.input.pos_in () in
2022-09-11 15:52:08 +02:00
for seg = 0 to segCount - 1 do
let ec, sc, del, ro = endCodes.(seg), startCodes.(seg), idDelta.(seg), idRangeOffset.(seg) in
2022-09-11 15:52:08 +02:00
for c = sc to ec do
2023-06-12 17:27:46 +02:00
if c != 0xFFFF then
if ro = 0 then Hashtbl.add t c ((c + del) mod 65536) else
2023-06-12 17:27:46 +02:00
let v = read_magic_formula b glyphIndexArrayStart seg segCount ro c sc in
Hashtbl.add t c (((if v = 0 then c else v) + del) mod 65536)
2022-09-11 15:52:08 +02:00
done
done;
t
2023-06-12 17:27:46 +02:00
let print_encoding_table format (table : (int, int) Hashtbl.t) =
2022-11-22 16:09:21 +01:00
let unicodedata = Cpdfunicodedata.unicodedata () in
let unicodetable = Hashtbl.create 16000 in
iter
(fun x ->
Hashtbl.add unicodetable x.Cpdfunicodedata.code_value x.Cpdfunicodedata.character_name)
unicodedata;
let l = sort compare (list_of_hashtbl table) in
if !dbg then Printf.printf "Format table %i: There are %i characters in this font\n" format (length l);
2022-10-07 20:00:42 +02:00
iter
2022-11-22 16:09:21 +01:00
(fun (c, gi) ->
let str = Printf.sprintf "%04X" c in
2023-07-06 17:26:33 +02:00
if !dbg then
Printf.printf "Char %s (%s) is at glyph index %i\n"
str (try Hashtbl.find unicodetable str with Not_found -> "Not_found") gi)
2023-06-12 17:27:46 +02:00
l
2022-10-07 20:00:42 +02:00
2022-09-11 15:52:08 +02:00
let read_encoding_table fmt length version b =
if !dbg then Printf.printf "********** format %i table has length, version %i, %i\n" fmt length version;
2022-09-11 15:52:08 +02:00
match fmt with
| 0 ->
2022-11-22 16:09:21 +01:00
if !dbg then Printf.printf "read_encoding_table: format 0\n";
2022-09-11 15:52:08 +02:00
let t = null_hash () in
for x = 0 to 255 do Hashtbl.add t x (read_byte b) done;
t
2022-10-07 20:00:42 +02:00
| 4 ->
2022-11-22 16:09:21 +01:00
if !dbg then Printf.printf "read_encoding_table: format 4\n";
2022-11-25 12:55:22 +01:00
read_format_4_encoding_table b;
2022-10-07 20:00:42 +02:00
| 6 ->
2022-11-22 16:09:21 +01:00
if !dbg then Printf.printf "read_encoding_table: format 6\n";
2022-11-25 12:55:22 +01:00
read_format_6_encoding_table b;
2023-04-25 14:45:56 +02:00
| n -> raise (Pdf.PDFError "read_encoding_table: format %i not known\n")
2022-09-11 15:52:08 +02:00
let read_loca_table indexToLocFormat numGlyphs b =
2022-10-04 16:34:22 +02:00
match indexToLocFormat with
| 0 -> Array.init (numGlyphs + 1) (function _ -> i32ofi (read_ushort b * 2))
| 1 -> Array.init (numGlyphs + 1) (function _ -> read_ulong b)
| _ -> raise (Pdf.PDFError "Unknown indexToLocFormat in read_loca_table")
2022-09-11 15:52:08 +02:00
2023-07-04 17:52:25 +02:00
let read_os2_table unitsPerEm b blength =
let version = read_ushort b in
if !dbg then Printf.printf "OS/2 table blength = %i bytes, version number = %i\n" blength version;
let xAvgCharWidth = pdf_unit unitsPerEm (read_short b) in
discard_bytes b 64; (* discard 14 entries usWeightClass...fsLastCharIndex *)
(* -- end of original OS/2 Version 0 Truetype table. Must check length before reading now. *)
let sTypoAscender = if blength > 68 then pdf_unit unitsPerEm (read_short b) else 0 in
let sTypoDescender = if blength > 68 then pdf_unit unitsPerEm (read_short b) else 0 in
discard_bytes b 6; (* discard sTypoLineGap...usWinDescent *)
(* -- end of OpenType version 0 table *)
discard_bytes b 8; (* discard ulCodePageRange1, ulCodePageRange2 *)
(* -- end of OpenType version 1 table *)
let sxHeight = if version < 2 then 0 else pdf_unit unitsPerEm (read_short b) in
let sCapHeight = if version < 2 then 0 else pdf_unit unitsPerEm (read_short b) in
(sTypoAscender, sTypoDescender, sCapHeight, sxHeight, xAvgCharWidth)
let read_post_table b =
discard_bytes b 4; (* discard version *)
let italicangle, n = read_fixed b in
italicangle
(* (nb bit 1 is actualy bit 0 etc.) *)
let calculate_flags symbolic italicangle =
let italic = if italicangle <> 0 then 1 else 0 in
let symbolic, nonsymbolic = if symbolic then 1, 0 else 0, 1 in
(italic lsl 6) lor (symbolic lsl 2) lor (nonsymbolic lsl 5)
let calculate_limits subset =
if subset = [] then (0, 255) else
extremes (sort compare subset)
let calculate_stemv () = 0
let read_hhea_table b =
discard_bytes b 34;
read_ushort b (* numOfLongHorMetrics *)
let read_hmtx_table numOfLongHorMetrics b =
Array.init
numOfLongHorMetrics
(fun _ -> let r = read_ushort b in ignore (read_short b); r)
2022-10-05 21:34:45 +02:00
let write_loca_table subset cmap indexToLocFormat bs loca =
2022-10-04 17:51:54 +02:00
let locnums = null_hash () in
2022-10-05 21:34:45 +02:00
Hashtbl.add locnums 0 (); (* .notdef *)
2022-10-04 17:51:54 +02:00
iter
(fun u ->
try
let locnum = Hashtbl.find cmap u in
2023-06-14 16:30:01 +02:00
if !dbg then Printf.printf "write_loca_table: Unicode U+%04X is at location number %i\n" u locnum;
Hashtbl.add locnums locnum ()
with
Not_found -> ())
2022-10-04 17:51:54 +02:00
subset;
2023-07-04 17:36:32 +02:00
let len = ref 0 in
2022-10-06 16:28:36 +02:00
let write_entry loc position =
match indexToLocFormat with
2023-07-04 17:36:32 +02:00
| 0 -> len += 2; putval bs 16 (i32div position 2l)
| 1 -> len += 4; putval bs 32 position
2022-10-06 16:28:36 +02:00
| _ -> raise (Pdf.PDFError "Unknown indexToLocFormat in write_loca_table")
in
let pos = ref 0l in
let pairs =
map
(fun loc ->
let len = i32sub loca.(loc + 1) loca.(loc) in
let r = (loc, !pos) in
pos := i32add !pos len;
r)
(sort compare (map fst (list_of_hashtbl locnums)))
in
let pairs = Array.of_list (pairs @ [(Array.length loca - 1, !pos)]) in
2022-10-04 17:51:54 +02:00
Array.iteri
2022-10-06 16:28:36 +02:00
(fun i (loc, off) ->
if i <> Array.length pairs - 1 then
begin
write_entry loc off;
let loc', off' = pairs.(i + 1) in
for x = 0 to loc' - loc - 2 do write_entry (loc + x) off' done
end
else
write_entry loc off)
2023-07-04 17:36:32 +02:00
pairs;
for x = 1 to padding !len do putval bs 8 0l done
2022-10-04 15:59:42 +02:00
2022-10-05 18:01:58 +02:00
(* Write the notdef glyf, and any others in the subset *)
let write_glyf_table subset cmap bs mk_b glyfoffset loca =
2022-10-06 16:28:36 +02:00
if !dbg then Printf.printf "***write_glyf_table\n";
2022-10-05 18:01:58 +02:00
let locnums = null_hash () in
Hashtbl.add locnums 0 (); (* .notdef *)
iter
(fun u ->
try
let locnum = Hashtbl.find cmap u in
if !dbg then Printf.printf "write_glyf_table: Unicode U+%04X is at location number %i\n" u locnum;
Hashtbl.add locnums locnum ()
with
Not_found -> ())
2022-10-05 18:01:58 +02:00
subset;
let locnums = sort compare (map fst (list_of_hashtbl locnums)) in
2022-10-06 16:28:36 +02:00
if !dbg then (Printf.printf "We want glyfs for locations: "; iter (Printf.printf "%i ") locnums; Printf.printf "\n");
2022-10-05 18:01:58 +02:00
let byteranges = map (fun x -> (loca.(x), loca.(x + 1))) locnums in
2022-10-06 16:28:36 +02:00
if !dbg then (Printf.printf "Byte ranges: "; iter (fun (a, b) -> Printf.printf "(%li, %li) " a b) byteranges; Printf.printf "\n");
2022-10-06 17:53:13 +02:00
let len = List.fold_left i32add 0l (map (fun (a, b) -> i32sub b a) byteranges) in
2022-10-05 18:01:58 +02:00
let write_bytes bs a l =
2022-10-06 16:28:36 +02:00
if !dbg then Printf.printf "glyf: write_bytes %li %li\n" a l;
2022-10-05 18:01:58 +02:00
let b = mk_b (i32toi (i32add glyfoffset a)) in
for x = 1 to i32toi l do putval bs 8 (getval_32 b 8) done
in
2022-10-06 17:53:13 +02:00
iter (fun (a, b) -> write_bytes bs a (i32sub b a)) byteranges;
for x = 1 to padding (i32toi len) do putval bs 8 0l done;
2022-10-07 15:37:05 +02:00
len
2022-10-05 16:10:07 +02:00
2023-06-22 16:48:38 +02:00
let write_cmap_table subset cmap bs =
2023-06-22 17:26:35 +02:00
if !dbg then Printf.printf "***write_cmap_table\n";
2023-06-29 16:09:54 +02:00
let glyphindexes =
map (fun code -> try Hashtbl.find cmap code with Not_found -> 0) subset
2023-06-29 16:09:54 +02:00
in
2023-06-22 17:26:35 +02:00
putval bs 16 0l; (* table version number *)
putval bs 16 1l; (* number of encoding tables *)
putval bs 16 1l; (* platform ID *)
2023-07-05 14:22:25 +02:00
putval bs 16 0l; (* platform encoding ID *)
2023-06-22 17:26:35 +02:00
putval bs 32 12l; (* subtable offset = 12 bytes from beginning of table *)
putval bs 16 6l; (* Table format 6 *)
putval bs 16 (i32ofi (10 + 2 * length glyphindexes)); (* subtable length *)
putval bs 16 0l;
putval bs 16 33l; (* first character code *)
putval bs 16 (i32ofi (length glyphindexes)); (* number of character codes *)
iter (fun gi -> putval bs 16 (i32ofi gi)) glyphindexes; (* glyph indexes *)
let len = i32ofi (22 + 2 * length glyphindexes) in
for x = 1 to padding (i32toi len) do putval bs 8 0l done;
2023-06-22 17:26:35 +02:00
len
2023-06-22 16:48:38 +02:00
2022-09-28 20:16:48 +02:00
let calculate_widths unitsPerEm encoding firstchar lastchar subset cmapdata hmtxdata =
2023-07-04 17:52:25 +02:00
(* For widths, we need the unicode code, not the unencoded byte *)
let unicode_codepoint_of_pdfcode encoding_table glyphlist_table p =
try
hd (Hashtbl.find glyphlist_table (Hashtbl.find encoding_table p))
with
Not_found -> 0
in
if lastchar < firstchar then Cpdferror.error "lastchar < firstchar" else
2022-10-04 17:09:20 +02:00
(*if !dbg then List.iter (fun (a, b) -> Printf.printf "%i -> %i\n" a b) (sort compare (list_of_hashtbl cmapdata));*)
2022-09-19 17:21:14 +02:00
let encoding_table = Pdftext.table_of_encoding encoding in
let glyphlist_table = Pdfglyphlist.glyph_hashes () in
2022-09-11 15:52:08 +02:00
Array.init
(lastchar - firstchar + 1)
(fun pos ->
let code = pos + firstchar in
2023-06-16 16:53:20 +02:00
if !dbg then Printf.printf "code %i --> " code;
2022-09-19 17:21:14 +02:00
let code = unicode_codepoint_of_pdfcode encoding_table glyphlist_table code in
2023-06-16 16:53:20 +02:00
if !dbg then Printf.printf "unicode %i --> " code;
2022-09-16 18:18:17 +02:00
if subset <> [] && not (mem code subset) then 0 else
2022-09-11 15:52:08 +02:00
try
let glyphnum = Hashtbl.find cmapdata code in
2023-06-16 16:53:20 +02:00
if !dbg then Printf.printf "glyph number %i --> " glyphnum;
2023-07-05 17:22:16 +02:00
(* If it fails, we are a monospaced font. Pick the last hmtxdata entry. *)
let width = try hmtxdata.(glyphnum) with _ -> hmtxdata.(Array.length hmtxdata - 1) in
2023-06-16 16:53:20 +02:00
if !dbg then Printf.printf "width %i\n" width;
2022-09-25 17:00:45 +02:00
pdf_unit unitsPerEm width
2022-09-14 19:08:14 +02:00
with e -> if !dbg then Printf.printf "no width for %i\n" code; 0)
2022-09-11 15:52:08 +02:00
2023-06-16 15:44:03 +02:00
let calculate_width_higher unitsPerEm firstchar lastchar subset cmapdata hmtxdata =
let subset = Array.of_list subset in
2023-06-16 17:11:06 +02:00
Array.init
(lastchar - firstchar + 1)
(fun pos ->
try
let glyphnum = Hashtbl.find cmapdata subset.(pos) in
if !dbg then Printf.printf "glyph number %i --> " glyphnum;
(* If it fails, we are a monospaced font. Pick the last hmtxdata entry. *)
let width = try hmtxdata.(glyphnum) with _ -> hmtxdata.(Array.length hmtxdata - 1) in
if !dbg then Printf.printf "width %i\n" width;
pdf_unit unitsPerEm width
with
Not_found -> 0)
2023-06-16 15:44:03 +02:00
2022-09-26 20:12:44 +02:00
let calculate_maxwidth unitsPerEm hmtxdata =
pdf_unit unitsPerEm (hd (sort (fun a b -> compare b a) (Array.to_list hmtxdata)))
2022-09-11 15:52:08 +02:00
2022-10-06 19:54:35 +02:00
2022-11-23 16:15:17 +01:00
let fonumr = ref (-1)
2022-11-23 17:10:32 +01:00
let fonum () = fonumr += 1; !fonumr
2022-11-23 16:15:17 +01:00
2022-10-10 17:08:07 +02:00
let subset_font major minor tables indexToLocFormat subset encoding cmap loca mk_b glyfoffset data =
2022-09-28 18:17:54 +02:00
let tables = Array.of_list (sort (fun (_, _, o, _) (_, _, o', _) -> compare o o') tables) in
let tablesout = ref [] in
let cut = ref 0l in
2022-09-29 15:52:50 +02:00
if !dbg then Printf.printf "***Input:\n";
2022-09-28 18:17:54 +02:00
Array.iteri
(fun i (tag, checkSum, offset, ttlength) ->
2022-09-29 15:52:50 +02:00
if !dbg then Printf.printf "tag = %li = %s, offset = %li\n" tag (string_of_tag tag) offset;
2022-09-28 18:17:54 +02:00
if mem (string_of_tag tag) required_tables then
2023-07-04 16:58:38 +02:00
tablesout := (tag, checkSum, i32sub offset !cut, ttlength)::!tablesout
2022-09-28 18:17:54 +02:00
else
2023-07-04 16:58:38 +02:00
if i < Array.length tables - 1 then
cut := i32add !cut (match tables.(i + 1) with (_, _, offset', _) -> i32sub offset' offset))
2022-09-28 18:17:54 +02:00
tables;
2022-09-28 19:03:32 +02:00
let header_size_reduction = i32ofi (16 * (Array.length tables - length !tablesout)) in
2022-10-06 17:53:13 +02:00
let glyf_table_size_reduction = ref 0l in
2023-06-22 16:48:38 +02:00
let cmap_table_size_reduction = ref 0l in
2022-09-29 15:23:23 +02:00
let newtables =
Array.of_list
(map
2022-10-06 13:51:28 +02:00
(fun (tag, checksum, offset, ttlength) ->
2022-10-06 16:28:36 +02:00
let ttlength =
if string_of_tag tag = "glyf" && subset <> [] then
let bs = make_write_bitstream () in
2022-10-07 15:37:05 +02:00
let newlen = write_glyf_table subset cmap bs mk_b glyfoffset loca in
let paddedlen = i32ofi (bytes_size (bytes_of_write_bitstream bs)) in
2022-10-07 16:59:44 +02:00
if !dbg then Printf.printf "new glyf table length = %li\n" newlen;
glyf_table_size_reduction := i32sub (i32add ttlength (padding32 ttlength)) paddedlen;
2022-10-06 17:53:13 +02:00
newlen
2023-06-29 16:09:54 +02:00
else if string_of_tag tag = "cmap" && subset <> [] && encoding = Pdftext.ImplicitInFontFile then
2023-07-06 17:26:33 +02:00
let bs = make_write_bitstream () in
2023-06-22 16:48:38 +02:00
let newlen = write_cmap_table subset cmap bs in
let paddedlen = i32ofi (bytes_size (bytes_of_write_bitstream bs)) in
if !dbg then Printf.printf "new cmap table length = %li\n" newlen;
cmap_table_size_reduction := i32sub (i32add ttlength (padding32 ttlength)) paddedlen;
2023-06-22 16:48:38 +02:00
newlen
else
ttlength
2022-10-06 16:28:36 +02:00
in
2023-06-22 16:48:38 +02:00
(* Don't reduce by a table size reduction we have just set, but otherwise do. *)
2022-10-06 17:53:13 +02:00
let offset' =
i32sub
(i32sub offset header_size_reduction)
2023-06-22 16:48:38 +02:00
(if string_of_tag tag = "glyf" then !cmap_table_size_reduction else
if string_of_tag tag = "cmap" then !glyf_table_size_reduction else
i32add !cmap_table_size_reduction !glyf_table_size_reduction)
2022-10-06 17:53:13 +02:00
in
(tag, checksum, offset', ttlength))
2022-09-29 15:23:23 +02:00
(rev !tablesout))
2022-09-28 19:03:32 +02:00
in
2022-09-29 15:52:50 +02:00
if !dbg then Printf.printf "***Reduced:\n";
2022-09-29 15:23:23 +02:00
Array.iter
2022-09-28 19:03:32 +02:00
(fun (tag, checkSum, offset, ttlength) ->
2022-10-07 15:37:05 +02:00
if !dbg then Printf.printf "tag = %li = %s, offset = %li, length = %li\n" tag (string_of_tag tag) offset ttlength)
2022-09-29 15:23:23 +02:00
newtables;
2022-09-28 20:16:48 +02:00
let bs = make_write_bitstream () in
2022-09-29 15:23:23 +02:00
(* table directory *)
let numtables = Array.length newtables in
2022-09-28 20:16:48 +02:00
putval bs 16 (i32ofi major);
putval bs 16 (i32ofi minor);
2022-09-29 15:23:23 +02:00
putval bs 16 (i32ofi numtables); (* numTables *)
putval bs 16 (i32ofi (16 * pow2lt numtables)); (* searchRange *)
putval bs 16 (i32ofi (int_of_float (log (float_of_int (pow2lt numtables))))); (* entrySelector *)
putval bs 16 (i32ofi (numtables * 16)); (* rangeShift *)
Array.iter
(fun (tag, checkSum, offset, ttlength) ->
putval bs 32 tag;
putval bs 32 checkSum;
putval bs 32 offset;
putval bs 32 ttlength)
newtables;
(* find each table in original data, calculating the length from the next offset.
On the last, copy until we run out of data *)
let findtag tag =
let off = ref 0l in
let len = ref None in
begin try
for x = 0 to Array.length tables - 1 do
let t, _, offset, _ = tables.(x) in
if t = tag then
begin
off := offset;
if x < Array.length tables - 1 then
len := Some (let _, _, nextoffset, _ = tables.(x + 1) in i32sub nextoffset offset);
raise Exit
end
done;
Cpdferror.error "failed to find table"
2022-09-29 15:23:23 +02:00
with
Exit -> (!off, !len)
end
in
Array.iter
(fun (tag, _, _, _) ->
2022-09-29 15:52:50 +02:00
if !dbg then Printf.printf "Writing %s table\n" (string_of_tag tag);
2022-10-05 18:01:58 +02:00
if string_of_tag tag = "loca" && subset <> [] then
2022-10-04 17:51:54 +02:00
write_loca_table subset cmap indexToLocFormat bs loca
2022-10-05 18:01:58 +02:00
else if string_of_tag tag = "glyf" && subset <> [] then
2022-10-07 15:37:05 +02:00
ignore (write_glyf_table subset cmap bs mk_b glyfoffset loca)
2023-06-29 16:09:54 +02:00
else if string_of_tag tag = "cmap" && subset <> [] && encoding = Pdftext.ImplicitInFontFile then
2023-06-29 14:53:04 +02:00
ignore (write_cmap_table subset cmap bs)
2022-10-05 16:10:07 +02:00
else
2022-10-04 16:34:22 +02:00
match findtag tag with
| (og_off, Some len) ->
let b = mk_b (i32toi og_off) in
for x = 0 to i32toi len - 1 do putval bs 8 (getval_32 b 8) done
| (og_off, None) ->
let b = mk_b (i32toi og_off) in
try
while true do putval bs 8 (getval_32 b 8) done
with
_ -> ())
2022-09-29 15:23:23 +02:00
newtables;
2023-07-07 15:34:51 +02:00
bytes_of_write_bitstream bs
2022-09-28 18:17:54 +02:00
2023-06-13 22:02:50 +02:00
let write_font filename data =
let fh = open_out_bin filename in
2023-07-07 15:34:51 +02:00
output_string fh (string_of_bytes data);
2023-06-13 22:02:50 +02:00
close_out fh
2023-06-16 16:53:20 +02:00
let find_main encoding subset =
2023-07-06 17:26:33 +02:00
let encoding_table = Pdftext.table_of_encoding encoding in
let first, rest =
2023-06-16 18:24:13 +02:00
List.partition
2023-06-16 17:21:52 +02:00
(fun u -> try ignore (Hashtbl.find encoding_table u); true with Not_found -> false)
2023-07-06 17:26:33 +02:00
subset
in
(first, splitinto 224 rest)
2023-06-16 16:53:20 +02:00
let parse ~subset data encoding =
2022-09-11 15:52:08 +02:00
let mk_b byte_offset = bitbytes_of_input (let i = input_of_bytes data in i.seek_in byte_offset; i) in
let b = mk_b 0 in
let major, minor = read_fixed b in
if !dbg then Printf.printf "Truetype font version %i.%i\n" major minor;
let numTables = read_ushort b in
let searchRange = read_ushort b in
let entrySelector = read_ushort b in
let rangeShift = read_ushort b in
if !dbg then Printf.printf "numTables = %i, searchRange = %i, entrySelector = %i, rangeShift = %i\n"
numTables searchRange entrySelector rangeShift;
let tables = ref [] in
for x = 1 to numTables do
let tag = read_ulong b in
let checkSum = read_ulong b in
let offset = read_ulong b in
let ttlength = read_ulong b in
if !dbg then Printf.printf "tag = %li = %s, checkSum = %li, offset = %li, ttlength = %li\n"
tag (string_of_tag tag) checkSum offset ttlength;
tables =| (tag, checkSum, offset, ttlength);
done;
2022-09-26 20:12:44 +02:00
let headoffset, headlength =
match keep (function (t, _, _, _) -> string_of_tag t = "head") !tables with
| (_, _, o, l)::_ -> o, l
| [] -> raise (Pdf.PDFError "No maxp table found in TrueType font")
in
let b = mk_b (i32toi headoffset) in
discard_bytes b 18;
let unitsPerEm = read_ushort b in
discard_bytes b 16;
2022-09-28 19:03:32 +02:00
let minx = pdf_unit unitsPerEm (read_short b) in
let miny = pdf_unit unitsPerEm (read_short b) in
let maxx = pdf_unit unitsPerEm (read_short b) in
let maxy = pdf_unit unitsPerEm (read_short b) in
2022-09-26 20:12:44 +02:00
discard_bytes b 6;
let indexToLocFormat = read_short b in
let _ (*glyphDataFormat*) = read_short b in
if !dbg then Printf.printf "head table: indexToLocFormat is %i\n" indexToLocFormat;
if !dbg then Printf.printf "box %i %i %i %i\n" minx miny maxx maxy;
2022-09-11 15:52:08 +02:00
let os2 =
match keep (function (t, _, _, _) -> string_of_tag t = "OS/2") !tables with
| (_, _, o, l)::_ -> Some (o, l)
| [] -> None
in
let ascent, descent, capheight, xheight, avgwidth =
match os2 with
2023-07-06 17:26:33 +02:00
| None -> raise (Pdf.PDFError "No os/2 table found in truetype font")
2022-09-26 20:12:44 +02:00
| Some (o, l) -> let b = mk_b (i32toi o) in read_os2_table unitsPerEm b (i32toi l)
2022-09-11 15:52:08 +02:00
in
let italicangle =
match keep (function (t, _, _, _) -> string_of_tag t = "post") !tables with
| (_, _, o, _)::_ -> read_post_table (mk_b (i32toi o))
| _ -> 0
in
if !dbg then
Printf.printf "ascent %i descent %i capheight %i xheight %i avgwidth %i\n"
ascent descent capheight xheight avgwidth;
let cmap =
match keep (function (t, _, _, _) -> string_of_tag t = "cmap") !tables with
| (_, _, o, l)::_ -> Some (o, l)
| [] -> None
in
let glyphcodes = ref (null_hash ()) in
begin match cmap with
| None ->
2022-10-07 20:00:42 +02:00
for x = 0 to 255 do Hashtbl.add !glyphcodes x x done
2022-09-11 15:52:08 +02:00
| Some (cmapoffset, cmaplength) ->
let b = mk_b (i32toi cmapoffset) in
let cmap_version = read_ushort b in
let num_encoding_tables = read_ushort b in
if !dbg then Printf.printf "cmap version %i. There are %i encoding tables\n"
cmap_version num_encoding_tables;
for x = 1 to num_encoding_tables do
let platform_id = read_ushort b in
let encoding_id = read_ushort b in
let subtable_offset = read_ulong b in
if !dbg then Printf.printf "subtable %i. platform_id = %i, encoding_id = %i, subtable_offset = %li\n"
x platform_id encoding_id subtable_offset;
let b = mk_b (i32toi cmapoffset + i32toi subtable_offset) in
let fmt = read_ushort b in
let lngth = read_ushort b in
let version = read_ushort b in
if !dbg then Printf.printf "subtable has format %i, length %i, version %i\n" fmt lngth version;
let got_glyphcodes = read_encoding_table fmt lngth version b in
2023-07-07 15:34:51 +02:00
Hashtbl.iter (Hashtbl.add !glyphcodes) got_glyphcodes
2022-09-11 15:52:08 +02:00
done;
end;
let maxpoffset, maxplength =
match keep (function (t, _, _, _) -> string_of_tag t = "maxp") !tables with
| (_, _, o, l)::_ -> o, l
| [] -> raise (Pdf.PDFError "No maxp table found in TrueType font")
in
let b = mk_b (i32toi maxpoffset) in
2022-09-28 20:16:48 +02:00
let mmajor, mminor = read_fixed b in
2022-09-11 15:52:08 +02:00
let numGlyphs = read_ushort b in
2022-09-28 20:16:48 +02:00
if !dbg then Printf.printf "maxp table version %i.%i: This font has %i glyphs\n" mmajor mminor numGlyphs;
2022-09-11 15:52:08 +02:00
let locaoffset, localength =
match keep (function (t, _, _, _) -> string_of_tag t = "loca") !tables with
| (_, _, o, l)::_ -> o, l
| [] -> raise (Pdf.PDFError "No loca table found in TrueType font")
in
2023-07-06 17:26:33 +02:00
let subset_1, subsets_2 = find_main encoding subset in
2023-06-16 15:44:03 +02:00
let flags_1 = calculate_flags false italicangle in
let flags_2 = calculate_flags true italicangle in
2022-10-13 20:33:52 +02:00
let firstchar_1, lastchar_1 = calculate_limits subset_1 in
2023-07-06 17:26:33 +02:00
let firstchars_2, lastchars_2 = split (map (fun subset -> (33, length subset + 33 - 1)) subsets_2) in
2022-09-11 15:52:08 +02:00
let numOfLongHorMetrics =
match keep (function (t, _, _, _) -> string_of_tag t = "hhea") !tables with
| (_, _, o, l)::_ -> let b = mk_b (i32toi o) in read_hhea_table b
| _ -> 0
in
let hmtxdata =
match keep (function (t, _, _, _) -> string_of_tag t = "hmtx") !tables with
| (_, _, o, _)::_ -> read_hmtx_table numOfLongHorMetrics (mk_b (i32toi o))
| [] -> raise (Pdf.PDFError "No hmtx table found in TrueType font")
in
2022-10-13 20:33:52 +02:00
let widths_1 = calculate_widths unitsPerEm encoding firstchar_1 lastchar_1 subset_1 !glyphcodes hmtxdata in
2023-07-06 17:26:33 +02:00
let widths_2 =
map3
(fun f l s -> calculate_width_higher unitsPerEm f l s !glyphcodes hmtxdata)
firstchars_2 lastchars_2 subsets_2
in
2022-09-26 20:12:44 +02:00
let maxwidth = calculate_maxwidth unitsPerEm hmtxdata in
2022-09-11 15:52:08 +02:00
let stemv = calculate_stemv () in
let b = mk_b (i32toi locaoffset) in
2022-10-04 16:34:22 +02:00
let loca = read_loca_table indexToLocFormat numGlyphs b in
2022-10-05 18:01:58 +02:00
let glyfoffset, glyflength =
match keep (function (t, _, _, _) -> string_of_tag t = "glyf") !tables with
| (_, _, o, l)::_ -> o, l
| [] -> raise (Pdf.PDFError "No glyf table found in TrueType font")
in
2022-10-10 17:08:07 +02:00
let main_subset =
2022-10-13 20:33:52 +02:00
subset_font major minor !tables indexToLocFormat subset_1
2022-10-10 17:08:07 +02:00
encoding !glyphcodes loca mk_b glyfoffset data
in
2023-07-06 17:26:33 +02:00
let seconds_subsets =
map
(fun subset ->
subset_font
major minor !tables indexToLocFormat subset Pdftext.ImplicitInFontFile
!glyphcodes loca mk_b glyfoffset data)
subsets_2
in
let seconds_tounicodes =
map
(fun subset ->
if subset = [] then None else
let h = null_hash () in
List.iter2
(fun n u ->
let s = implode (tl (tl (explode (Pdftext.utf16be_of_codepoints [u])))) in
Hashtbl.add h n s)
(map (( + ) 33) (indx0 subset))
subset;
Some h)
subsets_2
2023-06-13 15:07:34 +02:00
in
2023-07-06 17:26:33 +02:00
let one =
{flags = flags_1; minx; miny; maxx; maxy; italicangle; ascent; descent;
capheight; stemv; xheight; avgwidth; maxwidth; firstchar = firstchar_1; lastchar = lastchar_1;
widths = widths_1; subset_fontfile = main_subset; subset = subset_1; tounicode = None}
in
let twos =
map6
(fun firstchar lastchar widths subset_fontfile subset tounicode ->
{flags = flags_2; minx; miny; maxx; maxy; italicangle; ascent; descent;
capheight; stemv; xheight; avgwidth; maxwidth; firstchar; lastchar;
widths; subset_fontfile; subset; tounicode})
firstchars_2 lastchars_2 widths_2 seconds_subsets subsets_2 seconds_tounicodes
in
(*Printf.printf "\nMain subset:\n";
debug_t one;
write_font "one.ttf" one.subset_fontfile;
Printf.printf "\nHigher subset:\n";
debug_t two;
write_font "two.ttf" (hd twos).subset_fontfile;*)
one::twos
let parse ~subset data encoding =
2023-07-05 17:22:16 +02:00
try parse ~subset data encoding with
2023-07-06 17:26:33 +02:00
e -> raise (Cpdferror.error ("Failed to parse TrueType font: " ^ Printexc.to_string e))