2022-09-11 15:52:08 +02:00
|
|
|
(* Truetype font parsing and subsetting *)
|
|
|
|
open Pdfutil
|
|
|
|
open Pdfio
|
|
|
|
|
2022-10-05 16:14:53 +02:00
|
|
|
(* ./cpdf -font-ttf ~/repos/pdfs/fonts/NotoSans-Bold.ttf -add-text foo hello.pdf -o out.pdf *)
|
|
|
|
|
2022-09-11 15:52:08 +02:00
|
|
|
(* The result of parsing a TrueType font: the values [parse] collects from
   the font's tables, plus the bytes of the reduced font. *)
type t =
  {flags : int;          (* built by calculate_flags from the italic angle *)
   minx : int;           (* bounding box read from the head table, *)
   miny : int;           (* each coordinate converted by pdf_unit *)
   maxx : int;
   maxy : int;
   italicangle : int;    (* from the post table; 0 when there is none *)
   ascent : int;         (* sTypoAscender from the OS/2 table *)
   descent : int;        (* sTypoDescender from the OS/2 table *)
   capheight : int;      (* sCapHeight from the OS/2 table *)
   stemv : int;          (* result of calculate_stemv *)
   xheight : int;        (* sxHeight from the OS/2 table *)
   avgwidth : int;       (* xAvgCharWidth from the OS/2 table *)
   maxwidth : int;       (* result of calculate_maxwidth *)
   firstchar : int;      (* lower limit from calculate_limits *)
   lastchar : int;       (* upper limit from calculate_limits *)
   widths : int array;   (* one width per code in firstchar..lastchar *)
   subset : Pdfio.bytes; (* font data built by remove_unneeded_tables *)
   tounicode : Pdfio.bytes option}
|
2022-09-11 15:52:08 +02:00
|
|
|
|
2022-09-19 15:30:32 +02:00
|
|
|
(* When set, the functions in this file print textual debug information to
   standard output. *)
let dbg : bool ref = ref false
|
2022-09-11 15:52:08 +02:00
|
|
|
|
2022-10-04 21:35:05 +02:00
|
|
|
(* Fixed text placed at the start of the CMap built by tounicode_map. It
   declares a single codespace range covering <00> to <FF>. *)
let tounicode_preamble =
  String.concat ""
    ["/CIDInit /ProcSet findresource begin\n";
     "12 dict begin\n";
     "begincmap\n";
     "/CIDSystemInfo <<\n";
     "/Registry (Adobe)\n";
     "/Ordering (UCS)\n";
     "/Supplement 0\n";
     ">> def\n";
     "/CMapName /Adobe-Identity-UCS def\n";
     "/CMapType 2 def\n";
     "1 begincodespacerange\n";
     "<00><FF>\n";
     "endcodespacerange\n"]
|
|
|
|
|
|
|
|
(* Fixed text placed at the end of the CMap built by tounicode_map: it closes
   the bfrange section and the CMap itself. *)
let tounicode_postamble =
  String.concat ""
    ["endbfrange\n";
     "endcmap\n";
     "CMapName currentdict /CMap defineresource pop\n";
     "end\n";
     "end\n"]
|
|
|
|
|
2022-10-05 16:10:07 +02:00
|
|
|
(* Build a ToUnicode CMap as bytes. [s] is the character code given to the
   first element of [us]; each following element gets the next code. Every
   element [u] of [us] becomes one bfrange entry <code><code><u>, the codes
   written as two hex digits and [u] as four. *)
let tounicode_map s us =
  let buf = Buffer.create 1024 in
    Buffer.add_string buf tounicode_preamble;
    Printf.bprintf buf "%i beginbfrange\n" (length us);
    List.iteri
      (fun offset u ->
         let code = s + offset in
           Printf.bprintf buf "<%02x><%02x><%04x>" code code u)
      us;
    Buffer.add_string buf tounicode_postamble;
    bytes_of_string (Buffer.contents buf)
|
2022-10-04 21:35:05 +02:00
|
|
|
|
2022-09-28 14:04:20 +02:00
|
|
|
(* Tags of the tables which remove_unneeded_tables keeps; any table whose
   tag is not in this list is dropped from the output font. Note that the
   "cvt " tag includes a trailing space. *)
let required_tables =
  ["head";
   "hhea";
   "loca";
   "cmap";
   "maxp";
   "cvt ";
   "glyf";
   "prep";
   "hmtx";
   "fpgm"]
|
2022-09-28 14:04:20 +02:00
|
|
|
|
2022-09-11 15:52:08 +02:00
|
|
|
(* Read a 32-bit fixed-point number (16.16) as a pair of ints: the upper 16
   bits followed by the lower 16 bits. Both halves are returned exactly as
   read; no sign extension is applied here. *)
let read_fixed b =
  let integer_part = getval_31 b 16 in
  let fraction_part = getval_31 b 16 in
    (integer_part, fraction_part)
|
|
|
|
|
|
|
|
(* Read the next 16 bits of the bitstream as an unsigned integer. *)
let read_ushort bits = getval_31 bits 16
|
|
|
|
|
|
|
|
(* Read the next 32 bits of the bitstream, using getval_32 (the callers in
   this file treat the result as an Int32). *)
let read_ulong bits = getval_32 bits 32
|
|
|
|
|
|
|
|
(* Read the next 8 bits of the bitstream. The value is returned as read:
   unlike read_short, no sign extension is applied. *)
let read_byte bits = getval_31 bits 8
|
|
|
|
|
|
|
|
(* Read the next 16 bits of the bitstream as a signed integer, by
   sign-extending the 16-bit value. *)
let read_short bits =
  let raw = getval_31 bits 16 in
    sign_extend 16 raw
|
|
|
|
|
|
|
|
(* Read an f2dot14 number as a float: the top 2 bits are a signed integer
   part, the low 14 bits an unsigned fraction in units of 1/16384. *)
let read_f2dot14 b =
  let raw = read_ushort b in
  let integer_part = sign_extend 2 (raw lsr 14) in
  let fraction = float_of_int (raw land 0x3FFF) /. 16384. in
    float_of_int integer_part +. fraction
|
|
|
|
|
|
|
|
(* Read [n] bytes from the bitstream [b] and throw them away. Does nothing
   when [n] is zero or negative. *)
let rec discard_bytes b n =
  if n > 0 then
    begin
      ignore (getval_31 b 8);
      discard_bytes b (n - 1)
    end
|
|
|
|
|
2022-09-26 20:12:44 +02:00
|
|
|
(* Convert [x], measured in font units with [unitsPerEm] units to the em,
   to thousandths of an em, rounded to the nearest integer.

   The rounding uses [floor (v +. 0.5)]. Truncating [v +. 0.5] toward zero
   instead would round negative values the wrong way: -200.0 would become
   -199, which affects quantities such as the descent.

   [unitsPerEm] is not checked: a value of zero gives an unspecified
   result. *)
let pdf_unit unitsPerEm x =
  let scaled = float_of_int x *. 1000. /. float_of_int unitsPerEm in
    int_of_float (floor (scaled +. 0.5))
|
2022-09-26 20:12:44 +02:00
|
|
|
|
2022-09-11 15:52:08 +02:00
|
|
|
(* Convert a 32-bit table tag to its four-character string, most
   significant byte first.

   Every byte, including the top one, is extracted with a logical shift and
   a mask. An arithmetic shift with no mask would give a negative number for
   a tag whose first byte is 0x80 or above, and char_of_int would then raise
   Invalid_argument. *)
let string_of_tag t =
  let byte_at shift =
    char_of_int
      (Int32.to_int (Int32.logand 0xFFl (Int32.shift_right_logical t shift)))
  in
    String.init 4 (fun i -> byte_at (24 - 8 * i))
|
|
|
|
|
|
|
|
(* Read the body of a format 6 cmap subtable from [b]: a first character
   code, an entry count, then one 16-bit value per consecutive code. Returns
   a hash table from character code to the value read for it. Any exception
   raised while reading the entries is reported as a Failure. *)
let read_format_6_encoding_table b =
  let firstCode = read_ushort b in
  let entryCount = read_ushort b in
  let table = null_hash () in
    try
      for i = 0 to entryCount - 1 do
        Hashtbl.add table (firstCode + i) (read_ushort b)
      done;
      table
    with
      e -> failwith ("bad format 6 table: " ^ Printexc.to_string e ^ "\n")
|
|
|
|
|
|
|
|
(* Read the body of a format 4 (segment mapping) cmap subtable from [b] and
   return a hash table from character code to glyph index.

   For a segment whose idRangeOffset is zero, the glyph index is
   (code + idDelta) mod 65536. For any other segment the glyph index is
   looked up in the glyphIdArray which follows the idRangeOffset array. The
   OpenType addressing rule "idRangeOffset / 2 + (code - startCode) words
   beyond the idRangeOffset entry itself" corresponds to index
     idRangeOffset / 2 + (code - startCode) - (segCount - seg)
   in glyphIdArray. A stored value of zero means "missing glyph" and is not
   added to the table; any other value has idDelta added, mod 65536.

   The subtable length is not passed in, so only as many glyphIdArray
   entries as the segments can refer to are read. *)
let read_format_4_encoding_table b =
  let t = null_hash () in
  let segCountX2 = read_ushort b in
  let segCount = segCountX2 / 2 in
  let searchRange = read_ushort b in
  let entrySelector = read_ushort b in
  let rangeShift = read_ushort b in
  let read_segment_array () = Array.init segCount (fun _ -> read_ushort b) in
  let endCodes = read_segment_array () in
  let _ (* reservedPad *) = read_ushort b in
  let startCodes = read_segment_array () in
  let idDelta = read_segment_array () in
  let idRangeOffset = read_segment_array () in
    if !dbg then
      begin
        Printf.printf "segCount = %i, searchRange = %i, entrySelector = %i, rangeShift = %i\n" segCount searchRange entrySelector rangeShift;
        Printf.printf "endCodes\n";
        print_ints (Array.to_list endCodes);
        Printf.printf "startCodes\n";
        print_ints (Array.to_list startCodes);
        Printf.printf "idDelta\n";
        print_ints (Array.to_list idDelta);
        Printf.printf "idRangeOffset\n";
        print_ints (Array.to_list idRangeOffset);
      end;
    (* Index into glyphIdArray for character code [c] of segment [seg]. *)
    let glyph_index seg c =
      idRangeOffset.(seg) / 2 + (c - startCodes.(seg)) - (segCount - seg)
    in
    (* Number of glyphIdArray entries which the segments can refer to. *)
    let needed = ref 0 in
      for seg = 0 to segCount - 1 do
        if idRangeOffset.(seg) <> 0 && endCodes.(seg) >= startCodes.(seg) then
          needed := max !needed (glyph_index seg endCodes.(seg) + 1)
      done;
      let glyphIdArray = Array.make !needed 0 in
        (* A malformed segment may point beyond the data. Entries which
           cannot be read stay zero, i.e. missing glyph.
           NOTE(review): assumes the bitstream signals exhaustion with
           End_of_file - confirm against Pdfio. *)
        begin try
          for i = 0 to !needed - 1 do glyphIdArray.(i) <- read_ushort b done
        with
          End_of_file -> ()
        end;
        for seg = 0 to segCount - 1 do
          let ec = endCodes.(seg) in
          let sc = startCodes.(seg) in
          let del = idDelta.(seg) in
          let ro = idRangeOffset.(seg) in
            for c = sc to ec do
              if ro = 0 then
                Hashtbl.add t c ((c + del) mod 65536)
              else
                let i = glyph_index seg c in
                  if i >= 0 && i < Array.length glyphIdArray then
                    let g = glyphIdArray.(i) in
                      if g <> 0 then Hashtbl.add t c ((g + del) mod 65536)
            done
        done;
        t
|
|
|
|
|
|
|
|
(* Read the body of a cmap subtable of format [fmt] from [b], returning a
   hash table from character code to glyph index. Formats 0, 4 and 6 are
   handled. [length] and [version] are accepted but not used.

   Raises Pdf.PDFError for any other format. The message is built with
   Printf.sprintf so that it contains the actual format number rather than
   an unexpanded "%i". *)
let read_encoding_table fmt length version b =
  match fmt with
  | 0 ->
      (* Format 0: one byte for each of the 256 character codes. *)
      let t = null_hash () in
        for x = 0 to 255 do Hashtbl.add t x (read_byte b) done;
        t
  | 4 -> read_format_4_encoding_table b
  | 6 -> read_format_6_encoding_table b
  | n ->
      raise
        (Pdf.PDFError
          (Printf.sprintf "read_encoding_table: format %i not known\n" n))
|
|
|
|
|
|
|
|
(* Read the loca table from [b] as an array of numGlyphs + 1 Int32 offsets.
   With indexToLocFormat 0 each entry is a 16-bit value which is doubled;
   with indexToLocFormat 1 each entry is a 32-bit value used as it is.
   Raises Pdf.PDFError for any other indexToLocFormat. *)
let read_loca_table indexToLocFormat numGlyphs b =
  let read_entry =
    match indexToLocFormat with
    | 0 -> (fun _ -> i32ofi (read_ushort b * 2))
    | 1 -> (fun _ -> read_ulong b)
    | _ -> raise (Pdf.PDFError "Unknown indexToLocFormat in read_loca_table")
  in
    Array.init (numGlyphs + 1) read_entry
|
2022-09-11 15:52:08 +02:00
|
|
|
|
2022-10-04 17:51:54 +02:00
|
|
|
(* Write a loca table to the bitstream [bs]. [arr] holds the original
   offsets, as returned by read_loca_table. An entry whose index is the
   glyph number (found through [cmap]) of some character in [subset] is
   written with its own offset; every other entry repeats the most recently
   written offset (initially zero).

   With indexToLocFormat 0 entries are written as 16-bit halved offsets,
   with indexToLocFormat 1 as 32-bit offsets.

   The diagnostic line is printed only when [dbg] is set, like the other
   debug output in this file.

   Raises Not_found if a character of [subset] is not in [cmap], and
   Pdf.PDFError for an unknown indexToLocFormat. *)
let write_loca_table subset cmap indexToLocFormat bs arr =
  let locnums = null_hash () in
    iter
      (fun u ->
         let locnum = Hashtbl.find cmap u in
           if !dbg then Printf.printf "Unicode %i is at location number %i\n" u locnum;
           Hashtbl.add locnums locnum ())
      subset;
    let last = ref 0l in
    (* Write entry [i]: [x] if the glyph is wanted, else the previous value. *)
    let emit width i x =
      if Hashtbl.mem locnums i then
        begin
          putval bs width x;
          last := x
        end
      else
        putval bs width !last
    in
      Array.iteri
        (fun i x ->
           match indexToLocFormat with
           | 0 -> emit 16 i (i32div x 2l)
           | 1 -> emit 32 i x
           | _ -> raise (Pdf.PDFError "Unknown indexToLocFormat in write_loca_table"))
        arr
|
2022-10-04 15:59:42 +02:00
|
|
|
|
2022-10-05 16:10:07 +02:00
|
|
|
(* Placeholder for writing the glyf table: at present it ignores all three
   arguments and writes nothing. *)
let write_glyf_table _subset _cmap _bs = ()
|
|
|
|
|
2022-09-26 20:12:44 +02:00
|
|
|
(* Read the OS/2 table from [b]. [blength] is the table length in bytes.
   Returns (sTypoAscender, sTypoDescender, sCapHeight, sxHeight,
   xAvgCharWidth), each converted with pdf_unit.

   sTypoAscender and sTypoDescender are read only if the table is longer
   than 68 bytes, otherwise they are 0. sxHeight and sCapHeight are read
   only for table version 2 or above, otherwise they are 0.

   The fields between sTypoDescender and sxHeight are skipped only when
   sxHeight is actually going to be read, so a short version 0 or 1 table is
   never read beyond the fields it contains. *)
let read_os2_table unitsPerEm b blength =
  let version = read_ushort b in
    if !dbg then Printf.printf "OS/2 table blength = %i bytes, version number = %i\n" blength version;
    let xAvgCharWidth = pdf_unit unitsPerEm (read_short b) in
      discard_bytes b 64; (* discard 14 entries usWeightClass...fsLastCharIndex *)
      (* -- end of original OS/2 Version 0 Truetype table. Must check length before reading now. *)
      let sTypoAscender = if blength > 68 then pdf_unit unitsPerEm (read_short b) else 0 in
      let sTypoDescender = if blength > 68 then pdf_unit unitsPerEm (read_short b) else 0 in
      let sxHeight, sCapHeight =
        if version < 2 then (0, 0) else
          begin
            discard_bytes b 6; (* discard sTypoLineGap...usWinDescent *)
            (* -- end of OpenType version 0 table *)
            discard_bytes b 8; (* discard ulCodePageRange1, ulCodePageRange2 *)
            (* -- end of OpenType version 1 table *)
            let xheight = pdf_unit unitsPerEm (read_short b) in
            let capheight = pdf_unit unitsPerEm (read_short b) in
              (xheight, capheight)
          end
      in
        (sTypoAscender, sTypoDescender, sCapHeight, sxHeight, xAvgCharWidth)
|
|
|
|
|
|
|
|
(* Read the italic angle from the post table in [b], returning its integer
   part only.

   read_fixed returns the upper 16 bits unsigned, so they are sign-extended
   here: italic fonts normally have a negative angle, which would otherwise
   come back as a large positive number (for example 65524 instead of
   -12). *)
let read_post_table b =
  discard_bytes b 4; (* discard version *)
  let integer_part, _ = read_fixed b in
    sign_extend 16 integer_part
|
|
|
|
|
|
|
|
(* Build the font flags from the italic angle. Bit numbers below count from
   1, so "bit 1" is the least significant bit.

   Done now:
     bit 6 (value 32) is always set, for non symbolic;
     bit 7 (value 64) is set if italicangle <> 0.
   Eventually:
     set bit 2 if serif ?
     set bit 1 if fixed pitch (calculate from widths) *)
let calculate_flags italicangle =
  let nonsymbolic = 32 in
  let italic =
    match italicangle with
    | 0 -> 0
    | _ -> 1 lsl 6
  in
    nonsymbolic lor italic
|
|
|
|
|
|
|
|
(* Choose the (firstchar, lastchar) pair for [subset]: (0, 255) when the
   subset is empty, otherwise the result of applying extremes to the subset
   sorted in increasing order. *)
let calculate_limits subset =
  match subset with
  | [] -> (0, 255)
  | _ -> extremes (sort compare subset)
|
|
|
|
|
2022-09-26 20:12:44 +02:00
|
|
|
(* The StemV value for the font. Nothing is calculated at present: the
   result is always 0. *)
let calculate_stemv () : int = 0
|
2022-09-11 15:52:08 +02:00
|
|
|
|
|
|
|
(* Read numOfLongHorMetrics from the hhea table in [b]: the 16-bit unsigned
   value which follows the first 34 bytes of the table. *)
let read_hhea_table b =
  let () = discard_bytes b 34 in
    read_ushort b (* numOfLongHorMetrics *)
|
|
|
|
|
|
|
|
(* Read [numOfLongHorMetrics] records from the hmtx table in [b]. Each
   record is an unsigned 16-bit value followed by a signed 16-bit value; the
   first is kept and the second is read and dropped. *)
let read_hmtx_table numOfLongHorMetrics b =
  let read_record _ =
    let first = read_ushort b in
    let _second = read_short b in
      first
  in
    Array.init numOfLongHorMetrics read_record
|
|
|
|
|
2022-09-19 17:21:14 +02:00
|
|
|
(* For widths, we need the unicode code, not the unencoded byte. Look [p] up
   in [encoding_table], then look the result up in [glyphlist_table] and
   return the head of the list found. Returns 0 if either lookup fails. *)
let unicode_codepoint_of_pdfcode encoding_table glyphlist_table p =
  match Hashtbl.find encoding_table p with
  | exception Not_found -> 0
  | name ->
      begin try hd (Hashtbl.find glyphlist_table name) with
        Not_found -> 0
      end
|
|
|
|
|
2022-09-28 20:16:48 +02:00
|
|
|
(* Build the widths array for character codes [firstchar]..[lastchar], one
   entry per code, each converted with pdf_unit.

   Each code is converted to a unicode codepoint through [encoding], then to
   a glyph number through [cmapdata], then to a width through [hmtxdata].
   The width is 0 when [subset] is non-empty and does not contain the
   codepoint, or when the codepoint has no glyph.

   A glyph number at or beyond the end of [hmtxdata] takes the last width in
   the array: in the hmtx table, glyphs beyond numOfLongHorMetrics share the
   final advance width (this is common in monospaced fonts).

   Fails with Failure if [lastchar] < [firstchar]. *)
let calculate_widths unitsPerEm encoding firstchar lastchar subset cmapdata hmtxdata =
  if lastchar < firstchar then failwith "lastchar < firschar" else
  (*if !dbg then List.iter (fun (a, b) -> Printf.printf "%i -> %i\n" a b) (sort compare (list_of_hashtbl cmapdata));*)
  let encoding_table = Pdftext.table_of_encoding encoding in
  let glyphlist_table = Pdfglyphlist.glyph_hashes () in
  let nmetrics = Array.length hmtxdata in
  let no_width code =
    begin
      if !dbg then Printf.printf "no width for %i\n" code;
      0
    end
  in
    Array.init
      (lastchar - firstchar + 1)
      (fun pos ->
         let code = pos + firstchar in
           if !dbg then Printf.printf "code %i --> " code;
           let code = unicode_codepoint_of_pdfcode encoding_table glyphlist_table code in
             if !dbg then Printf.printf "unicode %i --> " code;
             if subset <> [] && not (mem code subset) then 0 else
               match Hashtbl.find cmapdata code with
               | exception Not_found -> no_width code
               | glyphnum ->
                   if !dbg then Printf.printf "glyph number %i --> " glyphnum;
                   if nmetrics = 0 || glyphnum < 0 then no_width code else
                     let width = hmtxdata.(min glyphnum (nmetrics - 1)) in
                       if !dbg then Printf.printf "width %i\n" width;
                       pdf_unit unitsPerEm width)
|
2022-09-11 15:52:08 +02:00
|
|
|
|
2022-09-26 20:12:44 +02:00
|
|
|
(* The largest width in [hmtxdata], converted with pdf_unit. Returns 0 for
   an empty array (for example when the font has no hhea table, so that no
   metrics were read) instead of failing. The maximum is found with a single
   pass over the array rather than by sorting it. *)
let calculate_maxwidth unitsPerEm hmtxdata =
  if Array.length hmtxdata = 0 then 0 else
    pdf_unit unitsPerEm (Array.fold_left max hmtxdata.(0) hmtxdata)
|
2022-09-11 15:52:08 +02:00
|
|
|
|
2022-10-04 17:09:20 +02:00
|
|
|
(* Build a reduced font from [data], keeping only the tables named in
   required_tables, and return it as bytes.

   [major] and [minor] are the two halves of the font version, written back
   at the start of the output. [tables] is the list of (tag, checkSum,
   offset, length) table directory entries. The loca table is rewritten by
   write_loca_table from [loca], [subset], [cmap] and [indexToLocFormat];
   the glyf table is handed to write_glyf_table; every other kept table is
   copied byte for byte from [data]. [encoding] is not used at present.

   Corrections made to the table directory header:
   - entrySelector is log2 of the largest power of two not exceeding the
     number of tables (the natural logarithm gives the wrong value);
   - rangeShift is numTables * 16 - searchRange;
   - dropping the table with the highest offset no longer indexes beyond
     the end of the table array.

   When [dbg] is set, the result is also written to "fontout.ttf" in the
   current directory; otherwise no file is written.

   NOTE(review): checksums and lengths are copied from the input even for
   the rewritten loca and glyf tables - confirm this is intended. *)
let remove_unneeded_tables major minor tables indexToLocFormat subset encoding cmap loca data =
  let tables = Array.of_list (sort (fun (_, _, o, _) (_, _, o', _) -> compare o o') tables) in
  let numoriginal = Array.length tables in
  let tablesout = ref [] in
  (* Total number of bytes dropped so far, i.e. the amount by which later
     tables move towards the start of the file. *)
  let cut = ref 0l in
    if !dbg then Printf.printf "***Input:\n";
    Array.iteri
      (fun i (tag, checkSum, offset, ttlength) ->
         if !dbg then Printf.printf "tag = %li = %s, offset = %li\n" tag (string_of_tag tag) offset;
         if mem (string_of_tag tag) required_tables then
           tablesout := (tag, checkSum, i32sub offset !cut, ttlength)::!tablesout
         else if i < numoriginal - 1 then
           (* The space a dropped table occupies runs up to the next offset.
              Nothing follows the final table, so dropping it moves nothing. *)
           let (_, _, offset', _) = tables.(i + 1) in
             cut := i32add !cut (i32sub offset' offset))
      tables;
    (* Reduce offsets by the reduction in header table size *)
    let header_size_reduction = i32ofi (16 * (numoriginal - length !tablesout)) in
    let newtables =
      Array.of_list
        (map
          (fun (tag, checksum, offset, ttlength) -> (tag, checksum, i32sub offset header_size_reduction, ttlength))
          (rev !tablesout))
    in
      if !dbg then Printf.printf "***Reduced:\n";
      Array.iter
        (fun (tag, checkSum, offset, ttlength) ->
           if !dbg then Printf.printf "tag = %li = %s, offset = %li\n" tag (string_of_tag tag) offset)
        newtables;
      let bs = make_write_bitstream () in
      (* table directory *)
      let numtables = Array.length newtables in
      (* NOTE(review): pow2lt is taken to return the largest power of two not
         exceeding its argument - confirm against Pdfutil. *)
      let maxpow2 = pow2lt numtables in
      let searchrange = 16 * maxpow2 in
      let rec ilog2 n = if n <= 1 then 0 else 1 + ilog2 (n / 2) in
        putval bs 16 (i32ofi major);
        putval bs 16 (i32ofi minor);
        putval bs 16 (i32ofi numtables); (* numTables *)
        putval bs 16 (i32ofi searchrange); (* searchRange *)
        putval bs 16 (i32ofi (ilog2 maxpow2)); (* entrySelector *)
        putval bs 16 (i32ofi (numtables * 16 - searchrange)); (* rangeShift *)
        Array.iter
          (fun (tag, checkSum, offset, ttlength) ->
             putval bs 32 tag;
             putval bs 32 checkSum;
             putval bs 32 offset;
             putval bs 32 ttlength)
          newtables;
        (* find each table in original data, calculating the length from the next offset.
           On the last, copy until we run out of data *)
        let findtag tag =
          let rec search x =
            if x >= numoriginal then failwith "failed to find table" else
              let t, _, offset, _ = tables.(x) in
                if t <> tag then search (x + 1) else
                  let len =
                    if x < numoriginal - 1 then
                      (let _, _, nextoffset, _ = tables.(x + 1) in Some (i32sub nextoffset offset))
                    else
                      None
                  in
                    (offset, len)
          in
            search 0
        in
        let mk_b byte_offset = bitbytes_of_input (let i = input_of_bytes data in i.seek_in byte_offset; i) in
          Array.iter
            (fun (tag, _, _, _) ->
               if !dbg then Printf.printf "Writing %s table\n" (string_of_tag tag);
               if string_of_tag tag = "loca" then
                 write_loca_table subset cmap indexToLocFormat bs loca
               else if string_of_tag tag = "glyf" then
                 write_glyf_table subset cmap bs
               else
                 match findtag tag with
                 | (og_off, Some len) ->
                     let b = mk_b (i32toi og_off) in
                       for x = 0 to i32toi len - 1 do putval bs 8 (getval_32 b 8) done
                 | (og_off, None) ->
                     let b = mk_b (i32toi og_off) in
                       (* Last table in the file: copy until reading fails
                          at the end of the data. *)
                       try
                         while true do putval bs 8 (getval_32 b 8) done
                       with
                         _ -> ())
            newtables;
          let bytes = bytes_of_write_bitstream bs in
            if !dbg then Printf.printf "Made subset font of length %i bytes\n" (bytes_size bytes);
            (* Debugging aid only: keep a copy of the font on disk. *)
            if !dbg then
              begin
                let o = open_out_bin "fontout.ttf" in
                  begin try output_string o (string_of_bytes bytes) with
                    e -> close_out_noerr o; raise e
                  end;
                  close_out o
              end;
            bytes
|
2022-09-28 18:17:54 +02:00
|
|
|
|
2022-10-04 23:15:14 +02:00
|
|
|
(* Parse the TrueType font held in [data] and return a one-element list
   containing its description.

   [subset] is a list of unicode codepoints (default: empty). It determines
   firstchar and lastchar (through calculate_limits), which widths are
   calculated, and which loca entries are kept in the reduced font.
   [encoding] is used to convert character codes to unicode codepoints when
   calculating widths.

   The head, OS/2, maxp, loca and hmtx tables are required: Pdf.PDFError is
   raised if one of them is missing. The post, cmap and hhea tables are
   optional. When the cmap table has several subtables, the last one read is
   the one used; when there is no cmap table, codes 0..255 map to
   themselves. *)
let parse ?(subset=[]) data encoding =
  let mk_b byte_offset = bitbytes_of_input (let i = input_of_bytes data in i.seek_in byte_offset; i) in
  let b = mk_b 0 in
  let major, minor = read_fixed b in
    if !dbg then Printf.printf "Truetype font version %i.%i\n" major minor;
    let numTables = read_ushort b in
    let searchRange = read_ushort b in
    let entrySelector = read_ushort b in
    let rangeShift = read_ushort b in
      if !dbg then Printf.printf "numTables = %i, searchRange = %i, entrySelector = %i, rangeShift = %i\n"
        numTables searchRange entrySelector rangeShift;
      let tables = ref [] in
        for x = 1 to numTables do
          let tag = read_ulong b in
          let checkSum = read_ulong b in
          let offset = read_ulong b in
          let ttlength = read_ulong b in
            if !dbg then Printf.printf "tag = %li = %s, checkSum = %li, offset = %li, ttlength = %li\n"
              tag (string_of_tag tag) checkSum offset ttlength;
            tables =| (tag, checkSum, offset, ttlength);
        done;
        (* Offset and length of the first directory entry with this tag. *)
        let find_table name =
          match keep (function (t, _, _, _) -> string_of_tag t = name) !tables with
          | (_, _, o, l)::_ -> Some (o, l)
          | [] -> None
        in
        let headoffset =
          match find_table "head" with
          | Some (o, _) -> o
          | None -> raise (Pdf.PDFError "No head table found in TrueType font")
        in
        let b = mk_b (i32toi headoffset) in
          discard_bytes b 18;
          let unitsPerEm = read_ushort b in
            discard_bytes b 16;
            let minx = pdf_unit unitsPerEm (read_short b) in
            let miny = pdf_unit unitsPerEm (read_short b) in
            let maxx = pdf_unit unitsPerEm (read_short b) in
            let maxy = pdf_unit unitsPerEm (read_short b) in
              discard_bytes b 6;
              let indexToLocFormat = read_short b in
              let _ (*glyphDataFormat*) = read_short b in
                if !dbg then Printf.printf "head table: indexToLocFormat is %i\n" indexToLocFormat;
                if !dbg then Printf.printf "box %i %i %i %i\n" minx miny maxx maxy;
                let ascent, descent, capheight, xheight, avgwidth =
                  match find_table "OS/2" with
                  | None -> raise (Pdf.PDFError "No os/2 table found in truetype font")
                  | Some (o, l) -> let b = mk_b (i32toi o) in read_os2_table unitsPerEm b (i32toi l)
                in
                let italicangle =
                  match find_table "post" with
                  | Some (o, _) -> read_post_table (mk_b (i32toi o))
                  | None -> 0
                in
                  if !dbg then
                    Printf.printf "ascent %i descent %i capheight %i xheight %i avgwidth %i\n"
                      ascent descent capheight xheight avgwidth;
                  let glyphcodes = ref (null_hash ()) in
                    begin match find_table "cmap" with
                    | None ->
                        (* No cmap table: codes 0..255 map to themselves. *)
                        let t = null_hash () in
                          for x = 0 to 255 do Hashtbl.add t x x done;
                          glyphcodes := t
                    | Some (cmapoffset, _) ->
                        let b = mk_b (i32toi cmapoffset) in
                        let cmap_version = read_ushort b in
                        let num_encoding_tables = read_ushort b in
                          if !dbg then Printf.printf "cmap version %i. There are %i encoding tables\n"
                            cmap_version num_encoding_tables;
                          for x = 1 to num_encoding_tables do
                            let platform_id = read_ushort b in
                            let encoding_id = read_ushort b in
                            let subtable_offset = read_ulong b in
                              if !dbg then Printf.printf "subtable %i. platform_id = %i, encoding_id = %i, subtable_offset = %li\n"
                                x platform_id encoding_id subtable_offset;
                              (* The subtable is read through its own
                                 bitstream, so the directory stream above is
                                 left in place for the next iteration. *)
                              let b = mk_b (i32toi cmapoffset + i32toi subtable_offset) in
                              let fmt = read_ushort b in
                              let lngth = read_ushort b in
                              let version = read_ushort b in
                                if !dbg then Printf.printf "subtable has format %i, length %i, version %i\n" fmt lngth version;
                                glyphcodes := read_encoding_table fmt lngth version b
                          done
                    end;
                    let maxpoffset =
                      match find_table "maxp" with
                      | Some (o, _) -> o
                      | None -> raise (Pdf.PDFError "No maxp table found in TrueType font")
                    in
                    let b = mk_b (i32toi maxpoffset) in
                    let mmajor, mminor = read_fixed b in
                    let numGlyphs = read_ushort b in
                      if !dbg then Printf.printf "maxp table version %i.%i: This font has %i glyphs\n" mmajor mminor numGlyphs;
                      let locaoffset =
                        match find_table "loca" with
                        | Some (o, _) -> o
                        | None -> raise (Pdf.PDFError "No loca table found in TrueType font")
                      in
                      let flags = calculate_flags italicangle in
                      let firstchar, lastchar = calculate_limits subset in
                      let numOfLongHorMetrics =
                        match find_table "hhea" with
                        | Some (o, _) -> read_hhea_table (mk_b (i32toi o))
                        | None -> 0
                      in
                      let hmtxdata =
                        match find_table "hmtx" with
                        | Some (o, _) -> read_hmtx_table numOfLongHorMetrics (mk_b (i32toi o))
                        | None -> raise (Pdf.PDFError "No hmtx table found in TrueType font")
                      in
                      let widths = calculate_widths unitsPerEm encoding firstchar lastchar subset !glyphcodes hmtxdata in
                      let maxwidth = calculate_maxwidth unitsPerEm hmtxdata in
                      let stemv = calculate_stemv () in
                      let b = mk_b (i32toi locaoffset) in
                      let loca = read_loca_table indexToLocFormat numGlyphs b in
                      let subset_font =
                        remove_unneeded_tables major minor !tables indexToLocFormat subset encoding !glyphcodes loca data
                      in
                        [{flags; minx; miny; maxx; maxy; italicangle; ascent; descent;
                          capheight; stemv; xheight; avgwidth; maxwidth; firstchar; lastchar;
                          widths; subset = subset_font; tounicode = None}]
|