2023-07-05 16:06:18 +02:00
(* Make one or more subsets from a TrueType font *)
2022-09-11 15:52:08 +02:00
open Pdfutil
open Pdfio
2023-07-12 16:51:29 +02:00
(* FIXME revisit widths for mono font - the new code returning the notdef width is making the notdef width turn up in /Widths, where missing entries should be 0! *)
2023-07-12 13:49:03 +02:00
let dbg =
2023-07-12 14:23:57 +02:00
(* Pdfe.logger := ( fun s -> print_string s; flush stdout ) *)
2023-07-12 13:49:03 +02:00
ref false
2023-07-04 17:52:25 +02:00
2022-09-11 15:52:08 +02:00
type t =
{ flags : int ;
minx : int ;
miny : int ;
maxx : int ;
maxy : int ;
italicangle : int ;
ascent : int ;
descent : int ;
capheight : int ;
stemv : int ;
xheight : int ;
avgwidth : int ;
maxwidth : int ;
firstchar : int ;
lastchar : int ;
widths : int array ;
2022-10-10 15:51:58 +02:00
subset_fontfile : Pdfio . bytes ;
subset : int list ;
2022-10-10 17:40:57 +02:00
tounicode : ( int , string ) Hashtbl . t option }
2022-09-11 15:52:08 +02:00
2023-06-13 17:48:21 +02:00
let debug_t t =
2023-06-22 14:50:30 +02:00
let hex u =
let b = Buffer . create 32 in
String . iter ( fun x -> Buffer . add_string b ( Printf . sprintf " %02X " ( int_of_char x ) ) ) u ;
Buffer . contents b
in
Printf . printf " firstchar: %i \n " t . firstchar ;
Printf . printf " lastchar: %i \n " t . lastchar ;
Printf . printf " widths: " ; Array . iter ( Printf . printf " %i " ) t . widths ; Printf . printf " \n " ;
2023-07-07 15:34:51 +02:00
Printf . printf " fontfile of length %i \n " ( bytes_size t . subset_fontfile ) ;
2023-06-22 14:50:30 +02:00
Printf . printf " subset: " ; iter ( Printf . printf " U+%04X " ) t . subset ; Printf . printf " \n " ;
Printf . printf " tounicode: \n " ;
begin match t . tounicode with
| None -> Printf . printf " None " ;
2023-07-07 16:45:30 +02:00
| Some table ->
iter
( fun ( k , v ) -> Printf . printf " %i --> U+%s \n " k ( hex v ) )
( sort compare ( list_of_hashtbl table ) )
2023-06-22 14:50:30 +02:00
end ;
Printf . printf " \n "
2023-06-13 17:48:21 +02:00
2022-09-28 14:04:20 +02:00
let required_tables =
2022-10-04 16:34:22 +02:00
[ " head " ; " hhea " ; " loca " ; " cmap " ; " maxp " ; " cvt " ; " glyf " ; " prep " ; " hmtx " ; " fpgm " ]
2022-09-28 14:04:20 +02:00
2022-09-11 15:52:08 +02:00
let read_fixed b =
let a = getval_31 b 16 in
2023-07-07 21:59:48 +02:00
let b = getval_31 b 16 in
a , b
2022-09-11 15:52:08 +02:00
let read_ushort b = getval_31 b 16
let read_ulong b = getval_32 b 32
let read_byte b = getval_31 b 8
let read_short b = sign_extend 16 ( getval_31 b 16 )
let read_f2dot14 b =
let v = read_ushort b in
float_of_int ( sign_extend 2 ( v lsr 14 ) ) + . ( float_of_int ( v land 0x3FFF ) /. 16384 . )
let discard_bytes b n =
for x = 1 to n do ignore ( getval_31 b 8 ) done
2023-07-06 14:52:13 +02:00
let padding n =
if n mod 4 = 0 then 0 else 4 - n mod 4
let padding32 n =
i32ofi ( padding ( i32toi n ) )
2022-09-26 20:12:44 +02:00
let pdf_unit unitsPerEm x =
2022-09-26 20:14:48 +02:00
int_of_float ( float_of_int x * . 1000 . /. float_of_int unitsPerEm + . 0 . 5 )
2022-09-26 20:12:44 +02:00
2022-09-11 15:52:08 +02:00
let string_of_tag t =
Printf . sprintf " %c%c%c%c "
( char_of_int ( i32toi ( Int32 . shift_right t 24 ) ) )
( char_of_int ( i32toi ( Int32 . logand 0x000000FFl ( Int32 . shift_right t 16 ) ) ) )
( char_of_int ( i32toi ( Int32 . logand 0x000000FFl ( Int32 . shift_right t 8 ) ) ) )
( char_of_int ( i32toi ( Int32 . logand 0x000000FFl t ) ) )
let read_format_6_encoding_table b =
let firstCode = read_ushort b in
let entryCount = read_ushort b in
let t = null_hash () in
2023-07-06 14:52:13 +02:00
for x = firstCode to firstCode + entryCount - 1 do
Hashtbl . add t x ( read_ushort b )
done ;
t
2022-09-11 15:52:08 +02:00
2023-06-12 14:55:15 +02:00
let read_magic_formula b glyphIndexArrayStart seg segCount ro c sc =
2023-07-06 14:52:13 +02:00
b . input . seek_in ( glyphIndexArrayStart + ( seg - segCount + ro / 2 + ( c - sc ) ) * 2 ) ;
b . bit <- 0 ;
b . bitsread <- 0 ;
b . currbyte <- 0 ;
read_short b
2023-06-12 14:55:15 +02:00
2022-09-11 15:52:08 +02:00
let read_format_4_encoding_table b =
let t = null_hash () in
let segCountX2 = read_ushort b in
let segCount = segCountX2 / 2 in
2023-07-06 14:52:13 +02:00
let _ (* searchRange *) = read_ushort b in
let _ (* entrySelector *) = read_ushort b in
let _ (* rangeShift *) = read_ushort b in
2022-09-11 15:52:08 +02:00
let endCodes = Array . init segCount ( fun _ -> read_ushort b ) in
let _ (* reservedPad *) = read_ushort b in
let startCodes = Array . init segCount ( fun _ -> read_ushort b ) in
let idDelta = Array . init segCount ( fun _ -> read_ushort b ) in
let idRangeOffset = Array . init segCount ( fun _ -> read_ushort b ) in
2023-06-12 14:55:15 +02:00
let glyphIndexArrayStart = b . input . pos_in () in
2022-09-11 15:52:08 +02:00
for seg = 0 to segCount - 1 do
2023-07-06 14:52:13 +02:00
let ec , sc , del , ro = endCodes . ( seg ) , startCodes . ( seg ) , idDelta . ( seg ) , idRangeOffset . ( seg ) in
2022-09-11 15:52:08 +02:00
for c = sc to ec do
2023-06-12 17:27:46 +02:00
if c != 0xFFFF then
2023-07-06 14:52:13 +02:00
if ro = 0 then Hashtbl . add t c ( ( c + del ) mod 65536 ) else
2023-06-12 17:27:46 +02:00
let v = read_magic_formula b glyphIndexArrayStart seg segCount ro c sc in
2023-07-06 14:52:13 +02:00
Hashtbl . add t c ( ( ( if v = 0 then c else v ) + del ) mod 65536 )
2022-09-11 15:52:08 +02:00
done
done ;
t
2023-07-12 13:49:03 +02:00
let print_encoding_table fmt table =
2022-11-22 16:09:21 +01:00
let unicodedata = Cpdfunicodedata . unicodedata () in
let unicodetable = Hashtbl . create 16000 in
iter
( fun x ->
Hashtbl . add unicodetable x . Cpdfunicodedata . code_value x . Cpdfunicodedata . character_name )
unicodedata ;
let l = sort compare ( list_of_hashtbl table ) in
2023-07-12 13:49:03 +02:00
if ! dbg then Printf . printf " Format table %i: There are %i characters in this font \n " fmt ( length l ) ;
2022-10-07 20:00:42 +02:00
iter
2022-11-22 16:09:21 +01:00
( fun ( c , gi ) ->
let str = Printf . sprintf " %04X " c in
2023-07-06 17:26:33 +02:00
if ! dbg then
Printf . printf " Char %s (%s) is at glyph index %i \n "
str ( try Hashtbl . find unicodetable str with Not_found -> " Not_found " ) gi )
2023-06-12 17:27:46 +02:00
l
2022-10-07 20:00:42 +02:00
2022-09-11 15:52:08 +02:00
let read_encoding_table fmt length version b =
2023-06-12 17:51:16 +02:00
if ! dbg then Printf . printf " ********** format %i table has length, version %i, %i \n " fmt length version ;
2022-09-11 15:52:08 +02:00
match fmt with
| 0 ->
let t = null_hash () in
for x = 0 to 255 do Hashtbl . add t x ( read_byte b ) done ;
t
2023-07-07 17:33:07 +02:00
| 4 -> read_format_4_encoding_table b ;
| 6 -> read_format_6_encoding_table b ;
2023-07-09 14:14:43 +02:00
| n ->
Pdfe . log ( Printf . sprintf " read_encoding_table: format %i not known \n " n ) ;
null_hash ()
2022-09-11 15:52:08 +02:00
let read_loca_table indexToLocFormat numGlyphs b =
2022-10-04 16:34:22 +02:00
match indexToLocFormat with
| 0 -> Array . init ( numGlyphs + 1 ) ( function _ -> i32ofi ( read_ushort b * 2 ) )
| 1 -> Array . init ( numGlyphs + 1 ) ( function _ -> read_ulong b )
| _ -> raise ( Pdf . PDFError " Unknown indexToLocFormat in read_loca_table " )
2022-09-11 15:52:08 +02:00
2023-07-04 17:52:25 +02:00
let read_os2_table unitsPerEm b blength =
let version = read_ushort b in
if ! dbg then Printf . printf " OS/2 table blength = %i bytes, version number = %i \n " blength version ;
let xAvgCharWidth = pdf_unit unitsPerEm ( read_short b ) in
discard_bytes b 64 ; (* discard 14 entries usWeightClass...fsLastCharIndex *)
(* -- end of original OS/2 Version 0 Truetype table. Must check length before reading now. *)
let sTypoAscender = if blength > 68 then pdf_unit unitsPerEm ( read_short b ) else 0 in
let sTypoDescender = if blength > 68 then pdf_unit unitsPerEm ( read_short b ) else 0 in
discard_bytes b 6 ; (* discard sTypoLineGap...usWinDescent *)
(* -- end of OpenType version 0 table *)
discard_bytes b 8 ; (* discard ulCodePageRange1, ulCodePageRange2 *)
(* -- end of OpenType version 1 table *)
let sxHeight = if version < 2 then 0 else pdf_unit unitsPerEm ( read_short b ) in
let sCapHeight = if version < 2 then 0 else pdf_unit unitsPerEm ( read_short b ) in
( sTypoAscender , sTypoDescender , sCapHeight , sxHeight , xAvgCharWidth )
let read_post_table b =
discard_bytes b 4 ; (* discard version *)
let italicangle , n = read_fixed b in
italicangle
(* ( nb bit 1 is actualy bit 0 etc. ) *)
let calculate_flags symbolic italicangle =
let italic = if italicangle < > 0 then 1 else 0 in
let symbolic , nonsymbolic = if symbolic then 1 , 0 else 0 , 1 in
( italic lsl 6 ) lor ( symbolic lsl 2 ) lor ( nonsymbolic lsl 5 )
let read_hhea_table b =
discard_bytes b 34 ;
read_ushort b (* numOfLongHorMetrics *)
let read_hmtx_table numOfLongHorMetrics b =
Array . init
numOfLongHorMetrics
( fun _ -> let r = read_ushort b in ignore ( read_short b ) ; r )
2022-10-05 21:34:45 +02:00
let write_loca_table subset cmap indexToLocFormat bs loca =
2022-10-04 17:51:54 +02:00
let locnums = null_hash () in
2022-10-05 21:34:45 +02:00
Hashtbl . add locnums 0 () ; (* .notdef *)
2022-10-04 17:51:54 +02:00
iter
( fun u ->
2023-07-06 14:52:13 +02:00
try
let locnum = Hashtbl . find cmap u in
2023-06-14 16:30:01 +02:00
if ! dbg then Printf . printf " write_loca_table: Unicode U+%04X is at location number %i \n " u locnum ;
2023-07-06 14:52:13 +02:00
Hashtbl . add locnums locnum ()
with
Not_found -> () )
2022-10-04 17:51:54 +02:00
subset ;
2023-07-04 17:36:32 +02:00
let len = ref 0 in
2022-10-06 16:28:36 +02:00
let write_entry loc position =
match indexToLocFormat with
2023-07-04 17:36:32 +02:00
| 0 -> len + = 2 ; putval bs 16 ( i32div position 2l )
| 1 -> len + = 4 ; putval bs 32 position
2022-10-06 16:28:36 +02:00
| _ -> raise ( Pdf . PDFError " Unknown indexToLocFormat in write_loca_table " )
in
let pos = ref 0l in
let pairs =
map
( fun loc ->
let len = i32sub loca . ( loc + 1 ) loca . ( loc ) in
let r = ( loc , ! pos ) in
pos := i32add ! pos len ;
r )
( sort compare ( map fst ( list_of_hashtbl locnums ) ) )
in
let pairs = Array . of_list ( pairs @ [ ( Array . length loca - 1 , ! pos ) ] ) in
2023-07-07 21:59:48 +02:00
Array . iteri
( fun i ( loc , off ) ->
if i < > Array . length pairs - 1 then
begin
write_entry loc off ;
let loc' , off' = pairs . ( i + 1 ) in
for x = 0 to loc' - loc - 2 do write_entry ( loc + x ) off' done
end
else
write_entry loc off )
pairs ;
for x = 1 to padding ! len do putval bs 8 0l done
2022-10-04 15:59:42 +02:00
2023-07-11 15:21:33 +02:00
(* Expand the subset of locations to include composites *)
let expand_composites_one mk_b loca glyfoffset locations =
let rec read_components b =
let componentFlags = read_ushort b in
let glyphIndex = read_ushort b in
if componentFlags land 0x0001 > 0 then discard_bytes b 4 else discard_bytes b 2 ;
( if componentFlags land 0x0008 > 0 then discard_bytes b 2
else if componentFlags land 0x0040 > 0 then discard_bytes b 4
else if componentFlags land 0x0080 > 0 then discard_bytes b 8 ) ;
if componentFlags land 0x0020 > 0 then glyphIndex :: read_components b else [ glyphIndex ]
in
let expanded =
map
( fun l ->
let b = mk_b ( i32toi ( i32add glyfoffset loca . ( l ) ) ) in
let numberOfContours = read_short b in
if numberOfContours < 0 then
begin
discard_bytes b 8 ; (* xMin, xMax, yMin, yMax *)
l :: read_components b
end
else
[ l ] )
locations
in
sort compare ( setify ( flatten expanded ) )
let rec expand_composites mk_b loca glyfoffset locations =
let expanded = expand_composites_one mk_b loca glyfoffset locations in
if expanded = locations then expanded else expand_composites mk_b loca glyfoffset expanded
2022-10-05 18:01:58 +02:00
(* Write the notdef glyf, and any others in the subset *)
let write_glyf_table subset cmap bs mk_b glyfoffset loca =
2022-10-06 16:28:36 +02:00
if ! dbg then Printf . printf " ***write_glyf_table \n " ;
2022-10-05 18:01:58 +02:00
let locnums = null_hash () in
Hashtbl . add locnums 0 () ; (* .notdef *)
iter
( fun u ->
2023-07-06 14:52:13 +02:00
try
let locnum = Hashtbl . find cmap u in
2023-07-07 16:45:30 +02:00
if ! dbg then Printf . printf " write_glyf_table: Unicode U+%04X is at loc num %i \n " u locnum ;
2023-07-06 14:52:13 +02:00
Hashtbl . add locnums locnum ()
2023-07-07 16:45:30 +02:00
with
Not_found -> () )
2022-10-05 18:01:58 +02:00
subset ;
2023-07-11 17:53:21 +02:00
let locnums = (* expand_composites mk_b loca glyfoffset *) ( sort compare ( map fst ( list_of_hashtbl locnums ) ) ) in
2023-07-12 16:51:29 +02:00
if ! dbg then
2023-07-07 16:45:30 +02:00
( Printf . printf " We want glyfs for locations: " ;
iter ( Printf . printf " %i " ) locnums ; Printf . printf " \n " ) ;
2022-10-05 18:01:58 +02:00
let byteranges = map ( fun x -> ( loca . ( x ) , loca . ( x + 1 ) ) ) locnums in
2023-07-11 16:56:30 +02:00
if ! dbg then
2023-07-07 16:45:30 +02:00
( Printf . printf " Byte ranges: " ;
iter ( fun ( a , b ) -> Printf . printf " (%li, %li) " a b ) byteranges ; Printf . printf " \n " ) ;
2023-07-12 13:49:03 +02:00
let len = fold_left i32add 0l ( map ( fun ( a , b ) -> i32sub b a ) byteranges ) in
2022-10-05 18:01:58 +02:00
let write_bytes bs a l =
2022-10-06 16:28:36 +02:00
if ! dbg then Printf . printf " glyf: write_bytes %li %li \n " a l ;
2022-10-05 18:01:58 +02:00
let b = mk_b ( i32toi ( i32add glyfoffset a ) ) in
for x = 1 to i32toi l do putval bs 8 ( getval_32 b 8 ) done
in
2022-10-06 17:53:13 +02:00
iter ( fun ( a , b ) -> write_bytes bs a ( i32sub b a ) ) byteranges ;
2023-07-06 14:52:13 +02:00
for x = 1 to padding ( i32toi len ) do putval bs 8 0l done ;
2022-10-07 15:37:05 +02:00
len
2022-10-05 16:10:07 +02:00
2023-06-22 16:48:38 +02:00
let write_cmap_table subset cmap bs =
2023-06-22 17:26:35 +02:00
if ! dbg then Printf . printf " ***write_cmap_table \n " ;
2023-06-29 16:09:54 +02:00
let glyphindexes =
2023-07-06 14:52:13 +02:00
map ( fun code -> try Hashtbl . find cmap code with Not_found -> 0 ) subset
2023-06-29 16:09:54 +02:00
in
2023-06-22 17:26:35 +02:00
putval bs 16 0l ; (* table version number *)
putval bs 16 1l ; (* number of encoding tables *)
putval bs 16 1l ; (* platform ID *)
2023-07-05 14:22:25 +02:00
putval bs 16 0l ; (* platform encoding ID *)
2023-06-22 17:26:35 +02:00
putval bs 32 12l ; (* subtable offset = 12 bytes from beginning of table *)
putval bs 16 6l ; (* Table format 6 *)
putval bs 16 ( i32ofi ( 10 + 2 * length glyphindexes ) ) ; (* subtable length *)
putval bs 16 0l ;
putval bs 16 33l ; (* first character code *)
putval bs 16 ( i32ofi ( length glyphindexes ) ) ; (* number of character codes *)
iter ( fun gi -> putval bs 16 ( i32ofi gi ) ) glyphindexes ; (* glyph indexes *)
let len = i32ofi ( 22 + 2 * length glyphindexes ) in
2023-07-06 14:52:13 +02:00
for x = 1 to padding ( i32toi len ) do putval bs 8 0l done ;
2023-06-22 17:26:35 +02:00
len
2023-06-22 16:48:38 +02:00
2022-09-28 20:16:48 +02:00
let calculate_widths unitsPerEm encoding firstchar lastchar subset cmapdata hmtxdata =
2023-07-04 17:52:25 +02:00
(* For widths, we need the unicode code, not the unencoded byte *)
let unicode_codepoint_of_pdfcode encoding_table glyphlist_table p =
try
hd ( Hashtbl . find glyphlist_table ( Hashtbl . find encoding_table p ) )
with
Not_found -> 0
in
2023-07-06 14:52:13 +02:00
if lastchar < firstchar then Cpdferror . error " lastchar < firstchar " else
2023-07-12 13:49:03 +02:00
(* if !dbg then iter ( fun ( a, b ) -> Printf.printf "%i -> %i\n" a b ) ( sort compare ( list_of_hashtbl cmapdata ) ) ; *)
2022-09-19 17:21:14 +02:00
let encoding_table = Pdftext . table_of_encoding encoding in
let glyphlist_table = Pdfglyphlist . glyph_hashes () in
2022-09-11 15:52:08 +02:00
Array . init
( lastchar - firstchar + 1 )
( fun pos ->
let code = pos + firstchar in
2023-06-16 16:53:20 +02:00
if ! dbg then Printf . printf " code %i --> " code ;
2022-09-19 17:21:14 +02:00
let code = unicode_codepoint_of_pdfcode encoding_table glyphlist_table code in
2023-06-16 16:53:20 +02:00
if ! dbg then Printf . printf " unicode %i --> " code ;
2023-07-11 13:37:45 +02:00
if not ( mem code subset ) then hmtxdata . ( 0 ) else
2022-09-11 15:52:08 +02:00
try
let glyphnum = Hashtbl . find cmapdata code in
2023-06-16 16:53:20 +02:00
if ! dbg then Printf . printf " glyph number %i --> " glyphnum ;
2023-07-05 17:22:16 +02:00
(* If it fails, we are a monospaced font. Pick the last hmtxdata entry. *)
let width = try hmtxdata . ( glyphnum ) with _ -> hmtxdata . ( Array . length hmtxdata - 1 ) in
2023-06-16 16:53:20 +02:00
if ! dbg then Printf . printf " width %i \n " width ;
2022-09-25 17:00:45 +02:00
pdf_unit unitsPerEm width
2023-07-11 13:37:45 +02:00
with e -> if ! dbg then Printf . printf " no width for %i \n " code ; hmtxdata . ( 0 ) )
2022-09-11 15:52:08 +02:00
2023-06-16 15:44:03 +02:00
let calculate_width_higher unitsPerEm firstchar lastchar subset cmapdata hmtxdata =
let subset = Array . of_list subset in
2023-06-16 17:11:06 +02:00
Array . init
( lastchar - firstchar + 1 )
( fun pos ->
2023-07-06 14:52:13 +02:00
try
let glyphnum = Hashtbl . find cmapdata subset . ( pos ) in
if ! dbg then Printf . printf " glyph number %i --> " glyphnum ;
(* If it fails, we are a monospaced font. Pick the last hmtxdata entry. *)
let width = try hmtxdata . ( glyphnum ) with _ -> hmtxdata . ( Array . length hmtxdata - 1 ) in
if ! dbg then Printf . printf " width %i \n " width ;
pdf_unit unitsPerEm width
with
2023-07-11 13:37:45 +02:00
Not_found -> hmtxdata . ( 0 ) )
2023-06-16 15:44:03 +02:00
2022-09-26 20:12:44 +02:00
let calculate_maxwidth unitsPerEm hmtxdata =
pdf_unit unitsPerEm ( hd ( sort ( fun a b -> compare b a ) ( Array . to_list hmtxdata ) ) )
2022-09-11 15:52:08 +02:00
2022-11-23 16:15:17 +01:00
let fonumr = ref ( - 1 )
2022-11-23 17:10:32 +01:00
let fonum () = fonumr + = 1 ; ! fonumr
2022-11-23 16:15:17 +01:00
2022-10-10 17:08:07 +02:00
let subset_font major minor tables indexToLocFormat subset encoding cmap loca mk_b glyfoffset data =
2022-09-28 18:17:54 +02:00
let tables = Array . of_list ( sort ( fun ( _ , _ , o , _ ) ( _ , _ , o' , _ ) -> compare o o' ) tables ) in
let tablesout = ref [] in
let cut = ref 0l in
Array . iteri
( fun i ( tag , checkSum , offset , ttlength ) ->
2022-09-29 15:52:50 +02:00
if ! dbg then Printf . printf " tag = %li = %s, offset = %li \n " tag ( string_of_tag tag ) offset ;
2022-09-28 18:17:54 +02:00
if mem ( string_of_tag tag ) required_tables then
2023-07-04 16:58:38 +02:00
tablesout := ( tag , checkSum , i32sub offset ! cut , ttlength ) :: ! tablesout
2022-09-28 18:17:54 +02:00
else
2023-07-04 16:58:38 +02:00
if i < Array . length tables - 1 then
cut := i32add ! cut ( match tables . ( i + 1 ) with ( _ , _ , offset' , _ ) -> i32sub offset' offset ) )
2022-09-28 18:17:54 +02:00
tables ;
2022-09-28 19:03:32 +02:00
let header_size_reduction = i32ofi ( 16 * ( Array . length tables - length ! tablesout ) ) in
2022-10-06 17:53:13 +02:00
let glyf_table_size_reduction = ref 0l in
2023-06-22 16:48:38 +02:00
let cmap_table_size_reduction = ref 0l in
2022-09-29 15:23:23 +02:00
let newtables =
Array . of_list
( map
2022-10-06 13:51:28 +02:00
( fun ( tag , checksum , offset , ttlength ) ->
2022-10-06 16:28:36 +02:00
let ttlength =
2023-07-07 17:33:07 +02:00
if string_of_tag tag = " glyf " then
2022-10-06 16:28:36 +02:00
let bs = make_write_bitstream () in
2022-10-07 15:37:05 +02:00
let newlen = write_glyf_table subset cmap bs mk_b glyfoffset loca in
let paddedlen = i32ofi ( bytes_size ( bytes_of_write_bitstream bs ) ) in
2023-07-06 14:52:13 +02:00
glyf_table_size_reduction := i32sub ( i32add ttlength ( padding32 ttlength ) ) paddedlen ;
2022-10-06 17:53:13 +02:00
newlen
2023-07-07 17:33:07 +02:00
else if string_of_tag tag = " cmap " && encoding = Pdftext . ImplicitInFontFile then
2023-07-06 17:26:33 +02:00
let bs = make_write_bitstream () in
2023-06-22 16:48:38 +02:00
let newlen = write_cmap_table subset cmap bs in
let paddedlen = i32ofi ( bytes_size ( bytes_of_write_bitstream bs ) ) in
2023-07-06 14:52:13 +02:00
cmap_table_size_reduction := i32sub ( i32add ttlength ( padding32 ttlength ) ) paddedlen ;
2023-06-22 16:48:38 +02:00
newlen
else
ttlength
2022-10-06 16:28:36 +02:00
in
2023-06-22 16:48:38 +02:00
(* Don't reduce by a table size reduction we have just set, but otherwise do. *)
2022-10-06 17:53:13 +02:00
let offset' =
i32sub
( i32sub offset header_size_reduction )
2023-06-22 16:48:38 +02:00
( if string_of_tag tag = " glyf " then ! cmap_table_size_reduction else
if string_of_tag tag = " cmap " then ! glyf_table_size_reduction else
i32add ! cmap_table_size_reduction ! glyf_table_size_reduction )
2022-10-06 17:53:13 +02:00
in
( tag , checksum , offset' , ttlength ) )
2022-09-29 15:23:23 +02:00
( rev ! tablesout ) )
2022-09-28 19:03:32 +02:00
in
2022-09-29 15:52:50 +02:00
if ! dbg then Printf . printf " ***Reduced: \n " ;
2022-09-29 15:23:23 +02:00
Array . iter
2022-09-28 19:03:32 +02:00
( fun ( tag , checkSum , offset , ttlength ) ->
2023-07-07 16:45:30 +02:00
if ! dbg then
Printf . printf
" tag = %li = %s, offset = %li, length = %li \n " tag ( string_of_tag tag ) offset ttlength )
2022-09-29 15:23:23 +02:00
newtables ;
2022-09-28 20:16:48 +02:00
let bs = make_write_bitstream () in
2022-09-29 15:23:23 +02:00
(* table directory *)
let numtables = Array . length newtables in
2022-09-28 20:16:48 +02:00
putval bs 16 ( i32ofi major ) ;
putval bs 16 ( i32ofi minor ) ;
2022-09-29 15:23:23 +02:00
putval bs 16 ( i32ofi numtables ) ; (* numTables *)
putval bs 16 ( i32ofi ( 16 * pow2lt numtables ) ) ; (* searchRange *)
putval bs 16 ( i32ofi ( int_of_float ( log ( float_of_int ( pow2lt numtables ) ) ) ) ) ; (* entrySelector *)
putval bs 16 ( i32ofi ( numtables * 16 ) ) ; (* rangeShift *)
Array . iter
( fun ( tag , checkSum , offset , ttlength ) ->
putval bs 32 tag ;
putval bs 32 checkSum ;
putval bs 32 offset ;
putval bs 32 ttlength )
newtables ;
(* find each table in original data, calculating the length from the next offset.
On the last , copy until we run out of data * )
let findtag tag =
let off = ref 0l in
let len = ref None in
begin try
for x = 0 to Array . length tables - 1 do
let t , _ , offset , _ = tables . ( x ) in
if t = tag then
begin
off := offset ;
if x < Array . length tables - 1 then
len := Some ( let _ , _ , nextoffset , _ = tables . ( x + 1 ) in i32sub nextoffset offset ) ;
raise Exit
end
done ;
2023-07-06 14:52:13 +02:00
Cpdferror . error " failed to find table "
2022-09-29 15:23:23 +02:00
with
Exit -> ( ! off , ! len )
end
in
Array . iter
( fun ( tag , _ , _ , _ ) ->
2022-09-29 15:52:50 +02:00
if ! dbg then Printf . printf " Writing %s table \n " ( string_of_tag tag ) ;
2023-07-07 17:33:07 +02:00
if string_of_tag tag = " loca " then
2022-10-04 17:51:54 +02:00
write_loca_table subset cmap indexToLocFormat bs loca
2023-07-07 17:33:07 +02:00
else if string_of_tag tag = " glyf " then
2022-10-07 15:37:05 +02:00
ignore ( write_glyf_table subset cmap bs mk_b glyfoffset loca )
2023-07-07 17:33:07 +02:00
else if string_of_tag tag = " cmap " && encoding = Pdftext . ImplicitInFontFile then
2023-06-29 14:53:04 +02:00
ignore ( write_cmap_table subset cmap bs )
2022-10-05 16:10:07 +02:00
else
2022-10-04 16:34:22 +02:00
match findtag tag with
| ( og_off , Some len ) ->
let b = mk_b ( i32toi og_off ) in
for x = 0 to i32toi len - 1 do putval bs 8 ( getval_32 b 8 ) done
| ( og_off , None ) ->
let b = mk_b ( i32toi og_off ) in
try
while true do putval bs 8 ( getval_32 b 8 ) done
with
_ -> () )
2022-09-29 15:23:23 +02:00
newtables ;
2023-07-07 15:34:51 +02:00
bytes_of_write_bitstream bs
2022-09-28 18:17:54 +02:00
2023-06-13 22:02:50 +02:00
let write_font filename data =
let fh = open_out_bin filename in
2023-07-07 15:34:51 +02:00
output_string fh ( string_of_bytes data ) ;
2023-06-13 22:02:50 +02:00
close_out fh
2023-06-16 16:53:20 +02:00
let find_main encoding subset =
2023-07-06 17:26:33 +02:00
let encoding_table = Pdftext . table_of_encoding encoding in
let first , rest =
2023-06-16 18:24:13 +02:00
List . partition
2023-06-16 17:21:52 +02:00
( fun u -> try ignore ( Hashtbl . find encoding_table u ) ; true with Not_found -> false )
2023-07-06 17:26:33 +02:00
subset
in
( first , splitinto 224 rest )
2023-06-16 16:53:20 +02:00
2023-07-06 14:52:13 +02:00
let parse ~ subset data encoding =
2022-09-11 15:52:08 +02:00
let mk_b byte_offset = bitbytes_of_input ( let i = input_of_bytes data in i . seek_in byte_offset ; i ) in
let b = mk_b 0 in
let major , minor = read_fixed b in
2023-07-07 17:33:07 +02:00
let numTables = read_ushort b in
let searchRange = read_ushort b in
let entrySelector = read_ushort b in
let rangeShift = read_ushort b in
if ! dbg then Printf . printf " numTables = %i, searchRange = %i, entrySelector = %i, rangeShift = %i \n "
numTables searchRange entrySelector rangeShift ;
let tables = ref [] in
for x = 1 to numTables do
let tag = read_ulong b in
let checkSum = read_ulong b in
let offset = read_ulong b in
let ttlength = read_ulong b in
if ! dbg then Printf . printf " tag = %li = %s, checkSum = %li, offset = %li, ttlength = %li \n "
tag ( string_of_tag tag ) checkSum offset ttlength ;
tables = | ( tag , checkSum , offset , ttlength ) ;
done ;
let headoffset , headlength =
match keep ( function ( t , _ , _ , _ ) -> string_of_tag t = " head " ) ! tables with
| ( _ , _ , o , l ) :: _ -> o , l
| [] -> raise ( Pdf . PDFError " No maxp table found in TrueType font " )
in
let b = mk_b ( i32toi headoffset ) in
discard_bytes b 18 ;
let unitsPerEm = read_ushort b in
discard_bytes b 16 ;
let minx = pdf_unit unitsPerEm ( read_short b ) in
let miny = pdf_unit unitsPerEm ( read_short b ) in
let maxx = pdf_unit unitsPerEm ( read_short b ) in
let maxy = pdf_unit unitsPerEm ( read_short b ) in
discard_bytes b 6 ;
let indexToLocFormat = read_short b in
let _ (* glyphDataFormat *) = read_short b in
let os2 =
match keep ( function ( t , _ , _ , _ ) -> string_of_tag t = " OS/2 " ) ! tables with
| ( _ , _ , o , l ) :: _ -> Some ( o , l )
| [] -> None
2023-07-06 17:26:33 +02:00
in
2023-07-07 17:33:07 +02:00
let ascent , descent , capheight , xheight , avgwidth =
match os2 with
| None -> raise ( Pdf . PDFError " No os/2 table found in truetype font " )
| Some ( o , l ) -> let b = mk_b ( i32toi o ) in read_os2_table unitsPerEm b ( i32toi l )
2022-10-05 18:01:58 +02:00
in
2023-07-07 17:33:07 +02:00
let italicangle =
match keep ( function ( t , _ , _ , _ ) -> string_of_tag t = " post " ) ! tables with
| ( _ , _ , o , _ ) :: _ -> read_post_table ( mk_b ( i32toi o ) )
| _ -> 0
2022-10-10 17:08:07 +02:00
in
2023-07-07 17:33:07 +02:00
if ! dbg then
Printf . printf " ascent %i descent %i capheight %i xheight %i avgwidth %i \n "
ascent descent capheight xheight avgwidth ;
let cmap =
match keep ( function ( t , _ , _ , _ ) -> string_of_tag t = " cmap " ) ! tables with
| ( _ , _ , o , l ) :: _ -> Some ( o , l )
| [] -> None
2023-07-06 17:26:33 +02:00
in
2023-07-07 17:33:07 +02:00
let glyphcodes = ref ( null_hash () ) in
begin match cmap with
| None ->
for x = 0 to 255 do Hashtbl . add ! glyphcodes x x done
| Some ( cmapoffset , cmaplength ) ->
let b = mk_b ( i32toi cmapoffset ) in
let cmap_version = read_ushort b in
let num_encoding_tables = read_ushort b in
if ! dbg then Printf . printf " cmap version %i. There are %i encoding tables \n "
cmap_version num_encoding_tables ;
for x = 1 to num_encoding_tables do
let platform_id = read_ushort b in
let encoding_id = read_ushort b in
let subtable_offset = read_ulong b in
if ! dbg then Printf . printf " subtable %i. platform_id = %i, encoding_id = %i, subtable_offset = %li \n "
x platform_id encoding_id subtable_offset ;
let b = mk_b ( i32toi cmapoffset + i32toi subtable_offset ) in
let fmt = read_ushort b in
let lngth = read_ushort b in
let version = read_ushort b in
if ! dbg then Printf . printf " subtable has format %i, length %i, version %i \n " fmt lngth version ;
let got_glyphcodes = read_encoding_table fmt lngth version b in
Hashtbl . iter ( Hashtbl . add ! glyphcodes ) got_glyphcodes
done ;
end ;
let maxpoffset , maxplength =
match keep ( function ( t , _ , _ , _ ) -> string_of_tag t = " maxp " ) ! tables with
| ( _ , _ , o , l ) :: _ -> o , l
| [] -> raise ( Pdf . PDFError " No maxp table found in TrueType font " )
in
let b = mk_b ( i32toi maxpoffset ) in
let mmajor , mminor = read_fixed b in
let numGlyphs = read_ushort b in
if ! dbg then Printf . printf " maxp table version %i.%i: This font has %i glyphs \n " mmajor mminor numGlyphs ;
let locaoffset , localength =
match keep ( function ( t , _ , _ , _ ) -> string_of_tag t = " loca " ) ! tables with
| ( _ , _ , o , l ) :: _ -> o , l
| [] -> raise ( Pdf . PDFError " No loca table found in TrueType font " )
in
let subset_1 , subsets_2 = find_main encoding subset in
let flags_1 = calculate_flags false italicangle in
let flags_2 = calculate_flags true italicangle in
let firstchar_1 , lastchar_1 = extremes ( sort compare subset_1 ) in
let firstchars_2 , lastchars_2 = split ( map ( fun subset -> ( 33 , length subset + 33 - 1 ) ) subsets_2 ) in
let numOfLongHorMetrics =
match keep ( function ( t , _ , _ , _ ) -> string_of_tag t = " hhea " ) ! tables with
| ( _ , _ , o , l ) :: _ -> let b = mk_b ( i32toi o ) in read_hhea_table b
| _ -> 0
in
let hmtxdata =
match keep ( function ( t , _ , _ , _ ) -> string_of_tag t = " hmtx " ) ! tables with
| ( _ , _ , o , _ ) :: _ -> read_hmtx_table numOfLongHorMetrics ( mk_b ( i32toi o ) )
| [] -> raise ( Pdf . PDFError " No hmtx table found in TrueType font " )
in
let widths_1 =
calculate_widths unitsPerEm encoding firstchar_1 lastchar_1 subset_1 ! glyphcodes hmtxdata
in
let widths_2 =
map3
( fun f l s -> calculate_width_higher unitsPerEm f l s ! glyphcodes hmtxdata )
firstchars_2 lastchars_2 subsets_2
in
let maxwidth = calculate_maxwidth unitsPerEm hmtxdata in
let stemv = 0 in
let b = mk_b ( i32toi locaoffset ) in
let loca = read_loca_table indexToLocFormat numGlyphs b in
let glyfoffset , glyflength =
match keep ( function ( t , _ , _ , _ ) -> string_of_tag t = " glyf " ) ! tables with
| ( _ , _ , o , l ) :: _ -> o , l
| [] -> raise ( Pdf . PDFError " No glyf table found in TrueType font " )
in
let main_subset =
subset_font major minor ! tables indexToLocFormat subset_1
encoding ! glyphcodes loca mk_b glyfoffset data
in
let seconds_subsets =
map
( fun subset ->
subset_font
major minor ! tables indexToLocFormat subset Pdftext . ImplicitInFontFile
! glyphcodes loca mk_b glyfoffset data )
2023-07-06 17:26:33 +02:00
subsets_2
2023-07-07 17:33:07 +02:00
in
let seconds_tounicodes =
map
( fun subset ->
if subset = [] then None else
let h = null_hash () in
2023-07-12 13:49:03 +02:00
iter2
2023-07-07 17:33:07 +02:00
( fun n u ->
let s = implode ( tl ( tl ( explode ( Pdftext . utf16be_of_codepoints [ u ] ) ) ) ) in
Hashtbl . add h n s )
( map ( ( + ) 33 ) ( indx0 subset ) )
subset ;
Some h )
subsets_2
in
let one =
{ flags = flags_1 ; minx ; miny ; maxx ; maxy ; italicangle ; ascent ; descent ;
2023-07-12 13:49:03 +02:00
capheight ; stemv ; xheight ; avgwidth ; maxwidth ; firstchar = firstchar_1 ;
lastchar = lastchar_1 ; widths = widths_1 ; subset_fontfile = main_subset ;
subset = subset_1 ; tounicode = None }
2023-07-07 17:33:07 +02:00
in
let twos =
map6
( fun firstchar lastchar widths subset_fontfile subset tounicode ->
{ flags = flags_2 ; minx ; miny ; maxx ; maxy ; italicangle ; ascent ; descent ;
capheight ; stemv ; xheight ; avgwidth ; maxwidth ; firstchar ; lastchar ;
widths ; subset_fontfile ; subset ; tounicode } )
firstchars_2 lastchars_2 widths_2 seconds_subsets subsets_2 seconds_tounicodes
in
2023-07-12 13:49:03 +02:00
(* Printf.printf "\nMain subset:\n"; debug_t one; *)
2023-07-07 17:33:07 +02:00
write_font " one.ttf " one . subset_fontfile ;
2023-07-12 13:49:03 +02:00
(* Printf.printf "\nHigher subset:\n"; debug_t ( hd twos ) ; *)
2023-07-11 17:53:21 +02:00
if twos < > [] then write_font " two.ttf " ( hd twos ) . subset_fontfile ;
2023-07-07 17:33:07 +02:00
one :: twos
2023-07-06 17:26:33 +02:00
let parse ~ subset data encoding =
2023-07-05 17:22:16 +02:00
try parse ~ subset data encoding with
2023-07-06 17:26:33 +02:00
e -> raise ( Cpdferror . error ( " Failed to parse TrueType font: " ^ Printexc . to_string e ) )