First basic TrueType subsetter working

This commit is contained in:
John Whitington 2023-07-05 13:38:50 +01:00
parent 0628cb0251
commit d67b633f03
3 changed files with 12 additions and 18 deletions

View File

@ -91,5 +91,4 @@ let make_fontpack_hashtable fs =
let embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding = let embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding =
let fs = Cpdftruetype.parse ~subset:codepoints fontfile encoding in let fs = Cpdftruetype.parse ~subset:codepoints fontfile encoding in
let subsets_and_their_fonts = map (make_single_font ~fontname ~encoding pdf) fs in let subsets_and_their_fonts = map (make_single_font ~fontname ~encoding pdf) fs in
Printf.printf "embed_truetype finished\n";
(map snd subsets_and_their_fonts, make_fontpack_hashtable subsets_and_their_fonts) (map snd subsets_and_their_fonts, make_fontpack_hashtable subsets_and_their_fonts)

View File

@ -10,7 +10,7 @@ let rec of_utf8_with_newlines fontpack fontsize t =
(fun u -> (fun u ->
match Cpdfembed.get_char fontpack u with match Cpdfembed.get_char fontpack u with
| Some (c, n, f) -> | Some (c, n, f) ->
Printf.printf "Charcode %i, font number %i\n" c n; (*Printf.printf "Charcode %i, font number %i\n" c n;*)
begin if n <> !currfont then begin if n <> !currfont then
begin begin
if !currtext <> [] then items := Cpdftype.Text (rev !currtext)::!items; if !currtext <> [] then items := Cpdftype.Text (rev !currtext)::!items;

View File

@ -7,11 +7,11 @@ open Pdfio
(* FIXME Create third, fourth etc. font when we need to *) (* FIXME Create third, fourth etc. font when we need to *)
(* FIXME Get rid of double-calling of this code to 1) make font then 2) collect chars then 3) subset it i.e the subset = [] stuff *) (* FIXME Get rid of double-calling of this code to 1) make font then 2) collect chars then 3) subset it i.e the subset = [] stuff *)
(* FIXME Check WinAnsiEncoding actually does the right thing, and covers all possible characters in that set *) (* FIXME Check WinAnsiEncoding actually does the right thing, and covers all possible characters in that set *)
let dbg = ref true (* FIXME Subset names better than AAAAAB *)
let dbg = ref false
(* FIXME: remove *) (*let _ =
let _ = Pdfe.logger := (fun s -> print_string s; flush stdout)*)
Pdfe.logger := (fun s -> print_string s; flush stdout)
type t = type t =
{flags : int; {flags : int;
@ -284,7 +284,6 @@ let write_loca_table subset cmap indexToLocFormat bs loca =
write_entry loc off) write_entry loc off)
pairs; pairs;
let padding = if !len mod 4 = 0 then 0 else 4 - !len mod 4 in let padding = if !len mod 4 = 0 then 0 else 4 - !len mod 4 in
Printf.printf "***loca table - adding %i bytes of padding\n" padding;
for x = 1 to padding do putval bs 8 0l done for x = 1 to padding do putval bs 8 0l done
(* Write the notdef glyf, and any others in the subset *) (* Write the notdef glyf, and any others in the subset *)
@ -310,7 +309,6 @@ let write_glyf_table subset cmap bs mk_b glyfoffset loca =
in in
iter (fun (a, b) -> write_bytes bs a (i32sub b a)) byteranges; iter (fun (a, b) -> write_bytes bs a (i32sub b a)) byteranges;
let padding = if i32toi len mod 4 = 0 then 0 else 4 - i32toi len mod 4 in let padding = if i32toi len mod 4 = 0 then 0 else 4 - i32toi len mod 4 in
Printf.printf "***glyf table - adding %i bytes of padding\n" padding;
for x = 1 to padding do putval bs 8 0l done; for x = 1 to padding do putval bs 8 0l done;
len len
@ -332,7 +330,6 @@ let write_cmap_table subset cmap bs =
iter (fun gi -> putval bs 16 (i32ofi gi)) glyphindexes; (* glyph indexes *) iter (fun gi -> putval bs 16 (i32ofi gi)) glyphindexes; (* glyph indexes *)
let len = i32ofi (22 + 2 * length glyphindexes) in let len = i32ofi (22 + 2 * length glyphindexes) in
let padding = if i32toi len mod 4 = 0 then 0 else 4 - i32toi len mod 4 in let padding = if i32toi len mod 4 = 0 then 0 else 4 - i32toi len mod 4 in
Printf.printf "***cmap table - adding %i bytes of padding\n" padding;
for x = 1 to padding do putval bs 8 0l done; for x = 1 to padding do putval bs 8 0l done;
len len
@ -401,8 +398,6 @@ let subset_font major minor tables indexToLocFormat subset encoding cmap loca mk
if i < Array.length tables - 1 then if i < Array.length tables - 1 then
cut := i32add !cut (match tables.(i + 1) with (_, _, offset', _) -> i32sub offset' offset)) cut := i32add !cut (match tables.(i + 1) with (_, _, offset', _) -> i32sub offset' offset))
tables; tables;
Printf.printf "*********** after iteri\n";
(* Reduce offsets by the reduction in header table size *)
let header_size_reduction = i32ofi (16 * (Array.length tables - length !tablesout)) in let header_size_reduction = i32ofi (16 * (Array.length tables - length !tablesout)) in
let glyf_table_size_reduction = ref 0l in let glyf_table_size_reduction = ref 0l in
let cmap_table_size_reduction = ref 0l in let cmap_table_size_reduction = ref 0l in
@ -633,7 +628,7 @@ let parse ?(subset=[]) data encoding =
| [] -> raise (Pdf.PDFError "No loca table found in TrueType font") | [] -> raise (Pdf.PDFError "No loca table found in TrueType font")
in in
let subset_1, subset_2 = find_main encoding subset in let subset_1, subset_2 = find_main encoding subset in
(*if !dbg && subset <> [] then*) if !dbg && subset <> [] then
begin begin
Printf.printf "***********Chars for main WinAnsiEncoding subset:\n"; Printf.printf "***********Chars for main WinAnsiEncoding subset:\n";
iter (Printf.printf "U+%04X ") subset_1; iter (Printf.printf "U+%04X ") subset_1;
@ -655,7 +650,7 @@ let parse ?(subset=[]) data encoding =
| (_, _, o, _)::_ -> read_hmtx_table numOfLongHorMetrics (mk_b (i32toi o)) | (_, _, o, _)::_ -> read_hmtx_table numOfLongHorMetrics (mk_b (i32toi o))
| [] -> raise (Pdf.PDFError "No hmtx table found in TrueType font") | [] -> raise (Pdf.PDFError "No hmtx table found in TrueType font")
in in
Printf.printf "firstchar_1, lastchar_1, firstchar_2, lastchar_2 = %i, %i, %i%, %i\n" firstchar_1 lastchar_1 firstchar_2 lastchar_2; (*Printf.printf "firstchar_1, lastchar_1, firstchar_2, lastchar_2 = %i, %i, %i%, %i\n" firstchar_1 lastchar_1 firstchar_2 lastchar_2;*)
let widths_1 = calculate_widths unitsPerEm encoding firstchar_1 lastchar_1 subset_1 !glyphcodes hmtxdata in let widths_1 = calculate_widths unitsPerEm encoding firstchar_1 lastchar_1 subset_1 !glyphcodes hmtxdata in
let widths_2 = calculate_width_higher unitsPerEm firstchar_2 lastchar_2 subset_2 !glyphcodes hmtxdata in let widths_2 = calculate_width_higher unitsPerEm firstchar_2 lastchar_2 subset_2 !glyphcodes hmtxdata in
let maxwidth = calculate_maxwidth unitsPerEm hmtxdata in let maxwidth = calculate_maxwidth unitsPerEm hmtxdata in
@ -667,12 +662,12 @@ let parse ?(subset=[]) data encoding =
| (_, _, o, l)::_ -> o, l | (_, _, o, l)::_ -> o, l
| [] -> raise (Pdf.PDFError "No glyf table found in TrueType font") | [] -> raise (Pdf.PDFError "No glyf table found in TrueType font")
in in
Printf.printf "Calculate main subset\n"; (*Printf.printf "Calculate main subset\n";*)
let main_subset = let main_subset =
subset_font major minor !tables indexToLocFormat subset_1 subset_font major minor !tables indexToLocFormat subset_1
encoding !glyphcodes loca mk_b glyfoffset data encoding !glyphcodes loca mk_b glyfoffset data
in in
Printf.printf "Calculate higher subset\n"; (*Printf.printf "Calculate higher subset\n";*)
let second_subset = let second_subset =
subset_font major minor !tables indexToLocFormat subset_2 subset_font major minor !tables indexToLocFormat subset_2
Pdftext.ImplicitInFontFile !glyphcodes loca mk_b glyfoffset data Pdftext.ImplicitInFontFile !glyphcodes loca mk_b glyfoffset data
@ -688,7 +683,7 @@ let parse ?(subset=[]) data encoding =
subset_2; subset_2;
Some h Some h
in in
Printf.printf "returning the fonts. Job done.\n"; (*Printf.printf "returning the fonts. Job done.\n";*)
let one = let one =
{flags = flags_1; minx; miny; maxx; maxy; italicangle; ascent; descent; {flags = flags_1; minx; miny; maxx; maxy; italicangle; ascent; descent;
capheight; stemv; xheight; avgwidth; maxwidth; firstchar = firstchar_1; lastchar = lastchar_1; capheight; stemv; xheight; avgwidth; maxwidth; firstchar = firstchar_1; lastchar = lastchar_1;
@ -700,10 +695,10 @@ let parse ?(subset=[]) data encoding =
widths = widths_2; subset_fontfile = second_subset; subset = subset_2; widths = widths_2; subset_fontfile = second_subset; subset = subset_2;
tounicode = second_tounicode} tounicode = second_tounicode}
in in
Printf.printf "\nMain subset:\n"; (*Printf.printf "\nMain subset:\n";
debug_t one; debug_t one;
write_font "one.ttf" one.subset_fontfile; write_font "one.ttf" one.subset_fontfile;
Printf.printf "\nHigher subset:\n"; Printf.printf "\nHigher subset:\n";
debug_t two; debug_t two;
write_font "two.ttf" two.subset_fontfile; write_font "two.ttf" two.subset_fontfile;*)
[one; two] [one; two]