Mulltiple subsets
This commit is contained in:
parent
344e4d6266
commit
1e3e19ae05
|
@ -89,6 +89,6 @@ let make_fontpack_hashtable fs =
|
||||||
table
|
table
|
||||||
|
|
||||||
let embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding =
|
let embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding =
|
||||||
let found_codepoints, fs = Cpdftruetype.parse ~subset:codepoints fontfile encoding in
|
let fs = Cpdftruetype.parse ~subset:codepoints fontfile encoding in
|
||||||
let subsets_and_their_fonts = map (make_single_font ~fontname ~encoding pdf) fs in
|
let subsets_and_their_fonts = map (make_single_font ~fontname ~encoding pdf) fs in
|
||||||
(map snd subsets_and_their_fonts, make_fontpack_hashtable subsets_and_their_fonts)
|
(map snd subsets_and_their_fonts, make_fontpack_hashtable subsets_and_their_fonts)
|
||||||
|
|
|
@ -2,14 +2,14 @@
|
||||||
open Pdfutil
|
open Pdfutil
|
||||||
open Pdfio
|
open Pdfio
|
||||||
|
|
||||||
|
(* FIXME Proper widths for .notdef, and warn on .notdef being produced *)
|
||||||
(* FIXME Add suport for composite glyphs *)
|
(* FIXME Add suport for composite glyphs *)
|
||||||
(* FIXME No need for bitstream - everything is byte based, so we can use a normal input *)
|
(* FIXME No need for bitstream - everything is byte based, so we can use a normal input *)
|
||||||
(* FIXME Get rid of double-calling of this code to 1) make font then 2) collect chars then 3) subset it i.e the subset = [] stuff *)
|
(* FIXME Get rid of double-calling of this code to 1) make font then 2) collect chars then 3) subset it i.e the subset = [] stuff *)
|
||||||
(* FIXME Subset names better than AAAAAB *)
|
(* FIXME Subset names better than AAAAAB *)
|
||||||
(* FIXME Proper table choice mechanism *)
|
|
||||||
(* FIXME All uses - add text / drawing / texttopdf / table of contents *)
|
(* FIXME All uses - add text / drawing / texttopdf / table of contents *)
|
||||||
(* FIXME Work across AND? *)
|
(* FIXME Work across AND? *)
|
||||||
let dbg = ref true
|
let dbg = ref false
|
||||||
|
|
||||||
let _ =
|
let _ =
|
||||||
Pdfe.logger := (fun s -> print_string s; flush stdout)
|
Pdfe.logger := (fun s -> print_string s; flush stdout)
|
||||||
|
@ -151,7 +151,9 @@ let print_encoding_table format (table : (int, int) Hashtbl.t) =
|
||||||
iter
|
iter
|
||||||
(fun (c, gi) ->
|
(fun (c, gi) ->
|
||||||
let str = Printf.sprintf "%04X" c in
|
let str = Printf.sprintf "%04X" c in
|
||||||
if !dbg then Printf.printf "Char %s (%s) is at glyph index %i\n" str (try Hashtbl.find unicodetable str with Not_found -> "Not_found") gi)
|
if !dbg then
|
||||||
|
Printf.printf "Char %s (%s) is at glyph index %i\n"
|
||||||
|
str (try Hashtbl.find unicodetable str with Not_found -> "Not_found") gi)
|
||||||
l
|
l
|
||||||
|
|
||||||
let read_encoding_table fmt length version b =
|
let read_encoding_table fmt length version b =
|
||||||
|
@ -392,7 +394,7 @@ let subset_font major minor tables indexToLocFormat subset encoding cmap loca mk
|
||||||
glyf_table_size_reduction := i32sub (i32add ttlength (padding32 ttlength)) paddedlen;
|
glyf_table_size_reduction := i32sub (i32add ttlength (padding32 ttlength)) paddedlen;
|
||||||
newlen
|
newlen
|
||||||
else if string_of_tag tag = "cmap" && subset <> [] && encoding = Pdftext.ImplicitInFontFile then
|
else if string_of_tag tag = "cmap" && subset <> [] && encoding = Pdftext.ImplicitInFontFile then
|
||||||
let bs = make_write_bitstream () in~/Dropbox/cpdfwork/ttf/10000fonts/AEROPLAN.TTF
|
let bs = make_write_bitstream () in
|
||||||
let newlen = write_cmap_table subset cmap bs in
|
let newlen = write_cmap_table subset cmap bs in
|
||||||
let paddedlen = i32ofi (bytes_size (bytes_of_write_bitstream bs)) in
|
let paddedlen = i32ofi (bytes_size (bytes_of_write_bitstream bs)) in
|
||||||
if !dbg then Printf.printf "new cmap table length = %li\n" newlen;
|
if !dbg then Printf.printf "new cmap table length = %li\n" newlen;
|
||||||
|
@ -491,11 +493,14 @@ let write_font filename data =
|
||||||
close_out fh
|
close_out fh
|
||||||
|
|
||||||
let find_main encoding subset =
|
let find_main encoding subset =
|
||||||
cleave subset 3
|
(*(take subset 3, [drop subset 3])*)
|
||||||
(*let encoding_table = Pdftext.table_of_encoding encoding in
|
let encoding_table = Pdftext.table_of_encoding encoding in
|
||||||
|
let first, rest =
|
||||||
List.partition
|
List.partition
|
||||||
(fun u -> try ignore (Hashtbl.find encoding_table u); true with Not_found -> false)
|
(fun u -> try ignore (Hashtbl.find encoding_table u); true with Not_found -> false)
|
||||||
subset*)
|
subset
|
||||||
|
in
|
||||||
|
(first, splitinto 224 rest)
|
||||||
|
|
||||||
let parse ~subset data encoding =
|
let parse ~subset data encoding =
|
||||||
if !dbg then
|
if !dbg then
|
||||||
|
@ -549,7 +554,7 @@ let parse ~subset data encoding =
|
||||||
in
|
in
|
||||||
let ascent, descent, capheight, xheight, avgwidth =
|
let ascent, descent, capheight, xheight, avgwidth =
|
||||||
match os2 with
|
match os2 with
|
||||||
| None -> (0, 0, 0, 0, 0) (* FIXME raise (Pdf.PDFError "No os/2 table found in truetype font") *)
|
| None -> raise (Pdf.PDFError "No os/2 table found in truetype font")
|
||||||
| Some (o, l) -> let b = mk_b (i32toi o) in read_os2_table unitsPerEm b (i32toi l)
|
| Some (o, l) -> let b = mk_b (i32toi o) in read_os2_table unitsPerEm b (i32toi l)
|
||||||
in
|
in
|
||||||
let italicangle =
|
let italicangle =
|
||||||
|
@ -588,7 +593,7 @@ let parse ~subset data encoding =
|
||||||
if !dbg then Printf.printf "subtable has format %i, length %i, version %i\n" fmt lngth version;
|
if !dbg then Printf.printf "subtable has format %i, length %i, version %i\n" fmt lngth version;
|
||||||
let got_glyphcodes = read_encoding_table fmt lngth version b in
|
let got_glyphcodes = read_encoding_table fmt lngth version b in
|
||||||
(*if fmt = 4 then print_encoding_table 4 got_glyphcodes;*)
|
(*if fmt = 4 then print_encoding_table 4 got_glyphcodes;*)
|
||||||
if fmt = 4 then Hashtbl.iter (Hashtbl.add !glyphcodes) got_glyphcodes
|
(*if fmt = 4 then *)Hashtbl.iter (Hashtbl.add !glyphcodes) got_glyphcodes
|
||||||
done;
|
done;
|
||||||
end;
|
end;
|
||||||
let maxpoffset, maxplength =
|
let maxpoffset, maxplength =
|
||||||
|
@ -605,19 +610,19 @@ let parse ~subset data encoding =
|
||||||
| (_, _, o, l)::_ -> o, l
|
| (_, _, o, l)::_ -> o, l
|
||||||
| [] -> raise (Pdf.PDFError "No loca table found in TrueType font")
|
| [] -> raise (Pdf.PDFError "No loca table found in TrueType font")
|
||||||
in
|
in
|
||||||
let subset_1, subset_2 = find_main encoding subset in
|
let subset_1, subsets_2 = find_main encoding subset in
|
||||||
if !dbg && subset <> [] then
|
(*if !dbg && subset <> [] then
|
||||||
begin
|
begin
|
||||||
Printf.printf "***********Chars for main WinAnsiEncoding subset:\n";
|
Printf.printf "***********Chars for main WinAnsiEncoding subset:\n";
|
||||||
iter (Printf.printf "U+%04X ") subset_1;
|
iter (Printf.printf "U+%04X ") subset_1;
|
||||||
Printf.printf "\n***********Chars for higher subset:\n";
|
Printf.printf "\n***********Chars for higher subset:\n";
|
||||||
iter (Printf.printf "U+%04X ") subset_2;
|
iter (Printf.printf "U+%04X ") subset_2;
|
||||||
Printf.printf "\n";
|
Printf.printf "\n";
|
||||||
end;
|
end;*)
|
||||||
let flags_1 = calculate_flags false italicangle in
|
let flags_1 = calculate_flags false italicangle in
|
||||||
let flags_2 = calculate_flags true italicangle in
|
let flags_2 = calculate_flags true italicangle in
|
||||||
let firstchar_1, lastchar_1 = calculate_limits subset_1 in
|
let firstchar_1, lastchar_1 = calculate_limits subset_1 in
|
||||||
let firstchar_2, lastchar_2 = (33, length subset_2 + 33 - 1) in
|
let firstchars_2, lastchars_2 = split (map (fun subset -> (33, length subset + 33 - 1)) subsets_2) in
|
||||||
let numOfLongHorMetrics =
|
let numOfLongHorMetrics =
|
||||||
match keep (function (t, _, _, _) -> string_of_tag t = "hhea") !tables with
|
match keep (function (t, _, _, _) -> string_of_tag t = "hhea") !tables with
|
||||||
| (_, _, o, l)::_ -> let b = mk_b (i32toi o) in read_hhea_table b
|
| (_, _, o, l)::_ -> let b = mk_b (i32toi o) in read_hhea_table b
|
||||||
|
@ -628,9 +633,12 @@ let parse ~subset data encoding =
|
||||||
| (_, _, o, _)::_ -> read_hmtx_table numOfLongHorMetrics (mk_b (i32toi o))
|
| (_, _, o, _)::_ -> read_hmtx_table numOfLongHorMetrics (mk_b (i32toi o))
|
||||||
| [] -> raise (Pdf.PDFError "No hmtx table found in TrueType font")
|
| [] -> raise (Pdf.PDFError "No hmtx table found in TrueType font")
|
||||||
in
|
in
|
||||||
(*Printf.printf "firstchar_1, lastchar_1, firstchar_2, lastchar_2 = %i, %i, %i%, %i\n" firstchar_1 lastchar_1 firstchar_2 lastchar_2;*)
|
|
||||||
let widths_1 = calculate_widths unitsPerEm encoding firstchar_1 lastchar_1 subset_1 !glyphcodes hmtxdata in
|
let widths_1 = calculate_widths unitsPerEm encoding firstchar_1 lastchar_1 subset_1 !glyphcodes hmtxdata in
|
||||||
let widths_2 = calculate_width_higher unitsPerEm firstchar_2 lastchar_2 subset_2 !glyphcodes hmtxdata in
|
let widths_2 =
|
||||||
|
map3
|
||||||
|
(fun f l s -> calculate_width_higher unitsPerEm f l s !glyphcodes hmtxdata)
|
||||||
|
firstchars_2 lastchars_2 subsets_2
|
||||||
|
in
|
||||||
let maxwidth = calculate_maxwidth unitsPerEm hmtxdata in
|
let maxwidth = calculate_maxwidth unitsPerEm hmtxdata in
|
||||||
let stemv = calculate_stemv () in
|
let stemv = calculate_stemv () in
|
||||||
let b = mk_b (i32toi locaoffset) in
|
let b = mk_b (i32toi locaoffset) in
|
||||||
|
@ -640,47 +648,53 @@ let parse ~subset data encoding =
|
||||||
| (_, _, o, l)::_ -> o, l
|
| (_, _, o, l)::_ -> o, l
|
||||||
| [] -> raise (Pdf.PDFError "No glyf table found in TrueType font")
|
| [] -> raise (Pdf.PDFError "No glyf table found in TrueType font")
|
||||||
in
|
in
|
||||||
(*Printf.printf "Calculate main subset\n";*)
|
|
||||||
let main_subset =
|
let main_subset =
|
||||||
subset_font major minor !tables indexToLocFormat subset_1
|
subset_font major minor !tables indexToLocFormat subset_1
|
||||||
encoding !glyphcodes loca mk_b glyfoffset data
|
encoding !glyphcodes loca mk_b glyfoffset data
|
||||||
in
|
in
|
||||||
(*Printf.printf "Calculate higher subset\n";*)
|
let seconds_subsets =
|
||||||
let second_subset =
|
map
|
||||||
subset_font major minor !tables indexToLocFormat subset_2
|
(fun subset ->
|
||||||
Pdftext.ImplicitInFontFile !glyphcodes loca mk_b glyfoffset data
|
subset_font
|
||||||
|
major minor !tables indexToLocFormat subset Pdftext.ImplicitInFontFile
|
||||||
|
!glyphcodes loca mk_b glyfoffset data)
|
||||||
|
subsets_2
|
||||||
in
|
in
|
||||||
let second_tounicode =
|
let seconds_tounicodes =
|
||||||
|
map
|
||||||
|
(fun subset ->
|
||||||
if subset = [] then None else
|
if subset = [] then None else
|
||||||
let h = null_hash () in
|
let h = null_hash () in
|
||||||
List.iter2
|
List.iter2
|
||||||
(fun n u ->
|
(fun n u ->
|
||||||
let s = implode (tl (tl (explode (Pdftext.utf16be_of_codepoints [u])))) in
|
let s = implode (tl (tl (explode (Pdftext.utf16be_of_codepoints [u])))) in
|
||||||
Hashtbl.add h n s)
|
Hashtbl.add h n s)
|
||||||
(map (( + ) 33) (indx0 subset_2))
|
(map (( + ) 33) (indx0 subset))
|
||||||
subset_2;
|
subset;
|
||||||
Some h
|
Some h)
|
||||||
|
subsets_2
|
||||||
in
|
in
|
||||||
(*Printf.printf "returning the fonts. Job done.\n";*)
|
|
||||||
let one =
|
let one =
|
||||||
{flags = flags_1; minx; miny; maxx; maxy; italicangle; ascent; descent;
|
{flags = flags_1; minx; miny; maxx; maxy; italicangle; ascent; descent;
|
||||||
capheight; stemv; xheight; avgwidth; maxwidth; firstchar = firstchar_1; lastchar = lastchar_1;
|
capheight; stemv; xheight; avgwidth; maxwidth; firstchar = firstchar_1; lastchar = lastchar_1;
|
||||||
widths = widths_1; subset_fontfile = main_subset; subset = subset_1; tounicode = None}
|
widths = widths_1; subset_fontfile = main_subset; subset = subset_1; tounicode = None}
|
||||||
in
|
in
|
||||||
let two =
|
let twos =
|
||||||
|
map6
|
||||||
|
(fun firstchar lastchar widths subset_fontfile subset tounicode ->
|
||||||
{flags = flags_2; minx; miny; maxx; maxy; italicangle; ascent; descent;
|
{flags = flags_2; minx; miny; maxx; maxy; italicangle; ascent; descent;
|
||||||
capheight; stemv; xheight; avgwidth; maxwidth; firstchar = firstchar_2; lastchar = lastchar_2;
|
capheight; stemv; xheight; avgwidth; maxwidth; firstchar; lastchar;
|
||||||
widths = widths_2; subset_fontfile = second_subset; subset = subset_2;
|
widths; subset_fontfile; subset; tounicode})
|
||||||
tounicode = second_tounicode}
|
firstchars_2 lastchars_2 widths_2 seconds_subsets subsets_2 seconds_tounicodes
|
||||||
in
|
in
|
||||||
(*Printf.printf "\nMain subset:\n";
|
(*Printf.printf "\nMain subset:\n";
|
||||||
debug_t one;
|
debug_t one;
|
||||||
write_font "one.ttf" one.subset_fontfile;
|
write_font "one.ttf" one.subset_fontfile;
|
||||||
Printf.printf "\nHigher subset:\n";
|
Printf.printf "\nHigher subset:\n";
|
||||||
debug_t two;
|
debug_t two;
|
||||||
write_font "two.ttf" two.subset_fontfile;*)
|
write_font "two.ttf" (hd twos).subset_fontfile;*)
|
||||||
(subset, [one; two])
|
one::twos
|
||||||
|
|
||||||
(*let parse ~subset data encoding =
|
let parse ~subset data encoding =
|
||||||
try parse ~subset data encoding with
|
try parse ~subset data encoding with
|
||||||
e -> raise (Cpdferror.error ("Failed to parse TrueType font: " ^ Printexc.to_string e))*)
|
e -> raise (Cpdferror.error ("Failed to parse TrueType font: " ^ Printexc.to_string e))
|
||||||
|
|
|
@ -27,4 +27,4 @@ type t =
|
||||||
additional characters in the font. For subsetting, or to return a full
|
additional characters in the font. For subsetting, or to return a full
|
||||||
font-pack, you should supply a subset (a list of unicode codepoints whose
|
font-pack, you should supply a subset (a list of unicode codepoints whose
|
||||||
corresponding glyphs are required). *)
|
corresponding glyphs are required). *)
|
||||||
val parse : subset:int list -> Pdfio.bytes -> Pdftext.encoding -> int list * t list
|
val parse : subset:int list -> Pdfio.bytes -> Pdftext.encoding -> t list
|
||||||
|
|
Loading…
Reference in New Issue