2022-09-12 17:06:56 +02:00
|
|
|
(* Embed a font *)
|
2022-09-11 15:52:08 +02:00
|
|
|
open Pdfutil
|
|
|
|
|
2022-09-11 21:07:55 +02:00
|
|
|
(* For the first stage of our embedder, we are only allowing Latin, and we don't actually subset.
|
2022-09-11 15:52:08 +02:00
|
|
|
a) Get a list of Unicode codepoints;
|
|
|
|
b) See which of them are in the glyph list;
|
|
|
|
c) See which of those are in (StdEncoding|MacRomanEncoding|WinAnsiEncoding), and get their codes;
|
|
|
|
d) Build a font to do just those;
|
2022-09-11 21:07:55 +02:00
|
|
|
e) We put missing glyph or similar for any character not in the encoding
|
2022-09-11 15:52:08 +02:00
|
|
|
(* FUTURE *)
|
|
|
|
1) Actually subset the font to save size
|
|
|
|
2) Allow characters not in the standard encodings by builing one or more secondary subsets *)
|
|
|
|
|
2022-09-11 16:19:59 +02:00
|
|
|
(*let () =
|
2022-09-11 15:52:08 +02:00
|
|
|
iter
|
|
|
|
(fun u ->
|
|
|
|
Printf.printf "unicode %i --> " u;
|
|
|
|
let glyphname = Hashtbl.find glyphlist_table [u] in
|
|
|
|
Printf.printf "glyph name %s --> " glyphname;
|
|
|
|
let pdfcode = Hashtbl.find encoding_table glyphname in
|
|
|
|
Printf.printf "pdf code %i\n" pdfcode)
|
2022-09-11 21:07:55 +02:00
|
|
|
unicodepoints *)
|
|
|
|
|
|
|
|
let pdfcode_of_unicode_codepoint encoding_table glyphlist_table u =
|
2022-09-11 15:52:08 +02:00
|
|
|
try
|
|
|
|
Some (Hashtbl.find encoding_table (Hashtbl.find glyphlist_table [u]))
|
|
|
|
with
|
|
|
|
Not_found -> None
|
|
|
|
|
2022-09-11 21:07:55 +02:00
|
|
|
let calc_accepted_unicodepoints encoding_table glyphlist_table codepoints =
|
2022-09-11 15:52:08 +02:00
|
|
|
setify
|
|
|
|
(option_map
|
2022-09-12 17:06:56 +02:00
|
|
|
(fun u ->
|
|
|
|
match
|
|
|
|
pdfcode_of_unicode_codepoint encoding_table glyphlist_table u
|
|
|
|
with
|
|
|
|
| Some _ -> Some u
|
|
|
|
| None -> None)
|
2022-09-11 15:52:08 +02:00
|
|
|
codepoints)
|
|
|
|
|
|
|
|
let fontnum = ref 0
|
|
|
|
|
|
|
|
let basename () =
|
|
|
|
incr fontnum;
|
|
|
|
"AAAAA" ^ string_of_char (char_of_int (!fontnum + 65))
|
|
|
|
|
|
|
|
let string_of_encoding = function
|
|
|
|
| Pdftext.WinAnsiEncoding -> "/WinAnsiEncoding"
|
|
|
|
| Pdftext.MacRomanEncoding -> "/MacRomanEncoding"
|
|
|
|
| Pdftext.StandardEncoding -> "/StandardEncoding"
|
|
|
|
| _ -> failwith "unknown encoding"
|
|
|
|
|
2022-09-13 18:20:37 +02:00
|
|
|
(* FIXME add "" = full subset *)
|
2022-09-11 21:07:55 +02:00
|
|
|
let embed_truetype pdf ~fontfile ~fontname ~text ~encoding =
|
|
|
|
let unicodepoints = Pdftext.codepoints_of_utf8 text in
|
|
|
|
let glyphlist_table = Pdfglyphlist.reverse_glyph_hashes () in
|
|
|
|
let encoding_table = Pdftext.reverse_table_of_encoding encoding in
|
|
|
|
let accepted_unicodepoints =
|
|
|
|
map
|
2022-09-12 17:06:56 +02:00
|
|
|
(fun u ->
|
|
|
|
(u, pdfcode_of_unicode_codepoint encoding_table glyphlist_table u))
|
|
|
|
(calc_accepted_unicodepoints
|
|
|
|
encoding_table glyphlist_table unicodepoints)
|
2022-09-11 21:07:55 +02:00
|
|
|
in
|
2022-09-12 17:06:56 +02:00
|
|
|
let f = Cpdftruetype.parse ~subset:accepted_unicodepoints fontfile in
|
2022-09-11 21:07:55 +02:00
|
|
|
let name_1 = basename () in
|
|
|
|
let fontfile =
|
|
|
|
let len = Pdfio.bytes_size fontfile in
|
|
|
|
Pdf.Stream
|
|
|
|
{contents =
|
2022-09-12 17:06:56 +02:00
|
|
|
(Pdf.Dictionary
|
|
|
|
[("/Length", Pdf.Integer len); ("/Length1", Pdf.Integer len)],
|
2022-09-11 21:07:55 +02:00
|
|
|
Pdf.Got fontfile)}
|
|
|
|
in
|
|
|
|
let fontfile_num = Pdf.addobj pdf fontfile in
|
2022-09-12 17:06:56 +02:00
|
|
|
let module TT = Cpdftruetype in
|
2022-09-15 19:13:42 +02:00
|
|
|
let open Pdftext in
|
|
|
|
SimpleFont
|
|
|
|
{fonttype = Truetype;
|
|
|
|
basefont = Printf.sprintf "/%s+%s" name_1 fontname;
|
|
|
|
fontmetrics = None;
|
|
|
|
firstchar = f.TT.firstchar;
|
|
|
|
lastchar = f.TT.lastchar;
|
|
|
|
widths = f.TT.widths;
|
|
|
|
fontdescriptor = Some
|
|
|
|
{ascent = float_of_int f.TT.ascent;
|
|
|
|
descent = float_of_int f.TT.descent;
|
|
|
|
avgwidth = float_of_int f.TT.avgwidth;
|
|
|
|
maxwidth = float_of_int f.TT.maxwidth;
|
|
|
|
flags = f.TT.flags;
|
|
|
|
italicangle = float_of_int f.TT.italicangle;
|
|
|
|
capheight = float_of_int f.TT.capheight;
|
|
|
|
xheight = float_of_int f.TT.xheight;
|
|
|
|
stemv = float_of_int f.TT.stemv;
|
|
|
|
fontbbox = (float_of_int f.TT.minx, float_of_int f.TT.miny,
|
|
|
|
float_of_int f.TT.maxx, float_of_int f.TT.maxy);
|
|
|
|
fontfile = Some (FontFile2 fontfile_num);
|
|
|
|
charset = None;
|
|
|
|
tounicode = None};
|
|
|
|
encoding}
|
|
|
|
(*
|
2022-09-11 21:07:55 +02:00
|
|
|
let fontdescriptor =
|
|
|
|
Pdfread.parse_single_object
|
2022-09-12 17:06:56 +02:00
|
|
|
(Printf.sprintf
|
|
|
|
"<</Type/FontDescriptor/FontName/%s+%s/Flags %i/FontBBox[%i %i %i %i] \
|
|
|
|
/ItalicAngle %i/Ascent %i/Descent %i/CapHeight %i/StemV %i/XHeight \
|
|
|
|
%i/AvgWidth %i/MaxWidth %i/FontFile2 %i 0 R>>"
|
|
|
|
name_1 fontname f.TT.flags f.TT.minx f.TT.miny f.TT.maxx f.TT.maxy
|
|
|
|
f.TT.italicangle f.TT.ascent f.TT.descent f.TT.capheight f.TT.stemv
|
|
|
|
f.TT.xheight f.TT.avgwidth f.TT.maxwidth fontfile_num)
|
2022-09-11 21:07:55 +02:00
|
|
|
in
|
|
|
|
let fontdesc_num = Pdf.addobj pdf fontdescriptor in
|
|
|
|
let font =
|
|
|
|
Pdf.add_dict_entry
|
|
|
|
(Pdfread.parse_single_object
|
2022-09-12 17:06:56 +02:00
|
|
|
(Printf.sprintf
|
|
|
|
"<</Type/Font/Subtype/TrueType/BaseFont/%s+%s/FontDescriptor %i 0 R\
|
|
|
|
/Encoding%s/FirstChar %i/LastChar %i>>"
|
|
|
|
name_1 fontname fontdesc_num (string_of_encoding encoding)
|
|
|
|
f.TT.firstchar f.TT.lastchar))
|
2022-09-11 21:07:55 +02:00
|
|
|
"/Widths"
|
|
|
|
widths
|
|
|
|
in
|
2022-09-15 19:13:42 +02:00
|
|
|
Pdf.addobj pdf font*)
|