2022-09-12 17:06:56 +02:00
(* Embed a font *)
2022-09-11 15:52:08 +02:00
open Pdfutil
2022-09-11 21:07:55 +02:00
(* For the first stage of our embedder, we are only allowing Latin, and we don't actually subset.
2022-09-11 15:52:08 +02:00
a ) Get a list of Unicode codepoints ;
b ) See which of them are in the glyph list ;
c ) See which of those are in ( StdEncoding | MacRomanEncoding | WinAnsiEncoding ) , and get their codes ;
d ) Build a font to do just those ;
2022-09-11 21:07:55 +02:00
e ) We put missing glyph or similar for any character not in the encoding
2022-09-11 15:52:08 +02:00
(* FUTURE *)
1 ) Actually subset the font to save size
2 ) Allow characters not in the standard encodings by builing one or more secondary subsets * )
2022-09-11 16:19:59 +02:00
(* let ( ) =
2022-09-11 15:52:08 +02:00
iter
( fun u ->
Printf . printf " unicode %i --> " u ;
let glyphname = Hashtbl . find glyphlist_table [ u ] in
Printf . printf " glyph name %s --> " glyphname ;
let pdfcode = Hashtbl . find encoding_table glyphname in
Printf . printf " pdf code %i \n " pdfcode )
2022-09-11 21:07:55 +02:00
unicodepoints * )
let pdfcode_of_unicode_codepoint encoding_table glyphlist_table u =
2022-09-11 15:52:08 +02:00
try
Some ( Hashtbl . find encoding_table ( Hashtbl . find glyphlist_table [ u ] ) )
with
Not_found -> None
2022-09-11 21:07:55 +02:00
let calc_accepted_unicodepoints encoding_table glyphlist_table codepoints =
2022-09-11 15:52:08 +02:00
setify
( option_map
2022-09-12 17:06:56 +02:00
( fun u ->
match
pdfcode_of_unicode_codepoint encoding_table glyphlist_table u
with
| Some _ -> Some u
| None -> None )
2022-09-11 15:52:08 +02:00
codepoints )
let fontnum = ref 0
let basename () =
incr fontnum ;
" AAAAA " ^ string_of_char ( char_of_int ( ! fontnum + 65 ) )
let string_of_encoding = function
| Pdftext . WinAnsiEncoding -> " /WinAnsiEncoding "
| Pdftext . MacRomanEncoding -> " /MacRomanEncoding "
| Pdftext . StandardEncoding -> " /StandardEncoding "
| _ -> failwith " unknown encoding "
2022-09-13 18:20:37 +02:00
(* FIXME add "" = full subset *)
2022-09-11 21:07:55 +02:00
let embed_truetype pdf ~ fontfile ~ fontname ~ text ~ encoding =
let unicodepoints = Pdftext . codepoints_of_utf8 text in
let glyphlist_table = Pdfglyphlist . reverse_glyph_hashes () in
let encoding_table = Pdftext . reverse_table_of_encoding encoding in
let accepted_unicodepoints =
map
2022-09-12 17:06:56 +02:00
( fun u ->
( u , pdfcode_of_unicode_codepoint encoding_table glyphlist_table u ) )
( calc_accepted_unicodepoints
encoding_table glyphlist_table unicodepoints )
2022-09-11 21:07:55 +02:00
in
2022-09-12 17:06:56 +02:00
let f = Cpdftruetype . parse ~ subset : accepted_unicodepoints fontfile in
2022-09-11 21:07:55 +02:00
let widths =
2022-09-12 17:06:56 +02:00
Pdf . Array
( map ( fun x -> Pdf . Integer x )
( Array . to_list f . Cpdftruetype . widths ) )
2022-09-11 21:07:55 +02:00
in
let name_1 = basename () in
let fontfile =
let len = Pdfio . bytes_size fontfile in
Pdf . Stream
{ contents =
2022-09-12 17:06:56 +02:00
( Pdf . Dictionary
[ ( " /Length " , Pdf . Integer len ) ; ( " /Length1 " , Pdf . Integer len ) ] ,
2022-09-11 21:07:55 +02:00
Pdf . Got fontfile ) }
in
let fontfile_num = Pdf . addobj pdf fontfile in
2022-09-12 17:06:56 +02:00
let module TT = Cpdftruetype in
2022-09-11 21:07:55 +02:00
let fontdescriptor =
Pdfread . parse_single_object
2022-09-12 17:06:56 +02:00
( Printf . sprintf
" <</Type/FontDescriptor/FontName/%s+%s/Flags %i/FontBBox[%i %i %i %i] \
/ ItalicAngle % i / Ascent % i / Descent % i / CapHeight % i / StemV % i / XHeight \
% i / AvgWidth % i / MaxWidth % i / FontFile2 % i 0 R > > "
name_1 fontname f . TT . flags f . TT . minx f . TT . miny f . TT . maxx f . TT . maxy
f . TT . italicangle f . TT . ascent f . TT . descent f . TT . capheight f . TT . stemv
f . TT . xheight f . TT . avgwidth f . TT . maxwidth fontfile_num )
2022-09-11 21:07:55 +02:00
in
let fontdesc_num = Pdf . addobj pdf fontdescriptor in
let font =
Pdf . add_dict_entry
( Pdfread . parse_single_object
2022-09-12 17:06:56 +02:00
( Printf . sprintf
" <</Type/Font/Subtype/TrueType/BaseFont/%s+%s/FontDescriptor %i 0 R \
/ Encoding % s / FirstChar % i / LastChar % i > > "
name_1 fontname fontdesc_num ( string_of_encoding encoding )
f . TT . firstchar f . TT . lastchar ) )
2022-09-11 21:07:55 +02:00
" /Widths "
widths
in
Pdf . addobj pdf font
2022-09-13 18:20:37 +02:00
2022-09-13 18:59:13 +02:00
(* For now, to get a Pdftext.font, put it in an empty PDF and then read it back. This will be fixed later. We just need it so that existing code which uses a charcode extractor can be reused. *)
2022-09-13 18:20:37 +02:00
let font_of_truetype ~ fontfile ~ fontname ~ encoding =
let pdf = Pdf . empty () in
let fontobjnum = embed_truetype pdf ~ fontfile ~ fontname ~ text : " " ~ encoding in
Pdftext . read_font pdf ( Pdf . lookup_obj pdf fontobjnum )