cpdf-source/cpdfembed.ml

111 lines
4.0 KiB
OCaml
Raw Normal View History

2022-09-12 17:06:56 +02:00
(* Embed a font *)
2022-09-11 15:52:08 +02:00
open Pdfutil
2022-09-11 21:07:55 +02:00
(* For the first stage of our embedder, we are only allowing Latin, and we don't actually subset.
2022-09-11 15:52:08 +02:00
a) Get a list of Unicode codepoints;
b) See which of them are in the glyph list;
c) See which of those are in (StdEncoding|MacRomanEncoding|WinAnsiEncoding), and get their codes;
d) Build a font to do just those;
2022-09-11 21:07:55 +02:00
e) We put missing glyph or similar for any character not in the encoding
2022-09-11 15:52:08 +02:00
(* FUTURE *)
1) Actually subset the font to save size
2) Allow characters not in the standard encodings by builing one or more secondary subsets *)
2022-09-11 16:19:59 +02:00
(*let () =
2022-09-11 15:52:08 +02:00
iter
(fun u ->
Printf.printf "unicode %i --> " u;
let glyphname = Hashtbl.find glyphlist_table [u] in
Printf.printf "glyph name %s --> " glyphname;
let pdfcode = Hashtbl.find encoding_table glyphname in
Printf.printf "pdf code %i\n" pdfcode)
2022-09-11 21:07:55 +02:00
unicodepoints *)
let pdfcode_of_unicode_codepoint encoding_table glyphlist_table u =
2022-09-11 15:52:08 +02:00
try
Some (Hashtbl.find encoding_table (Hashtbl.find glyphlist_table [u]))
with
Not_found -> None
2022-09-11 21:07:55 +02:00
let calc_accepted_unicodepoints encoding_table glyphlist_table codepoints =
2022-09-11 15:52:08 +02:00
setify
(option_map
2022-09-12 17:06:56 +02:00
(fun u ->
match
pdfcode_of_unicode_codepoint encoding_table glyphlist_table u
with
| Some _ -> Some u
| None -> None)
2022-09-11 15:52:08 +02:00
codepoints)
let fontnum = ref 0
let basename () =
incr fontnum;
"AAAAA" ^ string_of_char (char_of_int (!fontnum + 65))
let string_of_encoding = function
| Pdftext.WinAnsiEncoding -> "/WinAnsiEncoding"
| Pdftext.MacRomanEncoding -> "/MacRomanEncoding"
| Pdftext.StandardEncoding -> "/StandardEncoding"
| _ -> failwith "unknown encoding"
2022-09-13 18:20:37 +02:00
(* FIXME add "" = full subset *)
2022-09-11 21:07:55 +02:00
let embed_truetype pdf ~fontfile ~fontname ~text ~encoding =
let unicodepoints = Pdftext.codepoints_of_utf8 text in
let glyphlist_table = Pdfglyphlist.reverse_glyph_hashes () in
let encoding_table = Pdftext.reverse_table_of_encoding encoding in
let accepted_unicodepoints =
map
2022-09-12 17:06:56 +02:00
(fun u ->
(u, pdfcode_of_unicode_codepoint encoding_table glyphlist_table u))
(calc_accepted_unicodepoints
encoding_table glyphlist_table unicodepoints)
2022-09-11 21:07:55 +02:00
in
2022-09-12 17:06:56 +02:00
let f = Cpdftruetype.parse ~subset:accepted_unicodepoints fontfile in
2022-09-11 21:07:55 +02:00
let widths =
2022-09-12 17:06:56 +02:00
Pdf.Array
(map (fun x -> Pdf.Integer x)
(Array.to_list f.Cpdftruetype.widths))
2022-09-11 21:07:55 +02:00
in
let name_1 = basename () in
let fontfile =
let len = Pdfio.bytes_size fontfile in
Pdf.Stream
{contents =
2022-09-12 17:06:56 +02:00
(Pdf.Dictionary
[("/Length", Pdf.Integer len); ("/Length1", Pdf.Integer len)],
2022-09-11 21:07:55 +02:00
Pdf.Got fontfile)}
in
let fontfile_num = Pdf.addobj pdf fontfile in
2022-09-12 17:06:56 +02:00
let module TT = Cpdftruetype in
2022-09-11 21:07:55 +02:00
let fontdescriptor =
Pdfread.parse_single_object
2022-09-12 17:06:56 +02:00
(Printf.sprintf
"<</Type/FontDescriptor/FontName/%s+%s/Flags %i/FontBBox[%i %i %i %i] \
/ItalicAngle %i/Ascent %i/Descent %i/CapHeight %i/StemV %i/XHeight \
%i/AvgWidth %i/MaxWidth %i/FontFile2 %i 0 R>>"
name_1 fontname f.TT.flags f.TT.minx f.TT.miny f.TT.maxx f.TT.maxy
f.TT.italicangle f.TT.ascent f.TT.descent f.TT.capheight f.TT.stemv
f.TT.xheight f.TT.avgwidth f.TT.maxwidth fontfile_num)
2022-09-11 21:07:55 +02:00
in
let fontdesc_num = Pdf.addobj pdf fontdescriptor in
let font =
Pdf.add_dict_entry
(Pdfread.parse_single_object
2022-09-12 17:06:56 +02:00
(Printf.sprintf
"<</Type/Font/Subtype/TrueType/BaseFont/%s+%s/FontDescriptor %i 0 R\
/Encoding%s/FirstChar %i/LastChar %i>>"
name_1 fontname fontdesc_num (string_of_encoding encoding)
f.TT.firstchar f.TT.lastchar))
2022-09-11 21:07:55 +02:00
"/Widths"
widths
in
Pdf.addobj pdf font
2022-09-13 18:20:37 +02:00
2022-09-13 18:59:13 +02:00
(* For now, to get a Pdftext.font, put it in an empty PDF and then read it back. This will be fixed later. We just need it so that existing code which uses a charcode extractor can be reused. *)
2022-09-13 18:20:37 +02:00
let font_of_truetype ~fontfile ~fontname ~encoding =
let pdf = Pdf.empty () in
let fontobjnum = embed_truetype pdf ~fontfile ~fontname ~text:"" ~encoding in
Pdftext.read_font pdf (Pdf.lookup_obj pdf fontobjnum)