From d8fe8c7b07f98c1b92453616c66d29c30b68bfbc Mon Sep 17 00:00:00 2001 From: John Whitington Date: Sun, 11 Sep 2022 20:07:55 +0100 Subject: [PATCH] more --- cpdfembed.ml | 149 +++++++++++++++----------------------------------- cpdfembed.mli | 5 +- 2 files changed, 49 insertions(+), 105 deletions(-) diff --git a/cpdfembed.ml b/cpdfembed.ml index 362dc60..7212c86 100644 --- a/cpdfembed.ml +++ b/cpdfembed.ml @@ -1,26 +1,16 @@ (* Truetype font embedding example *) open Pdfutil -(* For the first stage of our embedder, we are only allowing standard encodings, and we don't actually subset. +(* For the first stage of our embedder, we are only allowing Latin, and we don't actually subset. a) Get a list of Unicode codepoints; b) See which of them are in the glyph list; c) See which of those are in (StdEncoding|MacRomanEncoding|WinAnsiEncoding), and get their codes; d) Build a font to do just those; - e) We put question marks for any character not in the encoding + e) We put missing glyph or similar for any character not in the encoding (* FUTURE *) 1) Actually subset the font to save size 2) Allow characters not in the standard encodings by builing one or more secondary subsets *) -(* UTF8 Input text *) -let text = "Noto Sans Black Àë" -let encoding = Pdftext.MacRomanEncoding - -let unicodepoints = Pdftext.codepoints_of_utf8 text - -let glyphlist_table = Pdfglyphlist.reverse_glyph_hashes () - -let encoding_table = Pdftext.reverse_table_of_encoding encoding - (*let () = iter (fun u -> @@ -29,117 +19,68 @@ let encoding_table = Pdftext.reverse_table_of_encoding encoding Printf.printf "glyph name %s --> " glyphname; let pdfcode = Hashtbl.find encoding_table glyphname in Printf.printf "pdf code %i\n" pdfcode) - unicodepoints - -let pdfcode_of_unicode_codepoint u = + unicodepoints *) + +let pdfcode_of_unicode_codepoint encoding_table glyphlist_table u = try Some (Hashtbl.find encoding_table (Hashtbl.find glyphlist_table [u])) with Not_found -> None -let tj_text = - implode - (map - (fun x -> match pdfcode_of_unicode_codepoint x with Some c -> char_of_int c | None -> '?') - unicodepoints) - -let calc_accepted_unicodepoints codepoints = +let calc_accepted_unicodepoints encoding_table glyphlist_table codepoints = setify (option_map - (fun u -> match pdfcode_of_unicode_codepoint u with Some _ -> Some u | None -> None) + (fun u -> match pdfcode_of_unicode_codepoint encoding_table glyphlist_table u with Some _ -> Some u | None -> None) codepoints) -let accepted_unicodepoints = - map - (fun u -> (u, pdfcode_of_unicode_codepoint u)) - (calc_accepted_unicodepoints unicodepoints) - -let contents_of_file filename = - let ch = open_in_bin filename in - let s = really_input_string ch (in_channel_length ch) in - close_in ch; - s - -let fontname = "NotoSans-Black" -let fontstr = contents_of_file (fontname ^ ".ttf") - -let f = - Cpdftruetype.parse ~subset:accepted_unicodepoints (Pdfio.bytes_of_string fontstr) - -let contents = - "1 0 0 1 50 770 cm BT/TT1 36 Tf(" ^ tj_text ^ ")Tj ET" - -let widths = - Pdf.Array (map (fun x -> Pdf.Integer x) (Array.to_list f.Cpdftruetype.widths)) - let fontnum = ref 0 let basename () = incr fontnum; "AAAAA" ^ string_of_char (char_of_int (!fontnum + 65)) -let name_1 = basename () - let string_of_encoding = function | Pdftext.WinAnsiEncoding -> "/WinAnsiEncoding" | Pdftext.MacRomanEncoding -> "/MacRomanEncoding" | Pdftext.StandardEncoding -> "/StandardEncoding" | _ -> failwith "unknown encoding" -let font = - Pdf.add_dict_entry - (Pdfread.parse_single_object - (Printf.sprintf "<>" name_1 fontname (string_of_encoding encoding) f.Cpdftruetype.firstchar f.Cpdftruetype.lastchar)) - "/Widths" - widths - -let fontdescriptor = - Pdfread.parse_single_object - (Printf.sprintf "<>" - name_1 fontname f.Cpdftruetype.flags f.Cpdftruetype.minx f.Cpdftruetype.miny f.Cpdftruetype.maxx f.Cpdftruetype.maxy f.Cpdftruetype.italicangle - f.Cpdftruetype.ascent f.Cpdftruetype.descent f.Cpdftruetype.capheight f.Cpdftruetype.stemv f.Cpdftruetype.xheight f.Cpdftruetype.avgwidth f.Cpdftruetype.maxwidth) - -let fontfile = - let len = String.length fontstr in - Pdf.Stream - {contents = - (Pdf.Dictionary [("/Length", Pdf.Integer len); ("/Length1", Pdf.Integer len)], - Pdf.Got (Pdfio.bytes_of_string fontstr))} -*) - -(*let objects = - [(1, Pdfread.parse_single_object "<>"); - (2, Pdfread.parse_single_object "<>"); - (3, Pdfread.parse_single_object "<>>>/Parent 2 0 R/MediaBox[0 0 595 842]/Rotate 0/Contents[4 0 R]>>"); - (4, Pdf.Stream - {contents = (Pdf.Dictionary [("/Length", Pdf.Integer (String.length contents))], - (Pdf.Got (Pdfio.bytes_of_string contents)))}); - (5, font); - (6, fontdescriptor); - (7, fontfile); - ] - -let root = 1 - -let trailerdict = - Pdfread.parse_single_object (Printf.sprintf "<>" (length objects + 1)) - -let pdf = - let pdf = - {Pdf.major = 2; - Pdf.minor = 0; - Pdf.root = root; - Pdf.objects = - {Pdf.maxobjnum = 0; - Pdf.parse = None; - Pdf.pdfobjects = Pdf.pdfobjmap_empty (); - Pdf.object_stream_ids = null_hash ()}; - Pdf.trailerdict = trailerdict; - Pdf.was_linearized = false; - Pdf.saved_encryption = None} +let embed_truetype pdf ~fontfile ~fontname ~text ~encoding = + let unicodepoints = Pdftext.codepoints_of_utf8 text in + let glyphlist_table = Pdfglyphlist.reverse_glyph_hashes () in + let encoding_table = Pdftext.reverse_table_of_encoding encoding in + let accepted_unicodepoints = + map + (fun u -> (u, pdfcode_of_unicode_codepoint encoding_table glyphlist_table u)) + (calc_accepted_unicodepoints encoding_table glyphlist_table unicodepoints) in - iter (Pdf.addobj_given_num pdf) objects; - pdf - -let () = - Pdfwrite.pdf_to_file pdf "subset.pdf"*) + let f = + Cpdftruetype.parse ~subset:accepted_unicodepoints fontfile + in + let widths = + Pdf.Array (map (fun x -> Pdf.Integer x) (Array.to_list f.Cpdftruetype.widths)) + in + let name_1 = basename () in + let fontfile = + let len = Pdfio.bytes_size fontfile in + Pdf.Stream + {contents = + (Pdf.Dictionary [("/Length", Pdf.Integer len); ("/Length1", Pdf.Integer len)], + Pdf.Got fontfile)} + in + let fontfile_num = Pdf.addobj pdf fontfile in + let fontdescriptor = + Pdfread.parse_single_object + (Printf.sprintf "<>" + name_1 fontname f.Cpdftruetype.flags f.Cpdftruetype.minx f.Cpdftruetype.miny f.Cpdftruetype.maxx f.Cpdftruetype.maxy f.Cpdftruetype.italicangle + f.Cpdftruetype.ascent f.Cpdftruetype.descent f.Cpdftruetype.capheight f.Cpdftruetype.stemv f.Cpdftruetype.xheight f.Cpdftruetype.avgwidth f.Cpdftruetype.maxwidth fontfile_num) + in + let fontdesc_num = Pdf.addobj pdf fontdescriptor in + let font = + Pdf.add_dict_entry + (Pdfread.parse_single_object + (Printf.sprintf "<>" name_1 fontname fontdesc_num (string_of_encoding encoding) f.Cpdftruetype.firstchar f.Cpdftruetype.lastchar)) + "/Widths" + widths + in + Pdf.addobj pdf font diff --git a/cpdfembed.mli b/cpdfembed.mli index 948db8f..b396a59 100644 --- a/cpdfembed.mli +++ b/cpdfembed.mli @@ -1 +1,4 @@ -(* *) +(* Embed a TrueType font for the given set of UTF8 characters in the given + encoding, adding it as an object to the PDF, and returning the number of + that object. *) +val embed_truetype : Pdf.t -> fontfile:Pdfio.bytes -> fontname:string -> text:string -> encoding:Pdftext.encoding -> int