From 6a92be56cf156a8d5b70ec354787b1f934a6c317 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Tue, 18 Jul 2023 14:05:17 +0100 Subject: [PATCH] Beginning -add-text/TTF --- cpdfaddtext.ml | 38 ++++++++++++++++++-------------------- cpdfembed.ml | 1 - cpdftruetype.ml | 17 ++++++++++------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/cpdfaddtext.ml b/cpdfaddtext.ml index f84de59..63662e3 100644 --- a/cpdfaddtext.ml +++ b/cpdfaddtext.ml @@ -231,7 +231,7 @@ let expand_lines text time pdf endpage extract_text_font_size filename bates bat let addtext time lines linewidth outline fast colour fontname encoding bates batespad - fontsize fontpack font fontpdfobj underneath position hoffset voffset text pages + fontsize fontpack font fontpdfobj fontpackpdfobjs underneath position hoffset voffset text pages cropbox opacity justification filename extract_text_font_size shift raw pdf = let endpage = Pdfpage.endpage pdf in @@ -513,27 +513,25 @@ let | Some (Pdftext.SimpleFont {encoding}) -> encoding | _ -> Pdftext.WinAnsiEncoding in - iter - (fun line -> - let voff, hoff = !voffset, 0. in - pdf := - addtext time lines linewidth outline fast colour !realfontname encoding - bates batespad fontsize fontpack font fontpdfobj underneath position hoff voff line - pages cropbox opacity justification filename extract_text_font_size shift - raw !pdf; - voffset := !voffset +. (linespacing *. fontsize)) - lines; - (* FIXME Here we need to embed all the fonts, not just one *) - begin match cpdffont with + let fontpackpdfobjs = + match cpdffont with | Cpdfembed.EmbedInfo {fontfile; fontname; encoding} -> let codepoints = map fst (list_of_hashtbl used) in - let objnum = match fontpdfobj with Pdf.Indirect i -> i | _ -> failwith "bad fontpdfobj" in - let font = hd (fst (Cpdfembed.embed_truetype !pdf ~fontfile ~fontname ~codepoints ~encoding)) in - ignore (Pdftext.write_font ~objnum !pdf font) - | _ -> () - end; - !pdf - + let fonts = fst (Cpdfembed.embed_truetype !pdf ~fontfile ~fontname ~codepoints ~encoding) in + map (Pdftext.write_font !pdf) fonts + | _ -> [] + in + iter + (fun line -> + let voff, hoff = !voffset, 0. in + pdf := + addtext time lines linewidth outline fast colour !realfontname encoding + bates batespad fontsize fontpack font fontpdfobj fontpackpdfobjs underneath + position hoff voff line pages cropbox opacity justification filename + extract_text_font_size shift raw !pdf; + voffset := !voffset +. (linespacing *. fontsize)) + lines; + !pdf let addrectangle fast (w, h) colour outline linewidth opacity position relative_to_cropbox diff --git a/cpdfembed.ml b/cpdfembed.ml index 7f3d54b..88b1fba 100644 --- a/cpdfembed.ml +++ b/cpdfembed.ml @@ -106,4 +106,3 @@ let rec collate_runs cfn a = function let collate_runs = function | [] -> [] | (_, fontnum, _)::_ as l -> collate_runs fontnum [] l - diff --git a/cpdftruetype.ml b/cpdftruetype.ml index 2e58bbf..5078541 100644 --- a/cpdftruetype.ml +++ b/cpdftruetype.ml @@ -6,6 +6,8 @@ let dbg = (* Pdfe.logger := (fun s -> print_string s; flush stdout) *) ref false +let test_subsetting = true + type t = {flags : int; minx : int; @@ -496,13 +498,14 @@ let write_font filename data = close_out fh let find_main encoding subset = - let encoding_table = Pdftext.table_of_encoding encoding in - let first, rest = - List.partition - (fun u -> try ignore (Hashtbl.find encoding_table u); true with Not_found -> false) - subset - in - (first, splitinto 224 rest) + if test_subsetting then (take subset 3, [drop subset 3]) else + let encoding_table = Pdftext.table_of_encoding encoding in + let first, rest = + List.partition + (fun u -> try ignore (Hashtbl.find encoding_table u); true with Not_found -> false) + subset + in + (first, splitinto 224 rest) let parse ~subset data encoding = let mk_b byte_offset = bitbytes_of_input (let i = input_of_bytes data in i.seek_in byte_offset; i) in