From a9204a03652046dd6c8579257673893e1ae183d3 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Thu, 13 Jul 2023 14:44:57 +0100 Subject: [PATCH] Move collate into cpdfembed, patch up -draw --- cpdfdraw.ml | 28 ++++++++++++++++------------ cpdfembed.ml | 15 +++++++++++++++ cpdfembed.mli | 2 ++ cpdftoc.ml | 16 +--------------- 4 files changed, 34 insertions(+), 27 deletions(-) diff --git a/cpdfdraw.ml b/cpdfdraw.ml index 3942267..80afcc6 100644 --- a/cpdfdraw.ml +++ b/cpdfdraw.ml @@ -136,17 +136,21 @@ let process_specials pdf endpage filename bates batespad num page s = in Cpdfaddtext.process_text (res ()).time s pairs -(* FIXME: implement for other kinds of font *) let runs_of_utf8 s = - match (res ()).current_fontpack with - | ((f::_, _) as fontpack) -> - let codepoints = Pdftext.codepoints_of_utf8 s in - let charcodes = option_map (Cpdfembed.get_char fontpack) codepoints in - let fontname = - fst (Hashtbl.find (res ()).fonts f) - in - [Pdfops.Op_Tf (fontname, (res ()).font_size); Pdfops.Op_Tj (implode (map (fun (c, _, _) -> char_of_int c) charcodes))] - | _ -> failwith "charcodes_of_utf8: unknown font" + let fontpack = (res ()).current_fontpack in + let codepoints = Pdftext.codepoints_of_utf8 s in + let triples = option_map (Cpdfembed.get_char fontpack) codepoints in + let collated = Cpdfembed.collate_runs triples in + flatten + (map + (fun l -> + if l = [] then [] else + let f = match l with (_, _, f)::_ -> f | _ -> assert false in + let fontname = fst (Hashtbl.find (res ()).fonts f) in + let charcodes = map (fun (c, _, _) -> char_of_int c) l in + [Pdfops.Op_Tf (fontname, (res ()).font_size); + Pdfops.Op_Tj (implode charcodes)]) + collated) let extgstate kind v = try Hashtbl.find (res ()).extgstates (kind, v) with @@ -239,9 +243,9 @@ let rec ops_of_drawop pdf endpage filename bates batespad num page = function match cpdffont with | PreMadeFontPack fp -> fp | EmbedInfo {fontfile; fontname; encoding} -> - Cpdfembed.embed_truetype pdf ~fontfile ~fontname ~codepoints:[int_of_char 'a'] ~encoding + Cpdfembed.embed_truetype pdf ~fontfile ~fontname ~codepoints:[int_of_char 'H'] ~encoding | ExistingNamedFont -> - error "-draw does not support using an exsiting named font" + error "-draw does not support using an existing named font" in let ns = map diff --git a/cpdfembed.ml b/cpdfembed.ml index 0840ca6..7f3d54b 100644 --- a/cpdfembed.ml +++ b/cpdfembed.ml @@ -92,3 +92,18 @@ let embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding = let fs = Cpdftruetype.parse ~subset:codepoints fontfile encoding in let subsets_and_their_fonts = map (make_single_font ~fontname ~encoding pdf) fs in (map snd subsets_and_their_fonts, make_fontpack_hashtable subsets_and_their_fonts) + +let rec collate_runs cfn a = function + | [] -> rev (map rev a) + | (charcode, fontnum, font) as h::t -> + match a with + | [] -> collate_runs fontnum [[h]] t + | this::rest -> + if fontnum = cfn + then collate_runs cfn ((h::this)::rest) t + else collate_runs fontnum ([h]::this::rest) t + +let collate_runs = function + | [] -> [] + | (_, fontnum, _)::_ as l -> collate_runs fontnum [] l + diff --git a/cpdfembed.mli b/cpdfembed.mli index 4592a51..7cdadd7 100644 --- a/cpdfembed.mli +++ b/cpdfembed.mli @@ -16,3 +16,5 @@ val get_char : t -> int -> (int * int * Pdftext.font) option val embed_truetype : Pdf.t -> fontfile:Pdfio.bytes -> fontname:string -> codepoints:int list -> encoding:Pdftext.encoding -> t + +val collate_runs : ('a * 'b * 'c) list -> ('a * 'b * 'c) list list diff --git a/cpdftoc.ml b/cpdftoc.ml index efd326a..e4bb34a 100644 --- a/cpdftoc.ml +++ b/cpdftoc.ml @@ -14,20 +14,6 @@ let rec real_newline = function | x::r -> x::real_newline r | [] -> [] -let rec collate_runs cfn a = function - | [] -> rev (map rev a) - | (charcode, fontnum, font) as h::t -> - match a with - | [] -> collate_runs fontnum [[h]] t - | this::rest -> - if fontnum = cfn - then collate_runs cfn ((h::this)::rest) t - else collate_runs fontnum ([h]::this::rest) t - -let collate_runs = function - | [] -> [] - | (_, fontnum, _)::_ as l -> collate_runs fontnum [] l - let rec width_of_runs runs = match runs with | Cpdftype.Font (f, fontsize)::Cpdftype.Text t::more -> @@ -39,7 +25,7 @@ let rec width_of_runs runs = let of_utf8 fontpack fontsize t = let codepoints = Pdftext.codepoints_of_utf8 t in let fonted = option_map (Cpdfembed.get_char fontpack) codepoints in - let collated = collate_runs fonted in + let collated = Cpdfembed.collate_runs fonted in flatten (map (function