Rehabilitating toc

This commit is contained in:
John Whitington 2023-07-11 16:53:21 +01:00
parent 5ec46e5140
commit 5852c184a8
2 changed files with 25 additions and 8 deletions

View File

@ -14,16 +14,33 @@ let rec real_newline = function
| x::r -> x::real_newline r
| [] -> []
(* [collate_runs l] splits a list of (charcode, fontnum, font) triples into
   maximal runs of consecutive triples sharing the same font number. The
   order of the runs, and of the triples inside each run, follows the input.
   The empty list yields the empty list. *)
let collate_runs = function
  | [] -> []
  | ((_, first_font, _) as first) :: rest ->
      (* [finished] holds completed runs, most recent first. [run] is the run
         being built, most recent triple first, and [run_font] is its font
         number. Both are reversed as they are emitted. *)
      let rec go finished run run_font = function
        | [] -> rev (rev run :: finished)
        | ((_, fontnum, _) as item) :: tl ->
            if fontnum = run_font
              then go finished (item :: run) run_font tl
              else go (rev run :: finished) [item] fontnum tl
      in
        go [] [first] first_font rest
(* Run of Font / Text elements from a fontpack and UTF8 text.

   [of_utf8 fontpack fontsize t] returns a flat list of Cpdftype elements.
   Each run of consecutive characters with the same font number becomes one
   [Cpdftype.Font (font, fontsize)] followed by one [Cpdftype.Text] holding
   that run's characters.

   [char_of_int] raises [Invalid_argument] for a charcode outside 0..255. *)
let of_utf8 fontpack fontsize t =
  (* 1. Text to (charcode, fontnum, font) triples, using the fontpack to find
     which font each codepoint needs. NOTE(review): option_map presumably
     drops codepoints for which Cpdfembed.get_char finds no font - confirm. *)
  let fonted =
    option_map (Cpdfembed.get_char fontpack) (Pdftext.codepoints_of_utf8 t)
  in
  (* 2. Collate adjacent triples which share a font number into runs. *)
  let collated = collate_runs fonted in
  (* 3. Produce one Font / Text pair per run, converting charcodes to chars.
     The font is taken from the first triple of the run. *)
  flatten
    (map
       (function
        | [] -> []
        | (_, _, font) :: _ as run ->
            let charcodes = map (fun (c, _, _) -> char_of_int c) run in
              [Cpdftype.Font (font, fontsize); Cpdftype.Text charcodes])
       collated)
(* Cpdftype codepoints from a font and PDFDocEncoding string *)
let of_pdfdocencoding fontpack fontsize t =

View File

@ -289,7 +289,7 @@ let write_glyf_table subset cmap bs mk_b glyfoffset loca =
with
Not_found -> ())
subset;
let locnums = expand_composites mk_b loca glyfoffset (sort compare (map fst (list_of_hashtbl locnums))) in
let locnums = (*expand_composites mk_b loca glyfoffset*) (sort compare (map fst (list_of_hashtbl locnums))) in
(*if !dbg then*)
(Printf.printf "We want glyfs for locations: ";
iter (Printf.printf "%i ") locnums; Printf.printf "\n");
@ -680,7 +680,7 @@ let parse ~subset data encoding =
write_font "one.ttf" one.subset_fontfile;
(*Printf.printf "\nHigher subset:\n";
debug_t (hd twos);*)
write_font "two.ttf" (hd twos).subset_fontfile;
if twos <> [] then write_font "two.ttf" (hd twos).subset_fontfile;
one::twos
let parse ~subset data encoding =