more

2025-06-05 22:09:39 +02:00 · 2022-11-22 15:09:21 +00:00
parent 2e1e343d0c
commit 08187a396d
2 changed files with 26 additions and 8 deletions
--- a/cpdfembed.ml
+++ b/cpdfembed.ml
@ -20,7 +20,8 @@ let pdfcode_of_unicode_codepoint encoding_table glyphlist_table u =
    Not_found -> None
 let calc_accepted_unicodepoints encoding_table glyphlist_table codepoints =
-  setify
+  sort (fun a b -> compare b a) (* FIXME: Remove once subset experiment gone *)
  (setify
    (option_map
       (fun u ->
          match
@ -28,7 +29,7 @@ let calc_accepted_unicodepoints encoding_table glyphlist_table codepoints =
          with
          | Some _ -> Some u
          | None -> None)
-       codepoints)
+       codepoints))
 let fontnum = ref 0
--- a/cpdftruetype.ml
+++ b/cpdftruetype.ml
@ -25,7 +25,7 @@ type t =
   subset : int list;
   tounicode : (int, string) Hashtbl.t option}
-let dbg = ref false
+let dbg = ref true
 let required_tables =
  ["head"; "hhea"; "loca"; "cmap"; "maxp"; "cvt "; "glyf"; "prep"; "hmtx"; "fpgm"]
@ -120,24 +120,33 @@ let read_format_4_encoding_table b =
    t
 let print_encoding_table (table : (int, int) Hashtbl.t) =
-  let l = list_of_hashtbl table in
+  let unicodedata = Cpdfunicodedata.unicodedata () in
  let unicodetable = Hashtbl.create 16000 in
   iter
    (fun x ->
       Hashtbl.add unicodetable x.Cpdfunicodedata.code_value x.Cpdfunicodedata.character_name)
    unicodedata;
  let l = sort compare (list_of_hashtbl table) in
  Printf.printf "There are %i characters in this font\n" (length l);
  iter
-    (fun (c, gi) -> Printf.printf "Char %04X is at glyph index %i\n" c gi)
+    (fun (c, gi) ->
      let str = Printf.sprintf "%04X" c in
      Printf.printf "Char %s (%s) is at glyph index %i\n" str (Hashtbl.find unicodetable str) gi)
    l
 let read_encoding_table fmt length version b =
  match fmt with
  | 0 ->
-      (*Printf.printf "read_encoding_table: format 0\n";*)
+      if !dbg then Printf.printf "read_encoding_table: format 0\n";
      let t = null_hash () in
        for x = 0 to 255 do Hashtbl.add t x (read_byte b) done;
        print_encoding_table t;
        t
  | 4 ->
-      (*Printf.printf "read_encoding_table: format 4\n";*)
+      if !dbg then Printf.printf "read_encoding_table: format 4\n";
      read_format_4_encoding_table b
  | 6 ->
-      (*Printf.printf "read_encoding_table: format 6\n";*)
+      if !dbg then Printf.printf "read_encoding_table: format 6\n";
      read_format_6_encoding_table b
  | n -> raise (Pdf.PDFError "read_encoding_table: format %i not known\n%!")
@ -399,6 +408,9 @@ let subset_font major minor tables indexToLocFormat subset encoding cmap loca mk
    bytes
 let parse ?(subset=[]) data encoding =
  Printf.printf "********SUBSET is ";
  iter (Printf.printf "%i ") subset;
  Printf.printf "\n";
  let mk_b byte_offset = bitbytes_of_input (let i = input_of_bytes data in i.seek_in byte_offset; i) in
  let b = mk_b 0 in
  let major, minor = read_fixed b in
@ -503,6 +515,11 @@ let parse ?(subset=[]) data encoding =
          in
            let subset_1 = if subset = [] then [] else if fontpack_experiment then tl subset else subset in
            let subset_2 = if subset = [] then [] else [hd subset] in
            if !dbg && subset <> [] then
              begin
                Printf.printf "***********Chars for experimental main WinAnsiEncoding set: %i %i\n" (hd subset_1) (hd (tl subset_1));
                Printf.printf "***********Chars for experimental higher set: %i\n" (hd subset_2);
              end;
            let flags = calculate_flags italicangle in
            let firstchar_1, lastchar_1 = calculate_limits subset_1 in
            let firstchar_2, lastchar_2 = calculate_limits subset_2 in