Font table printer now supports Unicode, much better
This commit is contained in:
parent
46b884bf47
commit
2fb55d514d
|
@ -3353,12 +3353,37 @@ let print_font_encoding pdf fontname pagenumber =
|
||||||
end
|
end
|
||||||
in
|
in
|
||||||
let extractor = Pdftext.text_extractor_of_font pdf font in
|
let extractor = Pdftext.text_extractor_of_font pdf font in
|
||||||
|
let unicodedata = Cpdfunicodedata.unicodedata () in
|
||||||
|
let unicodetable = Hashtbl.create 16000 in
|
||||||
|
iter
|
||||||
|
(fun x ->
|
||||||
|
Hashtbl.add
|
||||||
|
unicodetable
|
||||||
|
(int_of_string ("0x" ^ x.Cpdfunicodedata.code_value))
|
||||||
|
(x.Cpdfunicodedata.code_value,
|
||||||
|
x.Cpdfunicodedata.general_category,
|
||||||
|
x.Cpdfunicodedata.character_name,
|
||||||
|
x.Cpdfunicodedata.iso_10646_comment_field))
|
||||||
|
unicodedata;
|
||||||
for x = 0 to 255 do
|
for x = 0 to 255 do
|
||||||
let str = string_of_char (char_of_int x) in
|
let str = string_of_char (char_of_int x) in
|
||||||
Printf.printf "%i = %s = %s\n"
|
let codepoints = Pdftext.codepoints_of_text extractor str in
|
||||||
x
|
let unicodenumber, unicodename, is_control =
|
||||||
(Pdftext.utf8_of_codepoints (Pdftext.codepoints_of_text extractor str))
|
match codepoints with
|
||||||
(fold_left ( ^ ) "" (Pdftext.glyphnames_of_text extractor str))
|
| [c] ->
|
||||||
|
begin try
|
||||||
|
let codeval, category, character_name, comment = Hashtbl.find unicodetable c in
|
||||||
|
codeval, character_name, category = "Cc"
|
||||||
|
with
|
||||||
|
Not_found -> "", "", false
|
||||||
|
end
|
||||||
|
| _ -> "***multiple", "***multiple", false
|
||||||
|
in
|
||||||
|
let utf8 = if is_control then "<nonprintable>" else Pdftext.utf8_of_codepoints codepoints in
|
||||||
|
let glyphnames = fold_left ( ^ ) "" (Pdftext.glyphnames_of_text extractor str) in
|
||||||
|
if glyphnames <> ".notdef" then
|
||||||
|
Printf.printf
|
||||||
|
"%i = U+%s (%s - %s) = %s\n" x unicodenumber utf8 unicodename glyphnames
|
||||||
done
|
done
|
||||||
| _ -> failwith "addtext: font not found for width"
|
| _ -> failwith "addtext: font not found for width"
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue