Only print font entries which are in charset

This commit is contained in:
John Whitington 2021-11-12 10:07:37 -08:00
parent 87c82dbbf0
commit 847b120090
2 changed files with 8 additions and 1 deletion

View File

@@ -3352,6 +3352,11 @@ let print_font_encoding pdf fontname pagenumber =
match !font with Some f -> f | None -> failwith (Printf.sprintf "print_font_encoding: font %s not found" fontname)
end
in
(* Charset taken from the font descriptor when the font reads back as a
   [Pdftext.SimpleFont] whose descriptor carries one; [None] in every other
   case. Further down, [None] is treated as "no filtering": every entry
   passes the charset test. *)
let charset =
match Pdftext.read_font pdf font with
| Pdftext.SimpleFont {Pdftext.fontdescriptor = Some {Pdftext.charset = Some cs}} -> Some cs
| _ -> None
in
let extractor = Pdftext.text_extractor_of_font pdf font in
let unicodedata = Cpdfunicodedata.unicodedata () in
let unicodetable = Hashtbl.create 16000 in
@@ -3381,7 +3386,8 @@ let print_font_encoding pdf fontname pagenumber =
in
let utf8 = if is_control then "<nonprintable>" else Pdftext.utf8_of_codepoints codepoints in
let glyphnames = fold_left ( ^ ) "" (Pdftext.glyphnames_of_text extractor str) in
if glyphnames <> ".notdef" then
let is_in_charset s = match charset with None -> true | Some cs -> mem s cs in
if glyphnames <> ".notdef" && is_in_charset glyphnames then
Printf.printf
"%i = U+%s (%s - %s) = %s\n" x unicodenumber utf8 unicodename glyphnames
done

View File

@@ -6,6 +6,7 @@
%Document -list-annotations-json
%Document -replace-dict-entry and search extension to -remove-dict-entry, and -print-dict-entry
%Document new text lookup for -add-text and new -raw mode
%Document -print-font-table
\documentclass{book}
% Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc.
\usepackage{comment}\excludecomment{cpdflib}\excludecomment{pycpdflib}