Only print font entries which are in charset
This commit is contained in:
parent
87c82dbbf0
commit
847b120090
|
@ -3352,6 +3352,11 @@ let print_font_encoding pdf fontname pagenumber =
|
||||||
match !font with Some f -> f | None -> failwith (Printf.sprintf "print_font_encoding: font %s not found" fontname)
|
match !font with Some f -> f | None -> failwith (Printf.sprintf "print_font_encoding: font %s not found" fontname)
|
||||||
end
|
end
|
||||||
in
|
in
|
||||||
|
let charset =
|
||||||
|
match Pdftext.read_font pdf font with
|
||||||
|
| Pdftext.SimpleFont {Pdftext.fontdescriptor = Some {Pdftext.charset = Some cs}} -> Some cs
|
||||||
|
| _ -> None
|
||||||
|
in
|
||||||
let extractor = Pdftext.text_extractor_of_font pdf font in
|
let extractor = Pdftext.text_extractor_of_font pdf font in
|
||||||
let unicodedata = Cpdfunicodedata.unicodedata () in
|
let unicodedata = Cpdfunicodedata.unicodedata () in
|
||||||
let unicodetable = Hashtbl.create 16000 in
|
let unicodetable = Hashtbl.create 16000 in
|
||||||
|
@ -3381,7 +3386,8 @@ let print_font_encoding pdf fontname pagenumber =
|
||||||
in
|
in
|
||||||
let utf8 = if is_control then "<nonprintable>" else Pdftext.utf8_of_codepoints codepoints in
|
let utf8 = if is_control then "<nonprintable>" else Pdftext.utf8_of_codepoints codepoints in
|
||||||
let glyphnames = fold_left ( ^ ) "" (Pdftext.glyphnames_of_text extractor str) in
|
let glyphnames = fold_left ( ^ ) "" (Pdftext.glyphnames_of_text extractor str) in
|
||||||
if glyphnames <> ".notdef" then
|
let is_in_charset s = match charset with None -> true | Some cs -> mem s cs in
|
||||||
|
if glyphnames <> ".notdef" && is_in_charset glyphnames then
|
||||||
Printf.printf
|
Printf.printf
|
||||||
"%i = U+%s (%s - %s) = %s\n" x unicodenumber utf8 unicodename glyphnames
|
"%i = U+%s (%s - %s) = %s\n" x unicodenumber utf8 unicodename glyphnames
|
||||||
done
|
done
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
%Document -list-annotations-json
|
%Document -list-annotations-json
|
||||||
%Document -replace-dict-entry and search extension to -remove-dict-entry, and -print-dict-entry
|
%Document -replace-dict-entry and search extension to -remove-dict-entry, and -print-dict-entry
|
||||||
%Document new text lookup for -add-text and new -raw mode
|
%Document new text lookup for -add-text and new -raw mode
|
||||||
|
%Document -print-font-table
|
||||||
\documentclass{book}
|
\documentclass{book}
|
||||||
% Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc.
|
% Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc.
|
||||||
\usepackage{comment}\excludecomment{cpdflib}\excludecomment{pycpdflib}
|
\usepackage{comment}\excludecomment{cpdflib}\excludecomment{pycpdflib}
|
||||||
|
|
Loading…
Reference in New Issue