Skeleton for returning actual fonts in list_fonts

This commit is contained in:
John Whitington
2025-01-28 16:13:59 +08:00
parent d05e9aa411
commit 930b162969
4 changed files with 23 additions and 13 deletions

11
Changes
View File

@ -3,17 +3,20 @@
Extended features:
o -obj JSON output
o -obj/-obj-json can follow a chain from an object number
o -obj/-obj-json can explore through arrays and name/number trees
o -print-dict-entry, -remove-dict-entry and -replace-dict-entry
* -obj/-obj-json can follow a chain from an object number
* -obj/-obj-json can explore through arrays and name/number trees
* -print-dict-entry, -remove-dict-entry and -replace-dict-entry
can follow a chain from each found dictionary entry
o More of the PDF/UA Matterhorn verification suite implemented
* More of the PDF/UA Matterhorn verification suite implemented
* Font lister now returns the font itself
Fixes:
o Harden auto-compression against malformed streams
o Add backup JPEG dimensions method in Cpdfimage
* = Supported by a grant from NLnet
2.8 (December 2024)
New features:

View File

@ -268,7 +268,7 @@ let list_font pdf page (name, dict) =
| Some (Pdf.Name n) -> Pdfwrite.string_of_pdf (Pdf.Name n)
| _ -> ""
in
(page, name, subtype, basefont, encoding)
(page, name, subtype, basefont, encoding, Pdf.Null)
(* List the fonts used in an xobject, and in any of the xobjects it has. Do not
process an xobject twice. *)
@ -321,10 +321,10 @@ let list_fonts pdf range =
[])
(combine (ilist 1 (length pages)) pages))
let string_of_font (p, n, s, b, e) =
let string_of_font (p, n, s, b, e, _) =
Printf.sprintf "%i %s %s %s %s\n" p n s b e
let json_of_font (pagenum, name, subtype, basefont, encoding) =
let json_of_font (pagenum, name, subtype, basefont, encoding, _) =
`Assoc
[("page", `Int pagenum);
("name", `String name);

View File

@ -6,7 +6,7 @@
val print_fonts : ?json:bool -> Pdf.t -> int list -> unit
(** Return font list. Page number, name, subtype, basefont, encoding, font object (currently a Pdf.Null placeholder from list_font). *)
val list_fonts : Pdf.t -> int list -> (int * string * string * string * string) list
val list_fonts : Pdf.t -> int list -> (int * string * string * string * string * Pdf.pdfobject) list
(** Return font list in JSON format *)
val json_fonts : Pdf.t -> int list -> Cpdfyojson.Safe.t

View File

@ -468,6 +468,7 @@ in
if not (List.for_all (mem' allowed_names) names) then merror ()
in
let check_font font =
Printf.printf "Check font: %s\n" (Pdfwrite.string_of_pdf font);
match Pdf.lookup_direct pdf "/ToUnicode" font with
| Some _ -> (* a) *) ()
| _ ->
@ -485,12 +486,18 @@ in
unimpl ()
| _ -> merror ()
in
(* FIXME Do not iterate over all object numbers: text extraction need not be
possible for fonts referenced only from within AcroForms, and fonts may be
direct objects with no object number at all. Instead, obtain the list of
fonts in the file just as -list-fonts does, and check those fonts. *)
Pdf.objiter
(fun _ o ->
match Pdf.lookup_direct pdf "/Type" o, Pdf.lookup_direct pdf "/Subtype" o with
| Some (Pdf.Name "/Font"), Some (Pdf.Name ("/CIDFontType0" | "/CIDFontType2")) -> ()
| Some (Pdf.Name "/Font"), _ -> check_font o
| _ -> ())
(fun o _ ->
let o = Pdf.lookup_obj pdf o in
match Pdf.lookup_direct pdf "/Type" o, Pdf.lookup_direct pdf "/Subtype" o with
| Some (Pdf.Name "/Font"), Some (Pdf.Name ("/CIDFontType0" | "/CIDFontType2")) -> ()
| Some (Pdf.Name "/Font"), _ -> check_font o
| _ -> ())
pdf
(* If the top-level /Lang is present, that rules all and is sufficient. *)