More info in PDF/UA verifications

This commit is contained in:
John Whitington 2025-03-24 15:59:28 +00:00
parent 0c1ff247b5
commit 19736bc147
2 changed files with 14 additions and 11 deletions

View File

@@ -13,6 +13,7 @@ o -obj JSON output
* -print-dict-entry, -remove-dict-entry and -replace-dict-entry * -print-dict-entry, -remove-dict-entry and -replace-dict-entry
can follow a chain from each found dictionary entry can follow a chain from each found dictionary entry
* Much more of the PDF/UA Matterhorn verification suite implemented * Much more of the PDF/UA Matterhorn verification suite implemented
* PDF/UA verifier now returns extra information with most failures
* Font lister now returns the font itself * Font lister now returns the font itself
o Allow %objnum in output name for -extract-images o Allow %objnum in output name for -extract-images
* -show-boxes, -trim-marks, -table-of-contents, -add-text * -show-boxes, -trim-marks, -table-of-contents, -add-text

View File

@@ -502,14 +502,14 @@ let matterhorn_10_001 _ _ pdf =
| Some d -> | Some d ->
begin match Pdf.lookup_direct pdf "/Differences" d with begin match Pdf.lookup_direct pdf "/Differences" d with
| Some diffs -> check_diffs diffs (* b) 2 *) | Some diffs -> check_diffs diffs (* b) 2 *)
| None -> merror () | None -> merror_str "No /Differences"
end end
| None -> | None ->
match Pdf.lookup_direct pdf "/Subtype" font with match Pdf.lookup_direct pdf "/Subtype" font with
| Some (Pdf.Name "/Type0") -> | Some (Pdf.Name "/Type0") ->
(* c) *) (* c) *)
unimpl () unimpl ()
| _ -> merror () | _ -> merror_str "Not a Type 0 font"
in in
let fonts = map (fun (_, _, _, _, _, x) -> x) (Cpdffont.list_fonts pdf (ilist 1 (Pdfpage.endpage pdf))) in let fonts = map (fun (_, _, _, _, _, x) -> x) (Cpdffont.list_fonts pdf (ilist 1 (Pdfpage.endpage pdf))) in
iter iter
@@ -525,7 +525,8 @@ let matterhorn_10_001 _ _ pdf =
(* Natural language for text in page content cannot be determined. *) (* Natural language for text in page content cannot be determined. *)
let matterhorn_11_001 _ _ pdf = let matterhorn_11_001 _ _ pdf =
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/Lang"] with match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/Lang"] with
| Some (Pdf.String "") | None -> merror_str "No top-level /Lang" | Some (Pdf.String "") -> merror_str "Top-level /Lang is empty"
| None -> merror_str "No top-level /Lang"
| Some _ -> () | Some _ -> ()
(* Natural language for text in Alt, ActualText and E attributes cannot be (* Natural language for text in Alt, ActualText and E attributes cannot be
@@ -572,7 +573,7 @@ let rec headings_list_of_tree (E (n, cs)) =
let matterhorn_14_002 st _ _ = let matterhorn_14_002 st _ _ =
match headings_list_of_tree st with match headings_list_of_tree st with
| [] | "/H1"::_ -> () | [] | "/H1"::_ -> ()
| _ -> merror () | x::_ -> merror_str x
(* Numbered heading levels in descending sequence are skipped (Example: <H3> (* Numbered heading levels in descending sequence are skipped (Example: <H3>
follows directly after <H1>). *) follows directly after <H1>). *)
@@ -664,7 +665,8 @@ let matterhorn_20_002 _ _ pdf =
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/OCProperties"; "/D"], match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/OCProperties"; "/D"],
Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/OCProperties"; "/D"; "/Name"] Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/OCProperties"; "/D"; "/Name"]
with with
| Some _, (Some (Pdf.String "") | None) -> merror () | Some _, Some (Pdf.String "") -> merror_str "empty string"
| Some _, None -> merror_str "not present"
| _ -> () | _ -> ()
(* An AS entry appears in an Optional Content Configuration Dictionary. *) (* An AS entry appears in an Optional Content Configuration Dictionary. *)
@@ -864,12 +866,12 @@ let matterhorn_28_005 _ _ pdf =
begin match List.assoc_opt (string_of_int i) parent_tree with begin match List.assoc_opt (string_of_int i) parent_tree with
| Some d -> | Some d ->
begin match Pdf.lookup_direct pdf "/Alt" d with begin match Pdf.lookup_direct pdf "/Alt" d with
| None -> merror () | None -> merror_str "no /Alt"
| _ -> () | _ -> ()
end end
| _ -> merror () | _ -> merror_str "no parent tree entry"
end end
| _ -> merror ()) | _ -> merror_str "no /StructParent")
missing_tu missing_tu
(* An annotation with subtype undefined in ISO 32000 does not meet 7.18.1. *) (* An annotation with subtype undefined in ISO 32000 does not meet 7.18.1. *)
@@ -926,11 +928,11 @@ let matterhorn_28_010 _ _ pdf =
| Some d -> | Some d ->
begin match Pdf.lookup_direct pdf "/S" d with begin match Pdf.lookup_direct pdf "/S" d with
| Some (Pdf.Name "/Form") -> () | Some (Pdf.Name "/Form") -> ()
| _ -> merror () | _ -> merror_str "type is not form"
end end
| _ -> merror () | _ -> merror_str "not found in parent tree"
end end
| _ -> merror () | _ -> merror_str "not in structure tree"
end end
| _ -> ()) | _ -> ())
pdf pdf