More matterhorn fixes

This commit is contained in:
John Whitington 2024-06-26 16:43:45 +01:00
parent b0c630d822
commit 4d0fc879c4
1 changed files with 95 additions and 52 deletions

View File

@ -583,9 +583,15 @@ let matterhorn_26_002 _ _ pdf =
| Some {Pdf.from_get_encryption_values = (_, _, _, p, _, _, _)} -> | Some {Pdf.from_get_encryption_values = (_, _, _, p, _, _, _)} ->
if mem Pdfcrypt.NoExtract (Pdfcrypt.banlist_of_p p) then merror () if mem Pdfcrypt.NoExtract (Pdfcrypt.banlist_of_p p) then merror ()
let read_parent_tree pdf =
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/ParentTree"] with
| Some t -> Pdftree.read_number_tree pdf t
| None -> []
(* An annotation, other than of subtype Widget, Link and PrinterMark, is not a (* An annotation, other than of subtype Widget, Link and PrinterMark, is not a
direct child of an <Annot> structure element. *) direct child of an <Annot> structure element. *)
let matterhorn_28_002 _ _ pdf = let matterhorn_28_002 _ _ pdf =
let parent_tree = read_parent_tree pdf in
(* Find object numbers of all annotations which are not Widget, Link, or Printermark. *) (* Find object numbers of all annotations which are not Widget, Link, or Printermark. *)
Pdf.objiter Pdf.objiter
(fun n obj -> match Pdf.lookup_direct pdf "/Subtype" obj with (fun n obj -> match Pdf.lookup_direct pdf "/Subtype" obj with
@ -597,10 +603,18 @@ let matterhorn_28_002 _ _ pdf =
(* Check that every /StructParent entry for each of these points to something (* Check that every /StructParent entry for each of these points to something
with /S /Annot. No need to worry about rolemapping, because PDF/UA docs with /S /Annot. No need to worry about rolemapping, because PDF/UA docs
aren't allowed to remap standard types. *) aren't allowed to remap standard types. *)
begin match Pdf.lookup_chain pdf obj ["/StructParent"; "/S"] with begin match Pdf.lookup_direct pdf "/StructParent" obj with
| Some (Pdf.Integer i) ->
begin match List.assoc_opt (string_of_int i) parent_tree with
| Some d ->
begin match Pdf.lookup_direct pdf "/S" d with
| Some (Pdf.Name "/Annot") -> () | Some (Pdf.Name "/Annot") -> ()
| _ -> merror () | _ -> merror ()
end end
| None -> merror ()
end
| _ -> merror ()
end
| _ -> ()) | _ -> ())
pdf pdf
@ -608,6 +622,7 @@ let matterhorn_28_002 _ _ pdf =
and does not have an alternative description (in the form of an Alt entry in and does not have an alternative description (in the form of an Alt entry in
the enclosing structure element). *) the enclosing structure element). *)
let matterhorn_28_004 _ _ pdf = let matterhorn_28_004 _ _ pdf =
let parent_tree = read_parent_tree pdf in
Pdf.objiter Pdf.objiter
(fun n obj -> match Pdf.lookup_direct pdf "/Subtype" obj with (fun n obj -> match Pdf.lookup_direct pdf "/Subtype" obj with
| Some (Pdf.Name | Some (Pdf.Name
@ -615,13 +630,23 @@ let matterhorn_28_004 _ _ pdf =
"/Highlight" | "/Underline" | "/Squiggly" | "/StrikeOut" | "/Caret" | "/Highlight" | "/Underline" | "/Squiggly" | "/StrikeOut" | "/Caret" |
"/Ink" | "/FileAttachment" | "/Sound" | "/Movie" | "/Screen" | "/TrapNet" | "/Ink" | "/FileAttachment" | "/Sound" | "/Movie" | "/Screen" | "/TrapNet" |
"/Watermark" | "/3D" | "/Link" | "/PrinterMark")) -> "/Watermark" | "/3D" | "/Link" | "/PrinterMark")) ->
Printf.printf "%S\n" (Pdfwrite.string_of_pdf obj);
begin match Pdf.lookup_direct pdf "/Contents" obj with begin match Pdf.lookup_direct pdf "/Contents" obj with
| Some _ -> () | Some _ -> ()
| None -> | None ->
begin match Pdf.lookup_chain pdf obj ["/StructParent"; "/Alt"] with begin match Pdf.lookup_direct pdf "/StructParent" obj with
| Some (Pdf.Integer i) ->
begin match List.assoc_opt (string_of_int i) parent_tree with
| Some d ->
begin match Pdf.lookup_direct pdf "/Alt" d with
| Some _ -> () | Some _ -> ()
| _ -> merror () | _ -> merror ()
end end
| None -> merror ()
end
| _ -> merror ()
end
end end
| _ -> ()) | _ -> ())
pdf pdf
@ -675,27 +700,45 @@ let matterhorn_28_009 _ _ pdf =
(* A widget annotation is not nested within a <Form> tag. *) (* A widget annotation is not nested within a <Form> tag. *)
let matterhorn_28_010 _ _ pdf = let matterhorn_28_010 _ _ pdf =
let parent_tree = read_parent_tree pdf in
Pdf.objiter Pdf.objiter
(fun _ o -> (fun _ o ->
match Pdf.lookup_direct pdf "/Subtype" o with match Pdf.lookup_direct pdf "/Subtype" o with
| Some (Pdf.Name "/Widget") -> | Some (Pdf.Name "/Widget") ->
begin match Pdf.lookup_chain pdf o ["/StructParent"; "/S"] with begin match Pdf.lookup_direct pdf "/StructParent" o with
| Some (Pdf.Integer i) ->
begin match List.assoc_opt (string_of_int i) parent_tree with
| Some d ->
begin match Pdf.lookup_direct pdf "/S" d with
| Some (Pdf.Name "/Form") -> () | Some (Pdf.Name "/Form") -> ()
| _ -> merror () | _ -> merror ()
end end
| _ -> merror ()
end
| _ -> merror ()
end
| _ -> ()) | _ -> ())
pdf pdf
(* A link annotation is not nested within a <Link> tag. *) (* A link annotation is not nested within a <Link> tag. *)
let matterhorn_28_011 _ _ pdf = let matterhorn_28_011 _ _ pdf =
let parent_tree = read_parent_tree pdf in
Pdf.objiter Pdf.objiter
(fun _ o -> (fun _ o ->
match Pdf.lookup_direct pdf "/Subtype" o with match Pdf.lookup_direct pdf "/Subtype" o with
| Some (Pdf.Name "/Link") -> | Some (Pdf.Name "/Link") ->
begin match Pdf.lookup_chain pdf o ["/StructParent"; "/S"] with begin match Pdf.lookup_direct pdf "/StructParent" o with
| Some (Pdf.Integer i) ->
begin match List.assoc_opt (string_of_int i) parent_tree with
| Some d ->
begin match Pdf.lookup_direct pdf "/S" d with
| Some (Pdf.Name "/Link") -> () | Some (Pdf.Name "/Link") -> ()
| _ -> merror () | _ -> merror ()
end end
| _ -> merror ()
end
| _ -> merror ()
end
| _ -> ()) | _ -> ())
pdf pdf