More matterhorn fixes

This commit is contained in:
John Whitington 2024-06-26 16:43:45 +01:00
parent b0c630d822
commit 4d0fc879c4
1 changed files with 95 additions and 52 deletions

147
cpdfua.ml
View File

@ -583,48 +583,73 @@ let matterhorn_26_002 _ _ pdf =
| Some {Pdf.from_get_encryption_values = (_, _, _, p, _, _, _)} -> | Some {Pdf.from_get_encryption_values = (_, _, _, p, _, _, _)} ->
if mem Pdfcrypt.NoExtract (Pdfcrypt.banlist_of_p p) then merror () if mem Pdfcrypt.NoExtract (Pdfcrypt.banlist_of_p p) then merror ()
let read_parent_tree pdf =
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/ParentTree"] with
| Some t -> Pdftree.read_number_tree pdf t
| None -> []
(* An annotation, other than of subtype Widget, Link and PrinterMark, is not a (* An annotation, other than of subtype Widget, Link and PrinterMark, is not a
direct child of an <Annot> structure element. *) direct child of an <Annot> structure element. *)
let matterhorn_28_002 _ _ pdf = let matterhorn_28_002 _ _ pdf =
(* Find object numbers of all annotations which are not Widget, Link, or Printermark. *) let parent_tree = read_parent_tree pdf in
Pdf.objiter (* Find object numbers of all annotations which are not Widget, Link, or Printermark. *)
(fun n obj -> match Pdf.lookup_direct pdf "/Subtype" obj with Pdf.objiter
| Some (Pdf.Name (fun n obj -> match Pdf.lookup_direct pdf "/Subtype" obj with
("/Stamp" | "/Line" | "Square" | "/Circle" | "/Polygon" | "/PolyLine" | | Some (Pdf.Name
"/Highlight" | "/Underline" | "/Squiggly" | "/StrikeOut" | "/Caret" | ("/Stamp" | "/Line" | "Square" | "/Circle" | "/Polygon" | "/PolyLine" |
"/Ink" | "/FileAttachment" | "/Sound" | "/Movie" | "/Screen" | "/TrapNet" | "/Highlight" | "/Underline" | "/Squiggly" | "/StrikeOut" | "/Caret" |
"/Watermark" | "/3D")) -> "/Ink" | "/FileAttachment" | "/Sound" | "/Movie" | "/Screen" | "/TrapNet" |
(* Check that every /StructParent entry for each of these points to something "/Watermark" | "/3D")) ->
with /S /Annot. No need to worry about rolemapping, because PDF/UA docs (* Check that every /StructParent entry for each of these points to something
aren't allowed to remap standard types. *) with /S /Annot. No need to worry about rolemapping, because PDF/UA docs
begin match Pdf.lookup_chain pdf obj ["/StructParent"; "/S"] with aren't allowed to remap standard types. *)
| Some (Pdf.Name "/Annot") -> () begin match Pdf.lookup_direct pdf "/StructParent" obj with
| _ -> merror () | Some (Pdf.Integer i) ->
end begin match List.assoc_opt (string_of_int i) parent_tree with
| _ -> ()) | Some d ->
pdf begin match Pdf.lookup_direct pdf "/S" d with
| Some (Pdf.Name "/Annot") -> ()
| _ -> merror ()
end
| None -> merror ()
end
| _ -> merror ()
end
| _ -> ())
pdf
(* An annotation, other than of subtype Widget, does not have a Contents entry (* An annotation, other than of subtype Widget, does not have a Contents entry
and does not have an alternative description (in the form of an Alt entry in and does not have an alternative description (in the form of an Alt entry in
the enclosing structure element). *) the enclosing structure element). *)
let matterhorn_28_004 _ _ pdf = let matterhorn_28_004 _ _ pdf =
Pdf.objiter let parent_tree = read_parent_tree pdf in
(fun n obj -> match Pdf.lookup_direct pdf "/Subtype" obj with Pdf.objiter
| Some (Pdf.Name (fun n obj -> match Pdf.lookup_direct pdf "/Subtype" obj with
("/Stamp" | "/Line" | "Square" | "/Circle" | "/Polygon" | "/PolyLine" | | Some (Pdf.Name
"/Highlight" | "/Underline" | "/Squiggly" | "/StrikeOut" | "/Caret" | ("/Stamp" | "/Line" | "Square" | "/Circle" | "/Polygon" | "/PolyLine" |
"/Ink" | "/FileAttachment" | "/Sound" | "/Movie" | "/Screen" | "/TrapNet" | "/Highlight" | "/Underline" | "/Squiggly" | "/StrikeOut" | "/Caret" |
"/Watermark" | "/3D" | "/Link" | "/PrinterMark")) -> "/Ink" | "/FileAttachment" | "/Sound" | "/Movie" | "/Screen" | "/TrapNet" |
begin match Pdf.lookup_direct pdf "/Contents" obj with "/Watermark" | "/3D" | "/Link" | "/PrinterMark")) ->
| Some _ -> () Printf.printf "%S\n" (Pdfwrite.string_of_pdf obj);
| None -> begin match Pdf.lookup_direct pdf "/Contents" obj with
begin match Pdf.lookup_chain pdf obj ["/StructParent"; "/Alt"] with | Some _ -> ()
| Some _ -> () | None ->
begin match Pdf.lookup_direct pdf "/StructParent" obj with
| Some (Pdf.Integer i) ->
begin match List.assoc_opt (string_of_int i) parent_tree with
| Some d ->
begin match Pdf.lookup_direct pdf "/Alt" d with
| Some _ -> ()
| _ -> merror ()
end
| None -> merror ()
end
| _ -> merror () | _ -> merror ()
end end
end
| _ -> ()) end
pdf | _ -> ())
pdf
(* A form field does not have a TU entry and does not have an alternative (* A form field does not have a TU entry and does not have an alternative
description (in the form of an Alt entry in the enclosing structure description (in the form of an Alt entry in the enclosing structure
@ -675,29 +700,47 @@ let matterhorn_28_009 _ _ pdf =
(* A widget annotation is not nested within a <Form> tag. *) (* A widget annotation is not nested within a <Form> tag. *)
let matterhorn_28_010 _ _ pdf = let matterhorn_28_010 _ _ pdf =
Pdf.objiter let parent_tree = read_parent_tree pdf in
(fun _ o -> Pdf.objiter
match Pdf.lookup_direct pdf "/Subtype" o with (fun _ o ->
| Some (Pdf.Name "/Widget") -> match Pdf.lookup_direct pdf "/Subtype" o with
begin match Pdf.lookup_chain pdf o ["/StructParent"; "/S"] with | Some (Pdf.Name "/Widget") ->
| Some (Pdf.Name "/Form") -> () begin match Pdf.lookup_direct pdf "/StructParent" o with
| _ -> merror () | Some (Pdf.Integer i) ->
end begin match List.assoc_opt (string_of_int i) parent_tree with
| _ -> ()) | Some d ->
pdf begin match Pdf.lookup_direct pdf "/S" d with
| Some (Pdf.Name "/Form") -> ()
| _ -> merror ()
end
| _ -> merror ()
end
| _ -> merror ()
end
| _ -> ())
pdf
(* A link annotation is not nested within a <Link> tag. *) (* A link annotation is not nested within a <Link> tag. *)
let matterhorn_28_011 _ _ pdf = let matterhorn_28_011 _ _ pdf =
Pdf.objiter let parent_tree = read_parent_tree pdf in
(fun _ o -> Pdf.objiter
match Pdf.lookup_direct pdf "/Subtype" o with (fun _ o ->
| Some (Pdf.Name "/Link") -> match Pdf.lookup_direct pdf "/Subtype" o with
begin match Pdf.lookup_chain pdf o ["/StructParent"; "/S"] with | Some (Pdf.Name "/Link") ->
| Some (Pdf.Name "/Link") -> () begin match Pdf.lookup_direct pdf "/StructParent" o with
| _ -> merror () | Some (Pdf.Integer i) ->
end begin match List.assoc_opt (string_of_int i) parent_tree with
| _ -> ()) | Some d ->
pdf begin match Pdf.lookup_direct pdf "/S" d with
| Some (Pdf.Name "/Link") -> ()
| _ -> merror ()
end
| _ -> merror ()
end
| _ -> merror ()
end
| _ -> ())
pdf
(* A link annotation does not include an alternate description in its Contents (* A link annotation does not include an alternate description in its Contents
entry. *) entry. *)