Firm up standard structure types

This commit is contained in:
John Whitington
2024-06-20 15:26:43 +01:00
parent d749660873
commit 835163cf9f

View File

@@ -128,23 +128,18 @@ let matterhorn_01_007 _ _ pdf =
| Some (Pdf.Boolean true) -> merror () | Some (Pdf.Boolean true) -> merror ()
| _ -> () | _ -> ()
(* Here, for now, we allow the ISO 32000 and ISO 32000-2 *) let standard_structure_types_2008 =
(* FIXME which version of PDF/UA are we doing? Can we do both? or pick? *) ["/Document"; "/Part"; "/Sect"; "/Div"; "/NonStruct"; "/P"; "/H1"; "/H2";
let standard_structure_types = "/H3"; "/H4"; "/H5"; "/H6"; "/H"; "/Lbl"; "/Span"; "/Link"; "/Annot";
["/Document"; "/DocumentFragment"; "/Part"; "/Sect"; "/Div"; "/Aside";
"/NonStruct"; "/P"; "/H1"; "/H2"; "/H3"; "/H4"; "/H5"; "/H6"; "/H"; "/Title";
"/FENote"; "/Sub"; "/Lbl"; "/Span"; "/Em"; "/Strong"; "/Link"; "/Annot";
"/Form"; "/Ruby"; "/RB"; "/RT"; "/RP"; "/Warichu"; "/WT"; "/WP"; "/L"; "/LI"; "/Form"; "/Ruby"; "/RB"; "/RT"; "/RP"; "/Warichu"; "/WT"; "/WP"; "/L"; "/LI";
"/LBody"; "/Table"; "/TR"; "/TH"; "/TD"; "/THead"; "/TBody"; "/TFoot"; "/LBody"; "/Table"; "/TR"; "/TH"; "/TD"; "/THead"; "/TBody"; "/TFoot";
"/Caption"; "/Figure"; "/Formula"; "/Artifact"; "/Caption"; "/Figure"; "/Formula"; "/Art"; "/BlockQuote"; "/TOC"; "/TOCI";
(* 2008 ISO 32000 only *) "/Index"; "/Private"; "/Quote"; "/Note"; "/Reference"; "/Code"; "/BibEntry"]
"/Art"; "/BlockQuote"; "/TOC"; "/TOCI"; "/Index"; "/Private"; "/Quote";
"/Note"; "/Reference"; "/Code"]
let rec follow_standard rm n = let rec follow_standard rm n =
match List.assoc_opt n rm with match List.assoc_opt n rm with
| None -> raise Exit | None -> raise Exit
| Some x when mem x standard_structure_types -> () | Some x when mem x standard_structure_types_2008 -> ()
| Some x -> follow_standard rm x | Some x -> follow_standard rm x
let circular rm = let circular rm =
@@ -176,7 +171,7 @@ let matterhorn_02_004 _ _ pdf =
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/RoleMap"] with match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/RoleMap"] with
| Some rm -> | Some rm ->
let rolemap = read_rolemap pdf rm in let rolemap = read_rolemap pdf rm in
if List.exists (function k -> mem k standard_structure_types) (map fst rolemap) then merror () if List.exists (function k -> mem k standard_structure_types_2008) (map fst rolemap) then merror ()
| None -> () | None -> ()
(* Document does not contain an XMP metadata stream *) (* Document does not contain an XMP metadata stream *)