diff --git a/cpdfua.ml b/cpdfua.ml index bd1520a..e448770 100644 --- a/cpdfua.ml +++ b/cpdfua.ml @@ -348,6 +348,11 @@ let matterhorn_09_008 st st2 pdf = let matterhorn_10_001 _ _ pdf = unimpl () +(* Not clear what to do for 001...005 here - if the top-level /Lang is present, + that rules all and is sufficient. So how could these then fail? Perhaps they + mean if an intervening one has "" as the lang - i.e. unknown. Or, if it is + not on the RFC 3066 registry list? *) + (* Natural language for text in page content cannot be determined. *) let matterhorn_11_001 _ _ pdf = unimpl () @@ -367,7 +372,14 @@ let matterhorn_11_004 _ _ pdf = unimpl () let matterhorn_11_005 _ _ pdf = unimpl () (* Natural language for document metadata cannot be determined. *) -let matterhorn_11_006 _ _ pdf = unimpl () +let matterhorn_11_006 _ _ pdf = + (* Per 2008 14.9.2.1-2, document metadata language is simply determined by + the top-level /Lang. In theory, one could omit the top-level /Lang and use + xml:lang everywhere in the metadata, but we haven't seen an example which + omits top-level /Lang so this will do for now. A missing or empty-string /Root /Lang is reported via merror_str; any other value is accepted. *) + match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/Lang"] with + | Some (Pdf.String "") | None -> merror_str "No top-level /Lang" + | Some _ -> () (*
tag alternative or replacement text missing. *) let matterhorn_13_004 _ st2 pdf =