Additional matterhorn checks

This commit is contained in:
John Whitington
2025-02-23 15:35:23 +00:00
parent 1e61f27bcc
commit d8d902749c

View File

@ -13,7 +13,10 @@ open Cpdferror
Unimplemented: Unimplemented:
31-011 31-012 31-013 31-014 31-015 31-016 31-018 31-030 Require looking 31-011 31-012 31-013 31-014 31-015 31-016 31-018 31-030 Require looking
inside font files *) inside font file
To return to:
15_003 17_003 31_009 31_027 *)
type subformat = type subformat =
| PDFUA1 | PDFUA1
@ -70,7 +73,7 @@ let read_attributes pdf stnode =
let from_c = read_a pdf "/C" stnode in let from_c = read_a pdf "/C" stnode in
(* Prefer entries from a, but we are just testing for presence, so merely setify *) (* Prefer entries from a, but we are just testing for presence, so merely setify *)
let attrs = setify (from_a @ from_c) in let attrs = setify (from_a @ from_c) in
(* For now, stick /ID, /Alt, /ActualText in here too. Eventually, move to prevent crashes. *) (* For now, stick /ID, /Alt, /ActualText in here too. *)
let alt = let alt =
match Pdf.lookup_direct pdf "/Alt" stnode with | Some _ -> ["/Alt"] | None -> [] match Pdf.lookup_direct pdf "/Alt" stnode with | Some _ -> ["/Alt"] | None -> []
in in
@ -1062,10 +1065,13 @@ let matterhorn_31_001 _ _ pdf =
(fun _ o -> (fun _ o ->
match Pdf.lookup_direct pdf "/Subtype" o, Pdf.lookup_direct pdf "/Encoding" o with match Pdf.lookup_direct pdf "/Subtype" o, Pdf.lookup_direct pdf "/Encoding" o with
| Some (Pdf.Name "/Type0"), Some (Pdf.Name ("/Identity-H" | "/Identity-V")) -> () | Some (Pdf.Name "/Type0"), Some (Pdf.Name ("/Identity-H" | "/Identity-V")) -> ()
| Some (Pdf.Name "/Type0"), _ -> | Some (Pdf.Name "/Type0"), Some (Pdf.Indirect enc) ->
merror_str if
"Advisory: contains composite font with non-identity encoding. Cpdf\ Pdf.lookup_chain pdf (Pdf.Indirect enc) ["/CIDSystemInfo"; "/Registry"]
cannot check the CIDSystemInfo entries are identical automatically." <>
Pdf.lookup_chain pdf o ["/DescendantFonts"; "/[0]"; "/CIDSystemInfo"; "/Registry"]
then
merror ()
| _ -> ()) | _ -> ())
pdf pdf
@ -1073,14 +1079,38 @@ let matterhorn_31_001 _ _ pdf =
has values for Ordering in both CIDSystemInfo dictionaries that are not has values for Ordering in both CIDSystemInfo dictionaries that are not
identical. *) identical. *)
let matterhorn_31_002 st st2 pdf =
  (* Checkpoint 31-002: for a Type 0 font whose /Encoding is a CMap rather
     than one of the predefined identity names, the /Ordering entry in the
     CMap's /CIDSystemInfo must be identical to the /Ordering entry in the
     /CIDSystemInfo of the first descendant font. [st] and [st2] are unused;
     they are kept so that the signature matches the other checks. *)
  let check_font _ font =
    let subtype = Pdf.lookup_direct pdf "/Subtype" font in
    let encoding = Pdf.lookup_direct pdf "/Encoding" font in
      match subtype, encoding with
      | Some (Pdf.Name "/Type0"), Some (Pdf.Indirect cmap) ->
          (* NOTE(review): this arm only fires when the lookup yields an
             indirect reference -- confirm Pdf.lookup_direct can return one. *)
          let cmap_ordering =
            Pdf.lookup_chain pdf (Pdf.Indirect cmap) ["/CIDSystemInfo"; "/Ordering"]
          in
          let font_ordering =
            Pdf.lookup_chain pdf font ["/DescendantFonts"; "/[0]"; "/CIDSystemInfo"; "/Ordering"]
          in
            (* Two absent entries compare equal and so do not fail. *)
            if cmap_ordering <> font_ordering then merror ()
      | _ ->
          (* Covers non-Type0 objects, /Identity-H and /Identity-V encodings,
             and every other encoding form: nothing to check. *)
          ()
  in
    Pdf.objiter check_font pdf
(* A Type 0 font dictionary with encoding other than Identity-H and Identity-V
   has a value for Supplement in the CIDSystemInfo dictionary of the CID font
   that is less than the value for Supplement in the CIDSystemInfo dictionary
   of the CMap. *)
let matterhorn_31_003 st st2 pdf =
  Pdf.objiter
    (fun _ o ->
       match Pdf.lookup_direct pdf "/Subtype" o, Pdf.lookup_direct pdf "/Encoding" o with
       | Some (Pdf.Name "/Type0"), Some (Pdf.Name ("/Identity-H" | "/Identity-V")) -> ()
       | Some (Pdf.Name "/Type0"), Some (Pdf.Indirect enc) ->
           (* NOTE(review): as in the sibling checks, this arm only fires when
              the lookup yields an indirect reference -- confirm that
              Pdf.lookup_direct can return one. *)
           begin match
             Pdf.lookup_chain pdf o ["/DescendantFonts"; "/[0]"; "/CIDSystemInfo"; "/Supplement"],
             Pdf.lookup_chain pdf (Pdf.Indirect enc) ["/CIDSystemInfo"; "/Supplement"]
           with
           | Some (Pdf.Integer font_supplement), Some (Pdf.Integer cmap_supplement) ->
               (* The failure condition is an ordering test on the two
                  supplement numbers, not an equality test on Registry or
                  Ordering (those are checks 31-001 and 31-002). *)
               if font_supplement < cmap_supplement then merror ()
           | _ ->
               (* One or both Supplement entries are missing or are not
                  integers: there is nothing to compare, so do not fail.
                  NOTE(review): assumes lookup_chain yields the integer
                  directly rather than a reference to it -- confirm. *)
               ()
           end
       | _ -> ())
    pdf
(* A Type 2 CID font contains neither a stream nor the name Identity as the (* A Type 2 CID font contains neither a stream nor the name Identity as the
value of the CIDToGIDMap entry. *) value of the CIDToGIDMap entry. *)