Streamline st/st2

This commit is contained in:
John Whitington 2024-06-20 14:50:06 +01:00
parent d812881619
commit c329895094
1 changed files with 109 additions and 117 deletions

210
cpdfua.ml
View File

@ -6,7 +6,6 @@ open Cpdferror
a) Those which require looking deep inside font files; and
b) Those which require reading inside the graphics stream. *)
(* FIXME pass st / st2 around *)
(* FIXME maximise chain usage *)
exception MatterhornError of Cpdfyojson.Safe.t
@ -108,9 +107,6 @@ let read_st2 pdf =
let rec st_of_st2 = function E2 (a, _, cs) -> E (a, map st_of_st2 cs)
let read_st pdf =
st_of_st2 (read_st2 pdf)
let rec st_mem p = function
| E (s, _) when p s -> true
| E (_, cs) -> List.exists (st_mem p) cs
@ -120,16 +116,16 @@ let string_of_st st =
Cpdfyojson.Safe.pretty_to_string (convert st)
(* Content marked as Artifact is present inside tagged content. *)
let matterhorn_01_003 pdf = todo ()
let matterhorn_01_003 _ _ pdf = todo ()
(* Tagged content is present inside content marked as Artifact. *)
let matterhorn_01_004 pdf = todo ()
let matterhorn_01_004 _ _ pdf = todo ()
(* Content is neither marked as Artifact nor tagged as real content. *)
let matterhorn_01_005 pdf = todo ()
let matterhorn_01_005 _ _ pdf = todo ()
(* Suspects entry has a value of true. *)
let matterhorn_01_007 pdf =
let matterhorn_01_007 _ _ pdf =
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/MarkInfo"; "/Suspects"] with
| Some (Pdf.Boolean true) -> merror ()
| _ -> ()
@ -161,7 +157,7 @@ let circular rm =
(* One or more non-standard tags mapping does not terminate with a standard
type. *)
let matterhorn_02_001 pdf =
let matterhorn_02_001 _ _ pdf =
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/RoleMap"] with
| Some rm ->
let rolemap = read_rolemap pdf rm in
@ -170,7 +166,7 @@ let matterhorn_02_001 pdf =
| None -> ()
(* A circular mapping exists. *)
let matterhorn_02_003 pdf =
let matterhorn_02_003 _ _ pdf =
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/RoleMap"] with
| Some rm ->
let rolemap = read_rolemap pdf rm in
@ -178,7 +174,7 @@ let matterhorn_02_003 pdf =
| None -> ()
(* One or more standard types are remapped. *)
let matterhorn_02_004 pdf =
let matterhorn_02_004 _ _ pdf =
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/RoleMap"] with
| Some rm ->
let rolemap = read_rolemap pdf rm in
@ -186,14 +182,14 @@ let matterhorn_02_004 pdf =
| None -> ()
(* Document does not contain an XMP metadata stream *)
let matterhorn_06_001 pdf =
let matterhorn_06_001 _ _ pdf =
match Cpdfmetadata.get_metadata pdf with
| Some _ -> ()
| None -> merror ()
(* The XMP metadata stream in the Catalog dictionary does not include the
PDF/UA identifier. *)
let matterhorn_06_002 pdf =
let matterhorn_06_002 _ _ pdf =
match Cpdfmetadata.get_metadata pdf with
| Some metadata ->
let _, tree = Cpdfmetadata.xmltree_of_bytes metadata in
@ -204,7 +200,7 @@ let matterhorn_06_002 pdf =
| None -> () (* case covered by test 06_001 above, no need for two failures *)
(* XMP metadata stream does not contain dc:title *)
let matterhorn_06_003 pdf =
let matterhorn_06_003 _ _ pdf =
match Cpdfmetadata.get_metadata pdf with
| Some metadata ->
let _, tree = Cpdfmetadata.xmltree_of_bytes metadata in
@ -216,7 +212,7 @@ let matterhorn_06_003 pdf =
(* ViewerPreferences dictionary of the Catalog dictionary does not contain a
DisplayDocTitle entry. *)
let matterhorn_07_001 pdf =
let matterhorn_07_001 _ _ pdf =
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
| Some catalog ->
begin match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with
@ -231,7 +227,7 @@ let matterhorn_07_001 pdf =
(* ViewerPreferences dictionary of the Catalog dictionary contains a
DisplayDocTitle entry with a value of false. *)
let matterhorn_07_002 pdf =
let matterhorn_07_002 _ _ pdf =
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
| Some catalog ->
begin match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with
@ -247,7 +243,7 @@ let matterhorn_07_002 pdf =
(* A table-related structure element is used in a way that does not conform to
the syntax defined in ISO 32000-1, Table 337. We assume no nesting of whole
tables, since it is not excplicitly mentioned in the spec. *)
let matterhorn_09_004 pdf =
let matterhorn_09_004 st st2 pdf =
let rec check_table = function
| E ("/Table", cs) ->
let cs =
@ -286,11 +282,11 @@ let matterhorn_09_004 pdf =
if List.exists (function E ("/TR", _) -> false | _ -> true) node then
merror_str "Element in /THead | /TBody | /TFoot not a /TR"
in
check_table (read_st pdf)
check_table st
(* A list-related structure element is used in a way that does not conform to
Table 336 in ISO 32000-1. *)
let matterhorn_09_005 pdf =
let matterhorn_09_005 st st2 pdf =
let rec check_l = function
| E ("/L", cs) ->
(* 0 or 1 captions *)
@ -312,15 +308,14 @@ let matterhorn_09_005 pdf =
| E (("/LBody"| "/Lbl"), cs) -> iter check_l cs
| E (_, _) -> merror_str "Child of /LI must be /Lbl or /LBody"
in
check_l (read_st pdf)
check_l st
(* A TOC-related structure element is used in a way that does not conform to
Table 333 in ISO 32000-1. *)
(* We test two things: a) everything under a TOC is correct; and b) There is no
TOCI except under a TOC. *)
let matterhorn_09_006 pdf =
let st = read_st pdf in
let matterhorn_09_006 st st2 pdf =
let seen_toc = ref false in
let rec check_toplevel_TOCI n =
begin match n with
@ -353,8 +348,7 @@ let matterhorn_09_006 pdf =
(* A Ruby-related structure element is used in a way that does not conform to
Table 338 in ISO 32000-1. *)
let matterhorn_09_007 pdf =
let st = read_st pdf in
let matterhorn_09_007 st st2 pdf =
let rec check_ruby = function
| E ("/Ruby", cs) ->
if List.exists (function (E (("/RB" | "/RT" | "RP"), _)) -> false | _ -> true) cs then merror ()
@ -365,8 +359,7 @@ let matterhorn_09_007 pdf =
(* A Warichu-related structure element is used in a way that does not conform
to Table 338 in ISO 32000-1. *)
let matterhorn_09_008 pdf =
let st = read_st pdf in
let matterhorn_09_008 st st2 pdf =
let rec check_warichu = function
| E ("/Ruby", cs) ->
if List.exists (function (E (("/WT" | "/WP"), _)) -> false | _ -> true) cs then merror ()
@ -376,32 +369,32 @@ let matterhorn_09_008 pdf =
check_warichu st
(* Character code cannot be mapped to Unicode. *)
let matterhorn_10_001 pdf =
let matterhorn_10_001 _ _ pdf =
unimpl ()
(* Natural language for text in page content cannot be determined. *)
let matterhorn_11_001 pdf =
let matterhorn_11_001 _ _ pdf =
unimpl ()
(* Natural language for text in Alt, ActualText and E attributes cannot be
determined. *)
let matterhorn_11_002 pdf = todo ()
let matterhorn_11_002 _ _ pdf = todo ()
(* Natural language in the Outline entries cannot be determined. *)
let matterhorn_11_003 pdf = todo ()
let matterhorn_11_003 _ _ pdf = todo ()
(* Natural language in the Contents entry for annotations cannot be determined.
*)
let matterhorn_11_004 pdf = todo ()
let matterhorn_11_004 _ _ pdf = todo ()
(* Natural language in the TU entry for form fields cannot be determined. *)
let matterhorn_11_005 pdf = todo ()
let matterhorn_11_005 _ _ pdf = todo ()
(* Natural language for document metadata cannot be determined. *)
let matterhorn_11_006 pdf = todo ()
let matterhorn_11_006 _ _ pdf = todo ()
(* <Figure> tag alternative or replacement text missing. *)
let matterhorn_13_004 pdf = todo ()
let matterhorn_13_004 _ _ pdf = todo ()
let is_hnum s =
match explode s with
@ -411,18 +404,17 @@ let is_hnum s =
| _ -> false
(* Does use numbered headings, but the first heading tag is not <H1>. *)
let matterhorn_14_002 pdf =
let matterhorn_14_002 st st2 pdf =
let rec check_hn = function
| E ("/H1", cs) -> ()
| E (s, cs) when is_hnum s -> merror ()
| E (_, cs) -> iter check_hn cs
in
let st = read_st pdf in
check_hn st
(* Numbered heading levels in descending sequence are skipped (Example: <H3>
follows directly after <H1>). *)
let matterhorn_14_003 pdf =
let matterhorn_14_003 st st2 pdf =
let rec check_nseq n = function
| E (s, cs) when is_hnum s ->
let num = int_of_string (implode (tl (tl (explode s)))) in
@ -430,11 +422,10 @@ let matterhorn_14_003 pdf =
iter (check_nseq num) cs
| E (_, cs) -> iter (check_nseq n) cs
in
check_nseq 0 (read_st pdf)
check_nseq 0 st
(* A node contains more than one <H> tag. *)
let matterhorn_14_006 pdf =
let st = read_st pdf in
let matterhorn_14_006 st st2 pdf =
let found = ref false in
let rec check_hs (E (_, cs)) =
if length (option_map (function E ("/H", _) -> Some () | _ -> None) cs) > 1 then set found;
@ -444,13 +435,12 @@ let matterhorn_14_006 pdf =
if !found then merror ()
(* Document uses both <H> and <H#> tags. *)
let matterhorn_14_007 pdf =
let st = read_st pdf in
let matterhorn_14_007 st st2 pdf =
if st_mem (eq "/H") st && st_mem is_hnum st then merror ()
(* In a table not organized with Headers attributes and IDs, a <TH> cell does
not contain a Scope attribute. *)
let matterhorn_15_003 pdf =
let matterhorn_15_003 st st2 pdf =
(* For now, we complain any time a <TH> does not have a scope. The 2008 PDF
spec, 2014 PDF/UA spec and Matterhorn protocol combined do not quite give
enough information to know what is required. To be returned to. *)
@ -459,32 +449,32 @@ let matterhorn_15_003 pdf =
if not (List.mem "/Scope" attr) then merror_str "No scope, table organization not checked." (*else Printf.printf "Found /Scope in /TH\n"*)
| E2 (_, _, cs) -> iter check_th cs
in
check_th (read_st2 pdf)
check_th st2
(* <Formula> tag is missing an Alt attribute. *)
let matterhorn_17_002 pdf =
let matterhorn_17_002 _ st2 pdf =
let rec check_fm = function
| E2 ("/Formula", attr, _) ->
if not (List.mem "/Alt" attr) then merror ()
| E2 (_, _, cs) -> iter check_fm cs
in
check_fm (read_st2 pdf)
check_fm st2
(* Unicode mapping requirements are not met. *)
let matterhorn_17_003 pdf =
let matterhorn_17_003 _ _ pdf =
unimpl ()
(* ID entry of the <Note> tag is not present. *)
let matterhorn_19_003 pdf =
let matterhorn_19_003 st st2 pdf =
let rec check_note = function
| E2 ("/Note", attr, _) ->
if not (List.mem "/ID" attr) then merror ()
| E2 (_, _, cs) -> iter check_note cs
in
check_note (read_st2 pdf)
check_note st2
(* ID entry of the <Note> tag is non-unique. *)
let matterhorn_19_004 pdf =
let matterhorn_19_004 _ _ pdf =
(* Looking for /Type /StructElem /N /Note /ID to exist. *)
(* FIXME ClassMaps here? *)
let ids = ref [] in
@ -494,13 +484,12 @@ let matterhorn_19_004 pdf =
| Some (Pdf.Name "/StructElem"), Some (Pdf.Name "/Note"), Some (Pdf.String s) -> ids := s::!ids
| _ -> ())
pdf;
(* FIXME should write the big setify here, fix up in CamlPDF - this is a good example. *)
if length (setify !ids) < length !ids then merror ()
if length (setify_large !ids) < length !ids then merror ()
(* Name entry is missing or has an empty string as its value in an Optional
Content Configuration Dictionary in the Configs entry in the OCProperties
entry in the Catalog dictionary. *)
let matterhorn_20_001 pdf =
let matterhorn_20_001 _ _ pdf =
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/OCProperties"; "/Configs"] with
| Some (Pdf.Array occds) ->
iter (function x -> match Pdf.lookup_direct pdf "/Name" x with None | Some (Pdf.Name "") -> merror () | _ -> ()) occds
@ -509,13 +498,13 @@ let matterhorn_20_001 pdf =
(* Name entry is missing or has an empty string as its value in an Optional
Content Configuration Dictionary that is the value of the D entry in the
OCProperties entry in the Catalog dictionary. *)
let matterhorn_20_002 pdf =
let matterhorn_20_002 _ _ pdf =
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/OCProperties"; "/D"; "/Name"] with
| Some (Pdf.String "") | None -> merror ()
| _ -> ()
(* An AS entry appears in an Optional Content Configuration Dictionary. *)
let matterhorn_20_003 pdf =
let matterhorn_20_003 _ _ pdf =
begin match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/OCProperties"; "/D"; "/AS"] with
| Some _ -> merror ()
| _ -> ()
@ -528,7 +517,7 @@ let matterhorn_20_003 pdf =
(* The file specification dictionary for an embedded file does not contain F
and UF entries. *)
let matterhorn_21_001 pdf =
let matterhorn_21_001 _ _ pdf =
let from_nametree =
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
| Some catalog ->
@ -562,7 +551,7 @@ let matterhorn_21_001 pdf =
merror ()
(* File contains the dynamicRender element with value “required”. *)
let matterhorn_25_001 pdf =
let matterhorn_25_001 _ _ pdf =
let rec contains_required_dynamicRender = function
| Cpdfmetadata.E (((_, "dynamicRender"), _), [Cpdfmetadata.D "required"]) -> true
| Cpdfmetadata.E (_, children) -> List.exists contains_required_dynamicRender children
@ -597,13 +586,13 @@ let matterhorn_25_001 pdf =
(* The file is encrypted but does not contain a P entry in its encryption
dictionary. *)
let matterhorn_26_001 pdf = ()
let matterhorn_26_001 _ _ pdf = ()
(* Would already have failed at this point, because CamlPDF does not allow
the decryption of a file with no /P *)
the decryption of a file with no /P. FIXME Can we make it fail with the right message? *)
(* The file is encrypted and does contain a P entry but the 10th bit position
of the P entry is false. *)
let matterhorn_26_002 pdf =
let matterhorn_26_002 _ _ pdf =
match pdf.Pdf.saved_encryption with
| None -> ()
| Some {Pdf.from_get_encryption_values = (_, _, _, p, _, _, _)} ->
@ -611,20 +600,20 @@ let matterhorn_26_002 pdf =
(* An annotation, other than of subtype Widget, Link and PrinterMark, is not a
direct child of an <Annot> structure element. *)
let matterhorn_28_002 pdf = todo ()
let matterhorn_28_002 _ _ pdf = todo ()
(* An annotation, other than of subtype Widget, does not have a Contents entry
and does not have an alternative description (in the form of an Alt entry in
the enclosing structure element). *)
let matterhorn_28_004 pdf = todo ()
let matterhorn_28_004 _ _ pdf = todo ()
(* A form field does not have a TU entry and does not have an alternative
description (in the form of an Alt entry in the enclosing structure
element). *)
let matterhorn_28_005 pdf = todo ()
let matterhorn_28_005 _ _ pdf = todo ()
(* An annotation with subtype undefined in ISO 32000 does not meet 7.18.1. *)
let matterhorn_28_006 pdf =
let matterhorn_28_006 _ _ pdf =
if
List.exists
(fun x -> match x.Pdfannot.subtype with Pdfannot.Unknown _ -> true | _ -> false)
@ -633,7 +622,7 @@ let matterhorn_28_006 pdf =
merror ()
(* An annotation of subtype TrapNet exists. *)
let matterhorn_28_007 pdf =
let matterhorn_28_007 _ _ pdf =
if
List.exists
(fun x -> x.Pdfannot.subtype = Pdfannot.TrapNet)
@ -642,7 +631,7 @@ let matterhorn_28_007 pdf =
merror ()
(* A page containing an annotation does not contain a Tabs entry *)
let matterhorn_28_008 pdf =
let matterhorn_28_008 _ _ pdf =
if
List.exists
(fun p ->
@ -653,7 +642,7 @@ let matterhorn_28_008 pdf =
(* A page containing an annotation has a Tabs entry with a value other than S.
*)
let matterhorn_28_009 pdf =
let matterhorn_28_009 _ _ pdf =
if
List.exists
(fun p ->
@ -665,14 +654,14 @@ let matterhorn_28_009 pdf =
merror ()
(* A widget annotation is not nested within a <Form> tag. *)
let matterhorn_28_010 pdf = todo ()
let matterhorn_28_010 _ _ pdf = todo ()
(* A link annotation is not nested within a <Link> tag. *)
let matterhorn_28_011 pdf = todo ()
let matterhorn_28_011 _ _ pdf = todo ()
(* A link annotation does not include an alternate description in its Contents
entry. *)
let matterhorn_28_012 pdf =
let matterhorn_28_012 _ _ pdf =
if
List.exists
(fun x -> x.Pdfannot.subtype = Pdfannot.Link && x.Pdfannot.annot_contents = None )
@ -681,7 +670,7 @@ let matterhorn_28_012 pdf =
merror ()
(* CT entry is missing from the media clip data dictionary. *)
let matterhorn_28_014 pdf =
let matterhorn_28_014 _ _ pdf =
Pdf.objiter
(fun _ o ->
match Pdf.lookup_direct pdf "/Type" o, Pdf.lookup_direct pdf "/S" o, Pdf.lookup_direct pdf "/CT" o with
@ -690,7 +679,7 @@ let matterhorn_28_014 pdf =
pdf
(* Alt entry is missing from the media clip data dictionary. *)
let matterhorn_28_015 pdf =
let matterhorn_28_015 _ _ pdf =
Pdf.objiter
(fun _ o ->
match Pdf.lookup_direct pdf "/Type" o, Pdf.lookup_direct pdf "/S" o, Pdf.lookup_direct pdf "/CT" o with
@ -699,18 +688,19 @@ let matterhorn_28_015 pdf =
pdf
(* File attachment annotations do not conform to 7.11. *)
let matterhorn_28_016 pdf =
let matterhorn_28_016 _ _ pdf =
(* FIXME ?? *)
covered_elsewhere ()
(* A PrinterMark annotation is included in the logical structure. *)
let matterhorn_28_017 pdf = todo ()
let matterhorn_28_017 _ _ pdf = todo ()
(* The appearance stream of a PrinterMark annotation is not marked as Artifact.
*)
let matterhorn_28_018 pdf = todo ()
let matterhorn_28_018 _ _ pdf = todo ()
(* A reference XObject is present. *)
let matterhorn_30_001 pdf =
let matterhorn_30_001 _ _ pdf =
Pdf.objiter
(fun _ o ->
match Pdf.lookup_direct pdf "/Subtype" o, Pdf.lookup_direct pdf "/Ref" o with
@ -719,7 +709,7 @@ let matterhorn_30_001 pdf =
pdf
(* Form XObject contains MCIDs and is referenced more than once. *)
let matterhorn_30_002 pdf =
let matterhorn_30_002 _ _ pdf =
(* We need to consider inheritence here. What solutions do we already have
for that, and do we need anything new? *)
unimpl ()
@ -727,7 +717,7 @@ let matterhorn_30_002 pdf =
(* A Type 0 font dictionary with encoding other than Identity-H and Identity-V
has values for Registry in both CIDSystemInfo dictionaries that are not
identical. *)
let matterhorn_31_001 pdf =
let matterhorn_31_001 _ _ pdf =
Pdf.objiter
(fun _ o ->
match Pdf.lookup_direct pdf "/Subtype" o, Pdf.lookup_direct pdf "/Encoding" o with
@ -742,19 +732,19 @@ let matterhorn_31_001 pdf =
(* A Type 0 font dictionary with encoding other than Identity-H and Identity-V
has values for Ordering in both CIDSystemInfo dictionaries that are not
identical. *)
let matterhorn_31_002 pdf =
matterhorn_31_001 pdf
let matterhorn_31_002 st st2 pdf =
matterhorn_31_001 st st2 pdf
(* A Type 0 font dictionary with encoding other than Identity-H and Identity-V
has a value for Supplement in the CIDSystemInfo dictionary of the CID font
that is less than the value for Supplement in the CIDSystemInfo dictionary
of the CMap. *)
let matterhorn_31_003 pdf =
matterhorn_31_001 pdf
let matterhorn_31_003 st st2 pdf =
matterhorn_31_001 st st2 pdf
(* A Type 2 CID font contains neither a stream nor the name Identity as the
value of the CIDToGIDMap entry. *)
let matterhorn_31_004 pdf =
let matterhorn_31_004 _ _ pdf =
Pdf.objiter
(fun _ n ->
match Pdf.lookup_direct pdf "/Subtype" n with
@ -767,7 +757,7 @@ let matterhorn_31_004 pdf =
pdf
(* A Type 2 CID font does not contain a CIDToGIDMap entry. *)
let matterhorn_31_005 pdf =
let matterhorn_31_005 _ _ pdf =
Pdf.objiter
(fun _ n ->
match Pdf.lookup_direct pdf "/Subtype" n with
@ -842,7 +832,7 @@ let cmap_names =
"/Identity-H";
"/Identity-V"]
let matterhorn_31_006 pdf =
let matterhorn_31_006 _ _ pdf =
Pdf.objiter
(fun _ o ->
match Pdf.lookup_direct pdf "/Subtype" o with
@ -856,65 +846,65 @@ let matterhorn_31_006 pdf =
(* The WMode entry in a CMap dictionary is not identical to the WMode value in
the CMap stream. *)
let matterhorn_31_007 pdf =
let matterhorn_31_007 _ _ pdf =
unimpl ()
(* A CMap references another CMap which is not listed in ISO 32000-1:2008,
9.7.5.2, Table 118. *)
let matterhorn_31_008 pdf =
let matterhorn_31_008 _ _ pdf =
unimpl ()
(* For a font used by text intended to be rendered the font program is not
embedded. *)
(* NB This, for now, reports all unembedded fonts. *)
let matterhorn_31_009 pdf =
let matterhorn_31_009 _ _ pdf =
let l = Cpdffont.missing_fonts_return pdf (ilist 1 (Pdfpage.endpage pdf)) in
if l <> [] then
raise (MatterhornError (`List (map (fun x -> `String x) l)))
(* For a font used by text the font program is embedded but it does not contain
glyphs for all of the glyphs referenced by the text used for rendering. *)
let matterhorn_31_011 pdf =
let matterhorn_31_011 _ _ pdf =
unimpl ()
(* The FontDescriptor dictionary of an embedded Type 1 font contains a CharSet
string, but at least one of the glyphs present in the font program is not
listed in the CharSet string. *)
let matterhorn_31_012 pdf =
let matterhorn_31_012 _ _ pdf =
unimpl ()
(* The FontDescriptor dictionary of an embedded Type 1 font contains a CharSet
string, but at least one of the glyphs listed in the CharSet string is not
present in the font program. *)
let matterhorn_31_013 pdf =
let matterhorn_31_013 _ _ pdf =
unimpl ()
(* The FontDescriptor dictionary of an embedded CID font contains a CIDSet
string, but at least one of the glyphs present in the font program is not
listed in the CIDSet string. *)
let matterhorn_31_014 pdf =
let matterhorn_31_014 _ _ pdf =
unimpl ()
(* The FontDescriptor dictionary of an embedded CID font contains a CIDSet
string, but at least one of the glyphs listed in the CIDSet string is not
present in the font program. *)
let matterhorn_31_015 pdf =
let matterhorn_31_015 _ _ pdf =
unimpl ()
(* For one or more glyphs, the glyph width information in the font dictionary
and in the embedded font program differ by more than 1/1000 unit. *)
let matterhorn_31_016 pdf =
let matterhorn_31_016 _ _ pdf =
unimpl ()
(* A non-symbolic TrueType font is used for rendering, but none of the cmap
entries in the embedded font program is a non-symbolic cmap. *)
let matterhorn_31_017 pdf =
let matterhorn_31_017 _ _ pdf =
unimpl ()
(* A non-symbolic TrueType font is used for rendering, but for at least one
glyph to be rendered the glyph cannot be looked up by any of the
non-symbolic cmap entries in the embedded font program. *)
let matterhorn_31_018 pdf =
let matterhorn_31_018 _ _ pdf =
unimpl ()
(* The font dictionary for a non-symbolic TrueType font does not contain an
@ -928,7 +918,7 @@ let is_non_symbolic pdf o =
end
| None -> true
let matterhorn_31_019 pdf =
let matterhorn_31_019 _ _ pdf =
Pdf.objiter
(fun _ o ->
match Pdf.lookup_direct pdf "/Subtype" o with
@ -942,7 +932,7 @@ let matterhorn_31_019 pdf =
(* The font dictionary for a non-symbolic TrueType font contains an Encoding
dictionary which does not contain a BaseEncoding entry. *)
let matterhorn_31_020 pdf =
let matterhorn_31_020 _ _ pdf =
Pdf.objiter
(fun _ o ->
match Pdf.lookup_direct pdf "/Subtype" o with
@ -961,7 +951,7 @@ let matterhorn_31_020 pdf =
(* The value for either the Encoding entry or the BaseEncoding entry in the
Encoding dictionary in a non-symbolic TrueType font dictionary is neither
MacRomanEncoding nor WinAnsiEncoding. *)
let matterhorn_31_021 pdf =
let matterhorn_31_021 _ _ pdf =
Pdf.objiter
(fun _ o ->
match Pdf.lookup_direct pdf "/Subtype" o with
@ -983,7 +973,7 @@ let matterhorn_31_021 pdf =
(* The Differences array in the Encoding entry in a non-symbolic TrueType font
dictionary contains one or more glyph names which are not listed in the
Adobe Glyph List. *)
let matterhorn_31_022 pdf =
let matterhorn_31_022 _ _ pdf =
Pdf.objiter
(fun _ o ->
match Pdf.lookup_direct pdf "/Subtype" o with
@ -1005,12 +995,12 @@ let matterhorn_31_022 pdf =
(* The Differences array is present in the Encoding entry in a non-symbolic
TrueType font dictionary but the embedded font program does not contain a
(3,1) Microsoft Unicode cmap. *)
let matterhorn_31_023 pdf =
let matterhorn_31_023 _ _ pdf =
unimpl ()
(* The Encoding entry is present in the font dictionary for a symbolic TrueType
font. *)
let matterhorn_31_024 pdf =
let matterhorn_31_024 _ _ pdf =
Pdf.objiter
(fun _ o ->
match Pdf.lookup_direct pdf "/Subtype" o with
@ -1023,12 +1013,12 @@ let matterhorn_31_024 pdf =
pdf
(* The embedded font program for a symbolic TrueType font contains no cmap. *)
let matterhorn_31_025 pdf =
let matterhorn_31_025 _ _ pdf =
unimpl ()
(* The embedded font program for a symbolic TrueType font contains more than
one cmap, but none of the cmap entries is a (3,0) Microsoft Symbol cmap. *)
let matterhorn_31_026 pdf =
let matterhorn_31_026 _ _ pdf =
unimpl ()
(* A font dictionary does not contain the ToUnicode entry and none of the
@ -1039,7 +1029,7 @@ let matterhorn_31_026 pdf =
Annex D; the font is a Type 0 font, and its descendant CIDFont uses
Adobe-GB1, Adobe-CNS1, Adobe-Japan1 or Adobe-Korea1 character collections;
the font is a non-symbolic TrueType font. *)
let matterhorn_31_027 pdf =
let matterhorn_31_027 _ _ pdf =
not_fully_implemented ();
(* Here, we implement most of this one, but can't check the set of referenced
glyphs for Type1 / Type3. *)
@ -1099,7 +1089,7 @@ let check_unicode tu n =
mem n (flatten (map (fun x -> Pdftext.codepoints_of_utf16be (snd x)) tu))
(* One or more Unicode values specified in the ToUnicode CMap are zero (0). *)
let matterhorn_31_028 pdf =
let matterhorn_31_028 _ _ pdf =
iter
(fun i ->
let tu = Pdftext.parse_tounicode pdf (Pdf.lookup_obj pdf i) in
@ -1108,7 +1098,7 @@ let matterhorn_31_028 pdf =
(* One or more Unicode values specified in the ToUnicode CMap are equal to
either U+FEFF or U+FFFE. *)
let matterhorn_31_029 pdf =
let matterhorn_31_029 _ _ pdf =
iter
(fun i ->
let tu = Pdftext.parse_tounicode pdf (Pdf.lookup_obj pdf i) in
@ -1117,7 +1107,7 @@ let matterhorn_31_029 pdf =
(* One or more characters used in text showing operators reference the .notdef
glyph. *)
let matterhorn_31_030 pdf =
let matterhorn_31_030 _ _ pdf =
unimpl ()
let matterhorn =
@ -1211,9 +1201,11 @@ let matterhorn =
]
let test_matterhorn pdf =
let st2 = read_st2 pdf in
let st = st_of_st2 st2 in
option_map
(fun (name, error, section, test) ->
try test pdf; None with
try test st st2 pdf; None with
| MatterhornError extra -> Some (name, error, section, extra)
| MatterhornUnimplemented -> None
| e -> Some (name, "Incomplete", section, `String ("ERROR: " ^ Printexc.to_string e)))