Streamline st/st2
This commit is contained in:
parent
d812881619
commit
c329895094
210
cpdfua.ml
210
cpdfua.ml
|
@ -6,7 +6,6 @@ open Cpdferror
|
||||||
a) Those which require looking deep inside font files; and
|
a) Those which require looking deep inside font files; and
|
||||||
b) Those which require reading inside the graphics stream. *)
|
b) Those which require reading inside the graphics stream. *)
|
||||||
|
|
||||||
(* FIXME pass st / st2 around *)
|
|
||||||
(* FIXME maximise chain usage *)
|
(* FIXME maximise chain usage *)
|
||||||
|
|
||||||
exception MatterhornError of Cpdfyojson.Safe.t
|
exception MatterhornError of Cpdfyojson.Safe.t
|
||||||
|
@ -108,9 +107,6 @@ let read_st2 pdf =
|
||||||
|
|
||||||
let rec st_of_st2 = function E2 (a, _, cs) -> E (a, map st_of_st2 cs)
|
let rec st_of_st2 = function E2 (a, _, cs) -> E (a, map st_of_st2 cs)
|
||||||
|
|
||||||
let read_st pdf =
|
|
||||||
st_of_st2 (read_st2 pdf)
|
|
||||||
|
|
||||||
let rec st_mem p = function
|
let rec st_mem p = function
|
||||||
| E (s, _) when p s -> true
|
| E (s, _) when p s -> true
|
||||||
| E (_, cs) -> List.exists (st_mem p) cs
|
| E (_, cs) -> List.exists (st_mem p) cs
|
||||||
|
@ -120,16 +116,16 @@ let string_of_st st =
|
||||||
Cpdfyojson.Safe.pretty_to_string (convert st)
|
Cpdfyojson.Safe.pretty_to_string (convert st)
|
||||||
|
|
||||||
(* Content marked as Artifact is present inside tagged content. *)
|
(* Content marked as Artifact is present inside tagged content. *)
|
||||||
let matterhorn_01_003 pdf = todo ()
|
let matterhorn_01_003 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* Tagged content is present inside content marked as Artifact. *)
|
(* Tagged content is present inside content marked as Artifact. *)
|
||||||
let matterhorn_01_004 pdf = todo ()
|
let matterhorn_01_004 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* Content is neither marked as Artifact nor tagged as real content. *)
|
(* Content is neither marked as Artifact nor tagged as real content. *)
|
||||||
let matterhorn_01_005 pdf = todo ()
|
let matterhorn_01_005 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* Suspects entry has a value of true. *)
|
(* Suspects entry has a value of true. *)
|
||||||
let matterhorn_01_007 pdf =
|
let matterhorn_01_007 _ _ pdf =
|
||||||
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/MarkInfo"; "/Suspects"] with
|
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/MarkInfo"; "/Suspects"] with
|
||||||
| Some (Pdf.Boolean true) -> merror ()
|
| Some (Pdf.Boolean true) -> merror ()
|
||||||
| _ -> ()
|
| _ -> ()
|
||||||
|
@ -161,7 +157,7 @@ let circular rm =
|
||||||
|
|
||||||
(* One or more non-standard tag’s mapping does not terminate with a standard
|
(* One or more non-standard tag’s mapping does not terminate with a standard
|
||||||
type. *)
|
type. *)
|
||||||
let matterhorn_02_001 pdf =
|
let matterhorn_02_001 _ _ pdf =
|
||||||
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/RoleMap"] with
|
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/RoleMap"] with
|
||||||
| Some rm ->
|
| Some rm ->
|
||||||
let rolemap = read_rolemap pdf rm in
|
let rolemap = read_rolemap pdf rm in
|
||||||
|
@ -170,7 +166,7 @@ let matterhorn_02_001 pdf =
|
||||||
| None -> ()
|
| None -> ()
|
||||||
|
|
||||||
(* A circular mapping exists. *)
|
(* A circular mapping exists. *)
|
||||||
let matterhorn_02_003 pdf =
|
let matterhorn_02_003 _ _ pdf =
|
||||||
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/RoleMap"] with
|
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/RoleMap"] with
|
||||||
| Some rm ->
|
| Some rm ->
|
||||||
let rolemap = read_rolemap pdf rm in
|
let rolemap = read_rolemap pdf rm in
|
||||||
|
@ -178,7 +174,7 @@ let matterhorn_02_003 pdf =
|
||||||
| None -> ()
|
| None -> ()
|
||||||
|
|
||||||
(* One or more standard types are remapped. *)
|
(* One or more standard types are remapped. *)
|
||||||
let matterhorn_02_004 pdf =
|
let matterhorn_02_004 _ _ pdf =
|
||||||
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/RoleMap"] with
|
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/RoleMap"] with
|
||||||
| Some rm ->
|
| Some rm ->
|
||||||
let rolemap = read_rolemap pdf rm in
|
let rolemap = read_rolemap pdf rm in
|
||||||
|
@ -186,14 +182,14 @@ let matterhorn_02_004 pdf =
|
||||||
| None -> ()
|
| None -> ()
|
||||||
|
|
||||||
(* Document does not contain an XMP metadata stream *)
|
(* Document does not contain an XMP metadata stream *)
|
||||||
let matterhorn_06_001 pdf =
|
let matterhorn_06_001 _ _ pdf =
|
||||||
match Cpdfmetadata.get_metadata pdf with
|
match Cpdfmetadata.get_metadata pdf with
|
||||||
| Some _ -> ()
|
| Some _ -> ()
|
||||||
| None -> merror ()
|
| None -> merror ()
|
||||||
|
|
||||||
(* The XMP metadata stream in the Catalog dictionary does not include the
|
(* The XMP metadata stream in the Catalog dictionary does not include the
|
||||||
PDF/UA identifier. *)
|
PDF/UA identifier. *)
|
||||||
let matterhorn_06_002 pdf =
|
let matterhorn_06_002 _ _ pdf =
|
||||||
match Cpdfmetadata.get_metadata pdf with
|
match Cpdfmetadata.get_metadata pdf with
|
||||||
| Some metadata ->
|
| Some metadata ->
|
||||||
let _, tree = Cpdfmetadata.xmltree_of_bytes metadata in
|
let _, tree = Cpdfmetadata.xmltree_of_bytes metadata in
|
||||||
|
@ -204,7 +200,7 @@ let matterhorn_06_002 pdf =
|
||||||
| None -> () (* case covered by test 06_001 above, no need for two failures *)
|
| None -> () (* case covered by test 06_001 above, no need for two failures *)
|
||||||
|
|
||||||
(* XMP metadata stream does not contain dc:title *)
|
(* XMP metadata stream does not contain dc:title *)
|
||||||
let matterhorn_06_003 pdf =
|
let matterhorn_06_003 _ _ pdf =
|
||||||
match Cpdfmetadata.get_metadata pdf with
|
match Cpdfmetadata.get_metadata pdf with
|
||||||
| Some metadata ->
|
| Some metadata ->
|
||||||
let _, tree = Cpdfmetadata.xmltree_of_bytes metadata in
|
let _, tree = Cpdfmetadata.xmltree_of_bytes metadata in
|
||||||
|
@ -216,7 +212,7 @@ let matterhorn_06_003 pdf =
|
||||||
|
|
||||||
(* ViewerPreferences dictionary of the Catalog dictionary does not contain a
|
(* ViewerPreferences dictionary of the Catalog dictionary does not contain a
|
||||||
DisplayDocTitle entry. *)
|
DisplayDocTitle entry. *)
|
||||||
let matterhorn_07_001 pdf =
|
let matterhorn_07_001 _ _ pdf =
|
||||||
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
||||||
| Some catalog ->
|
| Some catalog ->
|
||||||
begin match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with
|
begin match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with
|
||||||
|
@ -231,7 +227,7 @@ let matterhorn_07_001 pdf =
|
||||||
|
|
||||||
(* ViewerPreferences dictionary of the Catalog dictionary contains a
|
(* ViewerPreferences dictionary of the Catalog dictionary contains a
|
||||||
DisplayDocTitle entry with a value of false. *)
|
DisplayDocTitle entry with a value of false. *)
|
||||||
let matterhorn_07_002 pdf =
|
let matterhorn_07_002 _ _ pdf =
|
||||||
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
||||||
| Some catalog ->
|
| Some catalog ->
|
||||||
begin match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with
|
begin match Pdf.lookup_direct pdf "/ViewerPreferences" catalog with
|
||||||
|
@ -247,7 +243,7 @@ let matterhorn_07_002 pdf =
|
||||||
(* A table-related structure element is used in a way that does not conform to
|
(* A table-related structure element is used in a way that does not conform to
|
||||||
the syntax defined in ISO 32000-1, Table 337. We assume no nesting of whole
|
the syntax defined in ISO 32000-1, Table 337. We assume no nesting of whole
|
||||||
tables, since it is not excplicitly mentioned in the spec. *)
|
tables, since it is not excplicitly mentioned in the spec. *)
|
||||||
let matterhorn_09_004 pdf =
|
let matterhorn_09_004 st st2 pdf =
|
||||||
let rec check_table = function
|
let rec check_table = function
|
||||||
| E ("/Table", cs) ->
|
| E ("/Table", cs) ->
|
||||||
let cs =
|
let cs =
|
||||||
|
@ -286,11 +282,11 @@ let matterhorn_09_004 pdf =
|
||||||
if List.exists (function E ("/TR", _) -> false | _ -> true) node then
|
if List.exists (function E ("/TR", _) -> false | _ -> true) node then
|
||||||
merror_str "Element in /THead | /TBody | /TFoot not a /TR"
|
merror_str "Element in /THead | /TBody | /TFoot not a /TR"
|
||||||
in
|
in
|
||||||
check_table (read_st pdf)
|
check_table st
|
||||||
|
|
||||||
(* A list-related structure element is used in a way that does not conform to
|
(* A list-related structure element is used in a way that does not conform to
|
||||||
Table 336 in ISO 32000-1. *)
|
Table 336 in ISO 32000-1. *)
|
||||||
let matterhorn_09_005 pdf =
|
let matterhorn_09_005 st st2 pdf =
|
||||||
let rec check_l = function
|
let rec check_l = function
|
||||||
| E ("/L", cs) ->
|
| E ("/L", cs) ->
|
||||||
(* 0 or 1 captions *)
|
(* 0 or 1 captions *)
|
||||||
|
@ -312,15 +308,14 @@ let matterhorn_09_005 pdf =
|
||||||
| E (("/LBody"| "/Lbl"), cs) -> iter check_l cs
|
| E (("/LBody"| "/Lbl"), cs) -> iter check_l cs
|
||||||
| E (_, _) -> merror_str "Child of /LI must be /Lbl or /LBody"
|
| E (_, _) -> merror_str "Child of /LI must be /Lbl or /LBody"
|
||||||
in
|
in
|
||||||
check_l (read_st pdf)
|
check_l st
|
||||||
|
|
||||||
(* A TOC-related structure element is used in a way that does not conform to
|
(* A TOC-related structure element is used in a way that does not conform to
|
||||||
Table 333 in ISO 32000-1. *)
|
Table 333 in ISO 32000-1. *)
|
||||||
|
|
||||||
(* We test two things: a) everything under a TOC is correct; and b) There is no
|
(* We test two things: a) everything under a TOC is correct; and b) There is no
|
||||||
TOCI except under a TOC. *)
|
TOCI except under a TOC. *)
|
||||||
let matterhorn_09_006 pdf =
|
let matterhorn_09_006 st st2 pdf =
|
||||||
let st = read_st pdf in
|
|
||||||
let seen_toc = ref false in
|
let seen_toc = ref false in
|
||||||
let rec check_toplevel_TOCI n =
|
let rec check_toplevel_TOCI n =
|
||||||
begin match n with
|
begin match n with
|
||||||
|
@ -353,8 +348,7 @@ let matterhorn_09_006 pdf =
|
||||||
|
|
||||||
(* A Ruby-related structure element is used in a way that does not conform to
|
(* A Ruby-related structure element is used in a way that does not conform to
|
||||||
Table 338 in ISO 32000-1. *)
|
Table 338 in ISO 32000-1. *)
|
||||||
let matterhorn_09_007 pdf =
|
let matterhorn_09_007 st st2 pdf =
|
||||||
let st = read_st pdf in
|
|
||||||
let rec check_ruby = function
|
let rec check_ruby = function
|
||||||
| E ("/Ruby", cs) ->
|
| E ("/Ruby", cs) ->
|
||||||
if List.exists (function (E (("/RB" | "/RT" | "RP"), _)) -> false | _ -> true) cs then merror ()
|
if List.exists (function (E (("/RB" | "/RT" | "RP"), _)) -> false | _ -> true) cs then merror ()
|
||||||
|
@ -365,8 +359,7 @@ let matterhorn_09_007 pdf =
|
||||||
|
|
||||||
(* A Warichu-related structure element is used in a way that does not conform
|
(* A Warichu-related structure element is used in a way that does not conform
|
||||||
to Table 338 in ISO 32000-1. *)
|
to Table 338 in ISO 32000-1. *)
|
||||||
let matterhorn_09_008 pdf =
|
let matterhorn_09_008 st st2 pdf =
|
||||||
let st = read_st pdf in
|
|
||||||
let rec check_warichu = function
|
let rec check_warichu = function
|
||||||
| E ("/Ruby", cs) ->
|
| E ("/Ruby", cs) ->
|
||||||
if List.exists (function (E (("/WT" | "/WP"), _)) -> false | _ -> true) cs then merror ()
|
if List.exists (function (E (("/WT" | "/WP"), _)) -> false | _ -> true) cs then merror ()
|
||||||
|
@ -376,32 +369,32 @@ let matterhorn_09_008 pdf =
|
||||||
check_warichu st
|
check_warichu st
|
||||||
|
|
||||||
(* Character code cannot be mapped to Unicode. *)
|
(* Character code cannot be mapped to Unicode. *)
|
||||||
let matterhorn_10_001 pdf =
|
let matterhorn_10_001 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* Natural language for text in page content cannot be determined. *)
|
(* Natural language for text in page content cannot be determined. *)
|
||||||
let matterhorn_11_001 pdf =
|
let matterhorn_11_001 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* Natural language for text in Alt, ActualText and E attributes cannot be
|
(* Natural language for text in Alt, ActualText and E attributes cannot be
|
||||||
determined. *)
|
determined. *)
|
||||||
let matterhorn_11_002 pdf = todo ()
|
let matterhorn_11_002 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* Natural language in the Outline entries cannot be determined. *)
|
(* Natural language in the Outline entries cannot be determined. *)
|
||||||
let matterhorn_11_003 pdf = todo ()
|
let matterhorn_11_003 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* Natural language in the Contents entry for annotations cannot be determined.
|
(* Natural language in the Contents entry for annotations cannot be determined.
|
||||||
*)
|
*)
|
||||||
let matterhorn_11_004 pdf = todo ()
|
let matterhorn_11_004 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* Natural language in the TU entry for form fields cannot be determined. *)
|
(* Natural language in the TU entry for form fields cannot be determined. *)
|
||||||
let matterhorn_11_005 pdf = todo ()
|
let matterhorn_11_005 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* Natural language for document metadata cannot be determined. *)
|
(* Natural language for document metadata cannot be determined. *)
|
||||||
let matterhorn_11_006 pdf = todo ()
|
let matterhorn_11_006 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* <Figure> tag alternative or replacement text missing. *)
|
(* <Figure> tag alternative or replacement text missing. *)
|
||||||
let matterhorn_13_004 pdf = todo ()
|
let matterhorn_13_004 _ _ pdf = todo ()
|
||||||
|
|
||||||
let is_hnum s =
|
let is_hnum s =
|
||||||
match explode s with
|
match explode s with
|
||||||
|
@ -411,18 +404,17 @@ let is_hnum s =
|
||||||
| _ -> false
|
| _ -> false
|
||||||
|
|
||||||
(* Does use numbered headings, but the first heading tag is not <H1>. *)
|
(* Does use numbered headings, but the first heading tag is not <H1>. *)
|
||||||
let matterhorn_14_002 pdf =
|
let matterhorn_14_002 st st2 pdf =
|
||||||
let rec check_hn = function
|
let rec check_hn = function
|
||||||
| E ("/H1", cs) -> ()
|
| E ("/H1", cs) -> ()
|
||||||
| E (s, cs) when is_hnum s -> merror ()
|
| E (s, cs) when is_hnum s -> merror ()
|
||||||
| E (_, cs) -> iter check_hn cs
|
| E (_, cs) -> iter check_hn cs
|
||||||
in
|
in
|
||||||
let st = read_st pdf in
|
|
||||||
check_hn st
|
check_hn st
|
||||||
|
|
||||||
(* Numbered heading levels in descending sequence are skipped (Example: <H3>
|
(* Numbered heading levels in descending sequence are skipped (Example: <H3>
|
||||||
follows directly after <H1>). *)
|
follows directly after <H1>). *)
|
||||||
let matterhorn_14_003 pdf =
|
let matterhorn_14_003 st st2 pdf =
|
||||||
let rec check_nseq n = function
|
let rec check_nseq n = function
|
||||||
| E (s, cs) when is_hnum s ->
|
| E (s, cs) when is_hnum s ->
|
||||||
let num = int_of_string (implode (tl (tl (explode s)))) in
|
let num = int_of_string (implode (tl (tl (explode s)))) in
|
||||||
|
@ -430,11 +422,10 @@ let matterhorn_14_003 pdf =
|
||||||
iter (check_nseq num) cs
|
iter (check_nseq num) cs
|
||||||
| E (_, cs) -> iter (check_nseq n) cs
|
| E (_, cs) -> iter (check_nseq n) cs
|
||||||
in
|
in
|
||||||
check_nseq 0 (read_st pdf)
|
check_nseq 0 st
|
||||||
|
|
||||||
(* A node contains more than one <H> tag. *)
|
(* A node contains more than one <H> tag. *)
|
||||||
let matterhorn_14_006 pdf =
|
let matterhorn_14_006 st st2 pdf =
|
||||||
let st = read_st pdf in
|
|
||||||
let found = ref false in
|
let found = ref false in
|
||||||
let rec check_hs (E (_, cs)) =
|
let rec check_hs (E (_, cs)) =
|
||||||
if length (option_map (function E ("/H", _) -> Some () | _ -> None) cs) > 1 then set found;
|
if length (option_map (function E ("/H", _) -> Some () | _ -> None) cs) > 1 then set found;
|
||||||
|
@ -444,13 +435,12 @@ let matterhorn_14_006 pdf =
|
||||||
if !found then merror ()
|
if !found then merror ()
|
||||||
|
|
||||||
(* Document uses both <H> and <H#> tags. *)
|
(* Document uses both <H> and <H#> tags. *)
|
||||||
let matterhorn_14_007 pdf =
|
let matterhorn_14_007 st st2 pdf =
|
||||||
let st = read_st pdf in
|
|
||||||
if st_mem (eq "/H") st && st_mem is_hnum st then merror ()
|
if st_mem (eq "/H") st && st_mem is_hnum st then merror ()
|
||||||
|
|
||||||
(* In a table not organized with Headers attributes and IDs, a <TH> cell does
|
(* In a table not organized with Headers attributes and IDs, a <TH> cell does
|
||||||
not contain a Scope attribute. *)
|
not contain a Scope attribute. *)
|
||||||
let matterhorn_15_003 pdf =
|
let matterhorn_15_003 st st2 pdf =
|
||||||
(* For now, we complain any time a <TH> does not have a scope. The 2008 PDF
|
(* For now, we complain any time a <TH> does not have a scope. The 2008 PDF
|
||||||
spec, 2014 PDF/UA spec and Matterhorn protocol combined do not quite give
|
spec, 2014 PDF/UA spec and Matterhorn protocol combined do not quite give
|
||||||
enough information to know what is required. To be returned to. *)
|
enough information to know what is required. To be returned to. *)
|
||||||
|
@ -459,32 +449,32 @@ let matterhorn_15_003 pdf =
|
||||||
if not (List.mem "/Scope" attr) then merror_str "No scope, table organization not checked." (*else Printf.printf "Found /Scope in /TH\n"*)
|
if not (List.mem "/Scope" attr) then merror_str "No scope, table organization not checked." (*else Printf.printf "Found /Scope in /TH\n"*)
|
||||||
| E2 (_, _, cs) -> iter check_th cs
|
| E2 (_, _, cs) -> iter check_th cs
|
||||||
in
|
in
|
||||||
check_th (read_st2 pdf)
|
check_th st2
|
||||||
|
|
||||||
(* <Formula> tag is missing an Alt attribute. *)
|
(* <Formula> tag is missing an Alt attribute. *)
|
||||||
let matterhorn_17_002 pdf =
|
let matterhorn_17_002 _ st2 pdf =
|
||||||
let rec check_fm = function
|
let rec check_fm = function
|
||||||
| E2 ("/Formula", attr, _) ->
|
| E2 ("/Formula", attr, _) ->
|
||||||
if not (List.mem "/Alt" attr) then merror ()
|
if not (List.mem "/Alt" attr) then merror ()
|
||||||
| E2 (_, _, cs) -> iter check_fm cs
|
| E2 (_, _, cs) -> iter check_fm cs
|
||||||
in
|
in
|
||||||
check_fm (read_st2 pdf)
|
check_fm st2
|
||||||
|
|
||||||
(* Unicode mapping requirements are not met. *)
|
(* Unicode mapping requirements are not met. *)
|
||||||
let matterhorn_17_003 pdf =
|
let matterhorn_17_003 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* ID entry of the <Note> tag is not present. *)
|
(* ID entry of the <Note> tag is not present. *)
|
||||||
let matterhorn_19_003 pdf =
|
let matterhorn_19_003 st st2 pdf =
|
||||||
let rec check_note = function
|
let rec check_note = function
|
||||||
| E2 ("/Note", attr, _) ->
|
| E2 ("/Note", attr, _) ->
|
||||||
if not (List.mem "/ID" attr) then merror ()
|
if not (List.mem "/ID" attr) then merror ()
|
||||||
| E2 (_, _, cs) -> iter check_note cs
|
| E2 (_, _, cs) -> iter check_note cs
|
||||||
in
|
in
|
||||||
check_note (read_st2 pdf)
|
check_note st2
|
||||||
|
|
||||||
(* ID entry of the <Note> tag is non-unique. *)
|
(* ID entry of the <Note> tag is non-unique. *)
|
||||||
let matterhorn_19_004 pdf =
|
let matterhorn_19_004 _ _ pdf =
|
||||||
(* Looking for /Type /StructElem /N /Note /ID to exist. *)
|
(* Looking for /Type /StructElem /N /Note /ID to exist. *)
|
||||||
(* FIXME ClassMaps here? *)
|
(* FIXME ClassMaps here? *)
|
||||||
let ids = ref [] in
|
let ids = ref [] in
|
||||||
|
@ -494,13 +484,12 @@ let matterhorn_19_004 pdf =
|
||||||
| Some (Pdf.Name "/StructElem"), Some (Pdf.Name "/Note"), Some (Pdf.String s) -> ids := s::!ids
|
| Some (Pdf.Name "/StructElem"), Some (Pdf.Name "/Note"), Some (Pdf.String s) -> ids := s::!ids
|
||||||
| _ -> ())
|
| _ -> ())
|
||||||
pdf;
|
pdf;
|
||||||
(* FIXME should write the big setify here, fix up in CamlPDF - this is a good example. *)
|
if length (setify_large !ids) < length !ids then merror ()
|
||||||
if length (setify !ids) < length !ids then merror ()
|
|
||||||
|
|
||||||
(* Name entry is missing or has an empty string as its value in an Optional
|
(* Name entry is missing or has an empty string as its value in an Optional
|
||||||
Content Configuration Dictionary in the Configs entry in the OCProperties
|
Content Configuration Dictionary in the Configs entry in the OCProperties
|
||||||
entry in the Catalog dictionary. *)
|
entry in the Catalog dictionary. *)
|
||||||
let matterhorn_20_001 pdf =
|
let matterhorn_20_001 _ _ pdf =
|
||||||
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/OCProperties"; "/Configs"] with
|
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/OCProperties"; "/Configs"] with
|
||||||
| Some (Pdf.Array occds) ->
|
| Some (Pdf.Array occds) ->
|
||||||
iter (function x -> match Pdf.lookup_direct pdf "/Name" x with None | Some (Pdf.Name "") -> merror () | _ -> ()) occds
|
iter (function x -> match Pdf.lookup_direct pdf "/Name" x with None | Some (Pdf.Name "") -> merror () | _ -> ()) occds
|
||||||
|
@ -509,13 +498,13 @@ let matterhorn_20_001 pdf =
|
||||||
(* Name entry is missing or has an empty string as its value in an Optional
|
(* Name entry is missing or has an empty string as its value in an Optional
|
||||||
Content Configuration Dictionary that is the value of the D entry in the
|
Content Configuration Dictionary that is the value of the D entry in the
|
||||||
OCProperties entry in the Catalog dictionary. *)
|
OCProperties entry in the Catalog dictionary. *)
|
||||||
let matterhorn_20_002 pdf =
|
let matterhorn_20_002 _ _ pdf =
|
||||||
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/OCProperties"; "/D"; "/Name"] with
|
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/OCProperties"; "/D"; "/Name"] with
|
||||||
| Some (Pdf.String "") | None -> merror ()
|
| Some (Pdf.String "") | None -> merror ()
|
||||||
| _ -> ()
|
| _ -> ()
|
||||||
|
|
||||||
(* An AS entry appears in an Optional Content Configuration Dictionary. *)
|
(* An AS entry appears in an Optional Content Configuration Dictionary. *)
|
||||||
let matterhorn_20_003 pdf =
|
let matterhorn_20_003 _ _ pdf =
|
||||||
begin match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/OCProperties"; "/D"; "/AS"] with
|
begin match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/OCProperties"; "/D"; "/AS"] with
|
||||||
| Some _ -> merror ()
|
| Some _ -> merror ()
|
||||||
| _ -> ()
|
| _ -> ()
|
||||||
|
@ -528,7 +517,7 @@ let matterhorn_20_003 pdf =
|
||||||
|
|
||||||
(* The file specification dictionary for an embedded file does not contain F
|
(* The file specification dictionary for an embedded file does not contain F
|
||||||
and UF entries. *)
|
and UF entries. *)
|
||||||
let matterhorn_21_001 pdf =
|
let matterhorn_21_001 _ _ pdf =
|
||||||
let from_nametree =
|
let from_nametree =
|
||||||
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
|
||||||
| Some catalog ->
|
| Some catalog ->
|
||||||
|
@ -562,7 +551,7 @@ let matterhorn_21_001 pdf =
|
||||||
merror ()
|
merror ()
|
||||||
|
|
||||||
(* File contains the dynamicRender element with value “required”. *)
|
(* File contains the dynamicRender element with value “required”. *)
|
||||||
let matterhorn_25_001 pdf =
|
let matterhorn_25_001 _ _ pdf =
|
||||||
let rec contains_required_dynamicRender = function
|
let rec contains_required_dynamicRender = function
|
||||||
| Cpdfmetadata.E (((_, "dynamicRender"), _), [Cpdfmetadata.D "required"]) -> true
|
| Cpdfmetadata.E (((_, "dynamicRender"), _), [Cpdfmetadata.D "required"]) -> true
|
||||||
| Cpdfmetadata.E (_, children) -> List.exists contains_required_dynamicRender children
|
| Cpdfmetadata.E (_, children) -> List.exists contains_required_dynamicRender children
|
||||||
|
@ -597,13 +586,13 @@ let matterhorn_25_001 pdf =
|
||||||
|
|
||||||
(* The file is encrypted but does not contain a P entry in its encryption
|
(* The file is encrypted but does not contain a P entry in its encryption
|
||||||
dictionary. *)
|
dictionary. *)
|
||||||
let matterhorn_26_001 pdf = ()
|
let matterhorn_26_001 _ _ pdf = ()
|
||||||
(* Would already have failed at this point, because CamlPDF does not allow
|
(* Would already have failed at this point, because CamlPDF does not allow
|
||||||
the decryption of a file with no /P *)
|
the decryption of a file with no /P. FIXME Can we make it fail with the right message? *)
|
||||||
|
|
||||||
(* The file is encrypted and does contain a P entry but the 10th bit position
|
(* The file is encrypted and does contain a P entry but the 10th bit position
|
||||||
of the P entry is false. *)
|
of the P entry is false. *)
|
||||||
let matterhorn_26_002 pdf =
|
let matterhorn_26_002 _ _ pdf =
|
||||||
match pdf.Pdf.saved_encryption with
|
match pdf.Pdf.saved_encryption with
|
||||||
| None -> ()
|
| None -> ()
|
||||||
| Some {Pdf.from_get_encryption_values = (_, _, _, p, _, _, _)} ->
|
| Some {Pdf.from_get_encryption_values = (_, _, _, p, _, _, _)} ->
|
||||||
|
@ -611,20 +600,20 @@ let matterhorn_26_002 pdf =
|
||||||
|
|
||||||
(* An annotation, other than of subtype Widget, Link and PrinterMark, is not a
|
(* An annotation, other than of subtype Widget, Link and PrinterMark, is not a
|
||||||
direct child of an <Annot> structure element. *)
|
direct child of an <Annot> structure element. *)
|
||||||
let matterhorn_28_002 pdf = todo ()
|
let matterhorn_28_002 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* An annotation, other than of subtype Widget, does not have a Contents entry
|
(* An annotation, other than of subtype Widget, does not have a Contents entry
|
||||||
and does not have an alternative description (in the form of an Alt entry in
|
and does not have an alternative description (in the form of an Alt entry in
|
||||||
the enclosing structure element). *)
|
the enclosing structure element). *)
|
||||||
let matterhorn_28_004 pdf = todo ()
|
let matterhorn_28_004 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* A form field does not have a TU entry and does not have an alternative
|
(* A form field does not have a TU entry and does not have an alternative
|
||||||
description (in the form of an Alt entry in the enclosing structure
|
description (in the form of an Alt entry in the enclosing structure
|
||||||
element). *)
|
element). *)
|
||||||
let matterhorn_28_005 pdf = todo ()
|
let matterhorn_28_005 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* An annotation with subtype undefined in ISO 32000 does not meet 7.18.1. *)
|
(* An annotation with subtype undefined in ISO 32000 does not meet 7.18.1. *)
|
||||||
let matterhorn_28_006 pdf =
|
let matterhorn_28_006 _ _ pdf =
|
||||||
if
|
if
|
||||||
List.exists
|
List.exists
|
||||||
(fun x -> match x.Pdfannot.subtype with Pdfannot.Unknown _ -> true | _ -> false)
|
(fun x -> match x.Pdfannot.subtype with Pdfannot.Unknown _ -> true | _ -> false)
|
||||||
|
@ -633,7 +622,7 @@ let matterhorn_28_006 pdf =
|
||||||
merror ()
|
merror ()
|
||||||
|
|
||||||
(* An annotation of subtype TrapNet exists. *)
|
(* An annotation of subtype TrapNet exists. *)
|
||||||
let matterhorn_28_007 pdf =
|
let matterhorn_28_007 _ _ pdf =
|
||||||
if
|
if
|
||||||
List.exists
|
List.exists
|
||||||
(fun x -> x.Pdfannot.subtype = Pdfannot.TrapNet)
|
(fun x -> x.Pdfannot.subtype = Pdfannot.TrapNet)
|
||||||
|
@ -642,7 +631,7 @@ let matterhorn_28_007 pdf =
|
||||||
merror ()
|
merror ()
|
||||||
|
|
||||||
(* A page containing an annotation does not contain a Tabs entry *)
|
(* A page containing an annotation does not contain a Tabs entry *)
|
||||||
let matterhorn_28_008 pdf =
|
let matterhorn_28_008 _ _ pdf =
|
||||||
if
|
if
|
||||||
List.exists
|
List.exists
|
||||||
(fun p ->
|
(fun p ->
|
||||||
|
@ -653,7 +642,7 @@ let matterhorn_28_008 pdf =
|
||||||
|
|
||||||
(* A page containing an annotation has a Tabs entry with a value other than S.
|
(* A page containing an annotation has a Tabs entry with a value other than S.
|
||||||
*)
|
*)
|
||||||
let matterhorn_28_009 pdf =
|
let matterhorn_28_009 _ _ pdf =
|
||||||
if
|
if
|
||||||
List.exists
|
List.exists
|
||||||
(fun p ->
|
(fun p ->
|
||||||
|
@ -665,14 +654,14 @@ let matterhorn_28_009 pdf =
|
||||||
merror ()
|
merror ()
|
||||||
|
|
||||||
(* A widget annotation is not nested within a <Form> tag. *)
|
(* A widget annotation is not nested within a <Form> tag. *)
|
||||||
let matterhorn_28_010 pdf = todo ()
|
let matterhorn_28_010 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* A link annotation is not nested within a <Link> tag. *)
|
(* A link annotation is not nested within a <Link> tag. *)
|
||||||
let matterhorn_28_011 pdf = todo ()
|
let matterhorn_28_011 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* A link annotation does not include an alternate description in its Contents
|
(* A link annotation does not include an alternate description in its Contents
|
||||||
entry. *)
|
entry. *)
|
||||||
let matterhorn_28_012 pdf =
|
let matterhorn_28_012 _ _ pdf =
|
||||||
if
|
if
|
||||||
List.exists
|
List.exists
|
||||||
(fun x -> x.Pdfannot.subtype = Pdfannot.Link && x.Pdfannot.annot_contents = None )
|
(fun x -> x.Pdfannot.subtype = Pdfannot.Link && x.Pdfannot.annot_contents = None )
|
||||||
|
@ -681,7 +670,7 @@ let matterhorn_28_012 pdf =
|
||||||
merror ()
|
merror ()
|
||||||
|
|
||||||
(* CT entry is missing from the media clip data dictionary. *)
|
(* CT entry is missing from the media clip data dictionary. *)
|
||||||
let matterhorn_28_014 pdf =
|
let matterhorn_28_014 _ _ pdf =
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun _ o ->
|
(fun _ o ->
|
||||||
match Pdf.lookup_direct pdf "/Type" o, Pdf.lookup_direct pdf "/S" o, Pdf.lookup_direct pdf "/CT" o with
|
match Pdf.lookup_direct pdf "/Type" o, Pdf.lookup_direct pdf "/S" o, Pdf.lookup_direct pdf "/CT" o with
|
||||||
|
@ -690,7 +679,7 @@ let matterhorn_28_014 pdf =
|
||||||
pdf
|
pdf
|
||||||
|
|
||||||
(* Alt entry is missing from the media clip data dictionary. *)
|
(* Alt entry is missing from the media clip data dictionary. *)
|
||||||
let matterhorn_28_015 pdf =
|
let matterhorn_28_015 _ _ pdf =
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun _ o ->
|
(fun _ o ->
|
||||||
match Pdf.lookup_direct pdf "/Type" o, Pdf.lookup_direct pdf "/S" o, Pdf.lookup_direct pdf "/CT" o with
|
match Pdf.lookup_direct pdf "/Type" o, Pdf.lookup_direct pdf "/S" o, Pdf.lookup_direct pdf "/CT" o with
|
||||||
|
@ -699,18 +688,19 @@ let matterhorn_28_015 pdf =
|
||||||
pdf
|
pdf
|
||||||
|
|
||||||
(* File attachment annotations do not conform to 7.11. *)
|
(* File attachment annotations do not conform to 7.11. *)
|
||||||
let matterhorn_28_016 pdf =
|
let matterhorn_28_016 _ _ pdf =
|
||||||
|
(* FIXME ?? *)
|
||||||
covered_elsewhere ()
|
covered_elsewhere ()
|
||||||
|
|
||||||
(* A PrinterMark annotation is included in the logical structure. *)
|
(* A PrinterMark annotation is included in the logical structure. *)
|
||||||
let matterhorn_28_017 pdf = todo ()
|
let matterhorn_28_017 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* The appearance stream of a PrinterMark annotation is not marked as Artifact.
|
(* The appearance stream of a PrinterMark annotation is not marked as Artifact.
|
||||||
*)
|
*)
|
||||||
let matterhorn_28_018 pdf = todo ()
|
let matterhorn_28_018 _ _ pdf = todo ()
|
||||||
|
|
||||||
(* A reference XObject is present. *)
|
(* A reference XObject is present. *)
|
||||||
let matterhorn_30_001 pdf =
|
let matterhorn_30_001 _ _ pdf =
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun _ o ->
|
(fun _ o ->
|
||||||
match Pdf.lookup_direct pdf "/Subtype" o, Pdf.lookup_direct pdf "/Ref" o with
|
match Pdf.lookup_direct pdf "/Subtype" o, Pdf.lookup_direct pdf "/Ref" o with
|
||||||
|
@ -719,7 +709,7 @@ let matterhorn_30_001 pdf =
|
||||||
pdf
|
pdf
|
||||||
|
|
||||||
(* Form XObject contains MCIDs and is referenced more than once. *)
|
(* Form XObject contains MCIDs and is referenced more than once. *)
|
||||||
let matterhorn_30_002 pdf =
|
let matterhorn_30_002 _ _ pdf =
|
||||||
(* We need to consider inheritence here. What solutions do we already have
|
(* We need to consider inheritence here. What solutions do we already have
|
||||||
for that, and do we need anything new? *)
|
for that, and do we need anything new? *)
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
@ -727,7 +717,7 @@ let matterhorn_30_002 pdf =
|
||||||
(* A Type 0 font dictionary with encoding other than Identity-H and Identity-V
|
(* A Type 0 font dictionary with encoding other than Identity-H and Identity-V
|
||||||
has values for Registry in both CIDSystemInfo dictionaries that are not
|
has values for Registry in both CIDSystemInfo dictionaries that are not
|
||||||
identical. *)
|
identical. *)
|
||||||
let matterhorn_31_001 pdf =
|
let matterhorn_31_001 _ _ pdf =
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun _ o ->
|
(fun _ o ->
|
||||||
match Pdf.lookup_direct pdf "/Subtype" o, Pdf.lookup_direct pdf "/Encoding" o with
|
match Pdf.lookup_direct pdf "/Subtype" o, Pdf.lookup_direct pdf "/Encoding" o with
|
||||||
|
@ -742,19 +732,19 @@ let matterhorn_31_001 pdf =
|
||||||
(* A Type 0 font dictionary with encoding other than Identity-H and Identity-V
|
(* A Type 0 font dictionary with encoding other than Identity-H and Identity-V
|
||||||
has values for Ordering in both CIDSystemInfo dictionaries that are not
|
has values for Ordering in both CIDSystemInfo dictionaries that are not
|
||||||
identical. *)
|
identical. *)
|
||||||
let matterhorn_31_002 pdf =
|
let matterhorn_31_002 st st2 pdf =
|
||||||
matterhorn_31_001 pdf
|
matterhorn_31_001 st st2 pdf
|
||||||
|
|
||||||
(* A Type 0 font dictionary with encoding other than Identity-H and Identity-V
|
(* A Type 0 font dictionary with encoding other than Identity-H and Identity-V
|
||||||
has a value for Supplement in the CIDSystemInfo dictionary of the CID font
|
has a value for Supplement in the CIDSystemInfo dictionary of the CID font
|
||||||
that is less than the value for Supplement in the CIDSystemInfo dictionary
|
that is less than the value for Supplement in the CIDSystemInfo dictionary
|
||||||
of the CMap. *)
|
of the CMap. *)
|
||||||
let matterhorn_31_003 pdf =
|
let matterhorn_31_003 st st2 pdf =
|
||||||
matterhorn_31_001 pdf
|
matterhorn_31_001 st st2 pdf
|
||||||
|
|
||||||
(* A Type 2 CID font contains neither a stream nor the name Identity as the
|
(* A Type 2 CID font contains neither a stream nor the name Identity as the
|
||||||
value of the CIDToGIDMap entry. *)
|
value of the CIDToGIDMap entry. *)
|
||||||
let matterhorn_31_004 pdf =
|
let matterhorn_31_004 _ _ pdf =
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun _ n ->
|
(fun _ n ->
|
||||||
match Pdf.lookup_direct pdf "/Subtype" n with
|
match Pdf.lookup_direct pdf "/Subtype" n with
|
||||||
|
@ -767,7 +757,7 @@ let matterhorn_31_004 pdf =
|
||||||
pdf
|
pdf
|
||||||
|
|
||||||
(* A Type 2 CID font does not contain a CIDToGIDMap entry. *)
|
(* A Type 2 CID font does not contain a CIDToGIDMap entry. *)
|
||||||
let matterhorn_31_005 pdf =
|
let matterhorn_31_005 _ _ pdf =
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun _ n ->
|
(fun _ n ->
|
||||||
match Pdf.lookup_direct pdf "/Subtype" n with
|
match Pdf.lookup_direct pdf "/Subtype" n with
|
||||||
|
@ -842,7 +832,7 @@ let cmap_names =
|
||||||
"/Identity-H";
|
"/Identity-H";
|
||||||
"/Identity-V"]
|
"/Identity-V"]
|
||||||
|
|
||||||
let matterhorn_31_006 pdf =
|
let matterhorn_31_006 _ _ pdf =
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun _ o ->
|
(fun _ o ->
|
||||||
match Pdf.lookup_direct pdf "/Subtype" o with
|
match Pdf.lookup_direct pdf "/Subtype" o with
|
||||||
|
@ -856,65 +846,65 @@ let matterhorn_31_006 pdf =
|
||||||
|
|
||||||
(* The WMode entry in a CMap dictionary is not identical to the WMode value in
|
(* The WMode entry in a CMap dictionary is not identical to the WMode value in
|
||||||
the CMap stream. *)
|
the CMap stream. *)
|
||||||
let matterhorn_31_007 pdf =
|
let matterhorn_31_007 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* A CMap references another CMap which is not listed in ISO 32000-1:2008,
|
(* A CMap references another CMap which is not listed in ISO 32000-1:2008,
|
||||||
9.7.5.2, Table 118. *)
|
9.7.5.2, Table 118. *)
|
||||||
let matterhorn_31_008 pdf =
|
let matterhorn_31_008 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* For a font used by text intended to be rendered the font program is not
|
(* For a font used by text intended to be rendered the font program is not
|
||||||
embedded. *)
|
embedded. *)
|
||||||
(* NB This, for now, reports all unembedded fonts. *)
|
(* NB This, for now, reports all unembedded fonts. *)
|
||||||
let matterhorn_31_009 pdf =
|
let matterhorn_31_009 _ _ pdf =
|
||||||
let l = Cpdffont.missing_fonts_return pdf (ilist 1 (Pdfpage.endpage pdf)) in
|
let l = Cpdffont.missing_fonts_return pdf (ilist 1 (Pdfpage.endpage pdf)) in
|
||||||
if l <> [] then
|
if l <> [] then
|
||||||
raise (MatterhornError (`List (map (fun x -> `String x) l)))
|
raise (MatterhornError (`List (map (fun x -> `String x) l)))
|
||||||
|
|
||||||
(* For a font used by text the font program is embedded but it does not contain
|
(* For a font used by text the font program is embedded but it does not contain
|
||||||
glyphs for all of the glyphs referenced by the text used for rendering. *)
|
glyphs for all of the glyphs referenced by the text used for rendering. *)
|
||||||
let matterhorn_31_011 pdf =
|
let matterhorn_31_011 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* The FontDescriptor dictionary of an embedded Type 1 font contains a CharSet
|
(* The FontDescriptor dictionary of an embedded Type 1 font contains a CharSet
|
||||||
string, but at least one of the glyphs present in the font program is not
|
string, but at least one of the glyphs present in the font program is not
|
||||||
listed in the CharSet string. *)
|
listed in the CharSet string. *)
|
||||||
let matterhorn_31_012 pdf =
|
let matterhorn_31_012 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* The FontDescriptor dictionary of an embedded Type 1 font contains a CharSet
|
(* The FontDescriptor dictionary of an embedded Type 1 font contains a CharSet
|
||||||
string, but at least one of the glyphs listed in the CharSet string is not
|
string, but at least one of the glyphs listed in the CharSet string is not
|
||||||
present in the font program. *)
|
present in the font program. *)
|
||||||
let matterhorn_31_013 pdf =
|
let matterhorn_31_013 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* The FontDescriptor dictionary of an embedded CID font contains a CIDSet
|
(* The FontDescriptor dictionary of an embedded CID font contains a CIDSet
|
||||||
string, but at least one of the glyphs present in the font program is not
|
string, but at least one of the glyphs present in the font program is not
|
||||||
listed in the CIDSet string. *)
|
listed in the CIDSet string. *)
|
||||||
let matterhorn_31_014 pdf =
|
let matterhorn_31_014 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* The FontDescriptor dictionary of an embedded CID font contains a CIDSet
|
(* The FontDescriptor dictionary of an embedded CID font contains a CIDSet
|
||||||
string, but at least one of the glyphs listed in the CIDSet string is not
|
string, but at least one of the glyphs listed in the CIDSet string is not
|
||||||
present in the font program. *)
|
present in the font program. *)
|
||||||
let matterhorn_31_015 pdf =
|
let matterhorn_31_015 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* For one or more glyphs, the glyph width information in the font dictionary
|
(* For one or more glyphs, the glyph width information in the font dictionary
|
||||||
and in the embedded font program differ by more than 1/1000 unit. *)
|
and in the embedded font program differ by more than 1/1000 unit. *)
|
||||||
let matterhorn_31_016 pdf =
|
let matterhorn_31_016 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* A non-symbolic TrueType font is used for rendering, but none of the cmap
|
(* A non-symbolic TrueType font is used for rendering, but none of the cmap
|
||||||
entries in the embedded font program is a non-symbolic cmap. *)
|
entries in the embedded font program is a non-symbolic cmap. *)
|
||||||
let matterhorn_31_017 pdf =
|
let matterhorn_31_017 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* A non-symbolic TrueType font is used for rendering, but for at least one
|
(* A non-symbolic TrueType font is used for rendering, but for at least one
|
||||||
glyph to be rendered the glyph cannot be looked up by any of the
|
glyph to be rendered the glyph cannot be looked up by any of the
|
||||||
non-symbolic cmap entries in the embedded font program. *)
|
non-symbolic cmap entries in the embedded font program. *)
|
||||||
let matterhorn_31_018 pdf =
|
let matterhorn_31_018 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* The font dictionary for a non-symbolic TrueType font does not contain an
|
(* The font dictionary for a non-symbolic TrueType font does not contain an
|
||||||
|
@ -928,7 +918,7 @@ let is_non_symbolic pdf o =
|
||||||
end
|
end
|
||||||
| None -> true
|
| None -> true
|
||||||
|
|
||||||
let matterhorn_31_019 pdf =
|
let matterhorn_31_019 _ _ pdf =
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun _ o ->
|
(fun _ o ->
|
||||||
match Pdf.lookup_direct pdf "/Subtype" o with
|
match Pdf.lookup_direct pdf "/Subtype" o with
|
||||||
|
@ -942,7 +932,7 @@ let matterhorn_31_019 pdf =
|
||||||
|
|
||||||
(* The font dictionary for a non-symbolic TrueType font contains an Encoding
|
(* The font dictionary for a non-symbolic TrueType font contains an Encoding
|
||||||
dictionary which does not contain a BaseEncoding entry. *)
|
dictionary which does not contain a BaseEncoding entry. *)
|
||||||
let matterhorn_31_020 pdf =
|
let matterhorn_31_020 _ _ pdf =
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun _ o ->
|
(fun _ o ->
|
||||||
match Pdf.lookup_direct pdf "/Subtype" o with
|
match Pdf.lookup_direct pdf "/Subtype" o with
|
||||||
|
@ -961,7 +951,7 @@ let matterhorn_31_020 pdf =
|
||||||
(* The value for either the Encoding entry or the BaseEncoding entry in the
|
(* The value for either the Encoding entry or the BaseEncoding entry in the
|
||||||
Encoding dictionary in a non-symbolic TrueType font dictionary is neither
|
Encoding dictionary in a non-symbolic TrueType font dictionary is neither
|
||||||
MacRomanEncoding nor WinAnsiEncoding. *)
|
MacRomanEncoding nor WinAnsiEncoding. *)
|
||||||
let matterhorn_31_021 pdf =
|
let matterhorn_31_021 _ _ pdf =
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun _ o ->
|
(fun _ o ->
|
||||||
match Pdf.lookup_direct pdf "/Subtype" o with
|
match Pdf.lookup_direct pdf "/Subtype" o with
|
||||||
|
@ -983,7 +973,7 @@ let matterhorn_31_021 pdf =
|
||||||
(* The Differences array in the Encoding entry in a non-symbolic TrueType font
|
(* The Differences array in the Encoding entry in a non-symbolic TrueType font
|
||||||
dictionary contains one or more glyph names which are not listed in the
|
dictionary contains one or more glyph names which are not listed in the
|
||||||
Adobe Glyph List. *)
|
Adobe Glyph List. *)
|
||||||
let matterhorn_31_022 pdf =
|
let matterhorn_31_022 _ _ pdf =
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun _ o ->
|
(fun _ o ->
|
||||||
match Pdf.lookup_direct pdf "/Subtype" o with
|
match Pdf.lookup_direct pdf "/Subtype" o with
|
||||||
|
@ -1005,12 +995,12 @@ let matterhorn_31_022 pdf =
|
||||||
(* The Differences array is present in the Encoding entry in a non-symbolic
|
(* The Differences array is present in the Encoding entry in a non-symbolic
|
||||||
TrueType font dictionary but the embedded font program does not contain a
|
TrueType font dictionary but the embedded font program does not contain a
|
||||||
(3,1) Microsoft Unicode cmap. *)
|
(3,1) Microsoft Unicode cmap. *)
|
||||||
let matterhorn_31_023 pdf =
|
let matterhorn_31_023 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* The Encoding entry is present in the font dictionary for a symbolic TrueType
|
(* The Encoding entry is present in the font dictionary for a symbolic TrueType
|
||||||
font. *)
|
font. *)
|
||||||
let matterhorn_31_024 pdf =
|
let matterhorn_31_024 _ _ pdf =
|
||||||
Pdf.objiter
|
Pdf.objiter
|
||||||
(fun _ o ->
|
(fun _ o ->
|
||||||
match Pdf.lookup_direct pdf "/Subtype" o with
|
match Pdf.lookup_direct pdf "/Subtype" o with
|
||||||
|
@ -1023,12 +1013,12 @@ let matterhorn_31_024 pdf =
|
||||||
pdf
|
pdf
|
||||||
|
|
||||||
(* The embedded font program for a symbolic TrueType font contains no cmap. *)
|
(* The embedded font program for a symbolic TrueType font contains no cmap. *)
|
||||||
let matterhorn_31_025 pdf =
|
let matterhorn_31_025 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* The embedded font program for a symbolic TrueType font contains more than
|
(* The embedded font program for a symbolic TrueType font contains more than
|
||||||
one cmap, but none of the cmap entries is a (3,0) Microsoft Symbol cmap. *)
|
one cmap, but none of the cmap entries is a (3,0) Microsoft Symbol cmap. *)
|
||||||
let matterhorn_31_026 pdf =
|
let matterhorn_31_026 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* A font dictionary does not contain the ToUnicode entry and none of the
|
(* A font dictionary does not contain the ToUnicode entry and none of the
|
||||||
|
@ -1039,7 +1029,7 @@ let matterhorn_31_026 pdf =
|
||||||
Annex D; the font is a Type 0 font, and its descendant CIDFont uses
|
Annex D; the font is a Type 0 font, and its descendant CIDFont uses
|
||||||
Adobe-GB1, Adobe-CNS1, Adobe-Japan1 or Adobe-Korea1 character collections;
|
Adobe-GB1, Adobe-CNS1, Adobe-Japan1 or Adobe-Korea1 character collections;
|
||||||
the font is a non-symbolic TrueType font. *)
|
the font is a non-symbolic TrueType font. *)
|
||||||
let matterhorn_31_027 pdf =
|
let matterhorn_31_027 _ _ pdf =
|
||||||
not_fully_implemented ();
|
not_fully_implemented ();
|
||||||
(* Here, we implement most of this one, but can't check the set of referenced
|
(* Here, we implement most of this one, but can't check the set of referenced
|
||||||
glyphs for Type1 / Type3. *)
|
glyphs for Type1 / Type3. *)
|
||||||
|
@ -1099,7 +1089,7 @@ let check_unicode tu n =
|
||||||
mem n (flatten (map (fun x -> Pdftext.codepoints_of_utf16be (snd x)) tu))
|
mem n (flatten (map (fun x -> Pdftext.codepoints_of_utf16be (snd x)) tu))
|
||||||
|
|
||||||
(* One or more Unicode values specified in the ToUnicode CMap are zero (0). *)
|
(* One or more Unicode values specified in the ToUnicode CMap are zero (0). *)
|
||||||
let matterhorn_31_028 pdf =
|
let matterhorn_31_028 _ _ pdf =
|
||||||
iter
|
iter
|
||||||
(fun i ->
|
(fun i ->
|
||||||
let tu = Pdftext.parse_tounicode pdf (Pdf.lookup_obj pdf i) in
|
let tu = Pdftext.parse_tounicode pdf (Pdf.lookup_obj pdf i) in
|
||||||
|
@ -1108,7 +1098,7 @@ let matterhorn_31_028 pdf =
|
||||||
|
|
||||||
(* One or more Unicode values specified in the ToUnicode CMap are equal to
|
(* One or more Unicode values specified in the ToUnicode CMap are equal to
|
||||||
either U+FEFF or U+FFFE. *)
|
either U+FEFF or U+FFFE. *)
|
||||||
let matterhorn_31_029 pdf =
|
let matterhorn_31_029 _ _ pdf =
|
||||||
iter
|
iter
|
||||||
(fun i ->
|
(fun i ->
|
||||||
let tu = Pdftext.parse_tounicode pdf (Pdf.lookup_obj pdf i) in
|
let tu = Pdftext.parse_tounicode pdf (Pdf.lookup_obj pdf i) in
|
||||||
|
@ -1117,7 +1107,7 @@ let matterhorn_31_029 pdf =
|
||||||
|
|
||||||
(* One or more characters used in text showing operators reference the .notdef
|
(* One or more characters used in text showing operators reference the .notdef
|
||||||
glyph. *)
|
glyph. *)
|
||||||
let matterhorn_31_030 pdf =
|
let matterhorn_31_030 _ _ pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
let matterhorn =
|
let matterhorn =
|
||||||
|
@ -1211,9 +1201,11 @@ let matterhorn =
|
||||||
]
|
]
|
||||||
|
|
||||||
let test_matterhorn pdf =
|
let test_matterhorn pdf =
|
||||||
|
let st2 = read_st2 pdf in
|
||||||
|
let st = st_of_st2 st2 in
|
||||||
option_map
|
option_map
|
||||||
(fun (name, error, section, test) ->
|
(fun (name, error, section, test) ->
|
||||||
try test pdf; None with
|
try test st st2 pdf; None with
|
||||||
| MatterhornError extra -> Some (name, error, section, extra)
|
| MatterhornError extra -> Some (name, error, section, extra)
|
||||||
| MatterhornUnimplemented -> None
|
| MatterhornUnimplemented -> None
|
||||||
| e -> Some (name, "Incomplete", section, `String ("ERROR: " ^ Printexc.to_string e)))
|
| e -> Some (name, "Incomplete", section, `String ("ERROR: " ^ Printexc.to_string e)))
|
||||||
|
|
Loading…
Reference in New Issue