Fixed headings checks
This commit is contained in:
parent
f6f01b3001
commit
facb9d3c75
33
cpdfua.ml
33
cpdfua.ml
|
@ -5,7 +5,7 @@ open Cpdferror
|
||||||
a) Those which require looking deep inside font files; and
|
a) Those which require looking deep inside font files; and
|
||||||
b) Those which require reading inside the graphics stream.
|
b) Those which require reading inside the graphics stream.
|
||||||
|
|
||||||
Experimental. Both false positive and false negativce results possible. *)
|
Experimental. Both false positive and false negative results possible. *)
|
||||||
|
|
||||||
exception MatterhornError of Cpdfyojson.Safe.t
|
exception MatterhornError of Cpdfyojson.Safe.t
|
||||||
|
|
||||||
|
@ -422,26 +422,27 @@ let is_hnum s =
|
||||||
begin try ignore (int_of_string (implode cs)); true with Failure _ -> false end
|
begin try ignore (int_of_string (implode cs)); true with Failure _ -> false end
|
||||||
| _ -> false
|
| _ -> false
|
||||||
|
|
||||||
|
let num s = int_of_string (implode (tl (tl (explode s))))
|
||||||
|
|
||||||
|
let rec headings_list_of_tree (E (n, cs)) =
|
||||||
|
(if is_hnum n then [n] else []) @ flatten (map headings_list_of_tree cs)
|
||||||
|
|
||||||
(* Does use numbered headings, but the first heading tag is not <H1>. *)
|
(* Does use numbered headings, but the first heading tag is not <H1>. *)
|
||||||
let matterhorn_14_002 st st2 pdf =
|
let matterhorn_14_002 st _ _ =
|
||||||
let rec check_hn = function
|
match headings_list_of_tree st with
|
||||||
| E ("/H1", cs) -> ()
|
| [] | "/H1"::_ -> ()
|
||||||
| E (s, cs) when is_hnum s -> merror ()
|
| _ -> merror ()
|
||||||
| E (_, cs) -> iter check_hn cs
|
|
||||||
in
|
|
||||||
check_hn st
|
|
||||||
|
|
||||||
(* Numbered heading levels in descending sequence are skipped (Example: <H3>
|
(* Numbered heading levels in descending sequence are skipped (Example: <H3>
|
||||||
follows directly after <H1>). *)
|
follows directly after <H1>). *)
|
||||||
let matterhorn_14_003 st st2 pdf =
|
let matterhorn_14_003 st _ _ =
|
||||||
let rec check_nseq n = function
|
let rec check l = function
|
||||||
| E (s, cs) when is_hnum s ->
|
| [] -> ()
|
||||||
let num = int_of_string (implode (tl (tl (explode s)))) in
|
| n::ns ->
|
||||||
if num > n + 1 && n > 0 then merror_str (Printf.sprintf "%i -> %i" n num);
|
let nm = num n in
|
||||||
iter (check_nseq num) cs
|
if nm > l + 1 then merror_str (Printf.sprintf "%i -> %i" l nm) else check nm ns
|
||||||
| E (_, cs) -> iter (check_nseq n) cs
|
|
||||||
in
|
in
|
||||||
check_nseq 0 st
|
check 1 (headings_list_of_tree st)
|
||||||
|
|
||||||
(* A node contains more than one <H> tag. *)
|
(* A node contains more than one <H> tag. *)
|
||||||
let matterhorn_14_006 st st2 pdf =
|
let matterhorn_14_006 st st2 pdf =
|
||||||
|
|
Loading…
Reference in New Issue