Fixed headings checks

This commit is contained in:
John Whitington 2024-07-02 16:16:11 +01:00
parent f6f01b3001
commit facb9d3c75
1 changed files with 18 additions and 17 deletions

View File

@ -5,7 +5,7 @@ open Cpdferror
a) Those which require looking deep inside font files; and a) Those which require looking deep inside font files; and
b) Those which require reading inside the graphics stream. b) Those which require reading inside the graphics stream.
Experimental. Both false positive and false negativce results possible. *) Experimental. Both false positive and false negative results possible. *)
exception MatterhornError of Cpdfyojson.Safe.t exception MatterhornError of Cpdfyojson.Safe.t
@ -422,27 +422,28 @@ let is_hnum s =
begin try ignore (int_of_string (implode cs)); true with Failure _ -> false end begin try ignore (int_of_string (implode cs)); true with Failure _ -> false end
| _ -> false | _ -> false
let num s = int_of_string (implode (tl (tl (explode s))))
let rec headings_list_of_tree (E (n, cs)) =
(if is_hnum n then [n] else []) @ flatten (map headings_list_of_tree cs)
(* Does use numbered headings, but the first heading tag is not <H1>. *) (* Does use numbered headings, but the first heading tag is not <H1>. *)
let matterhorn_14_002 st st2 pdf = let matterhorn_14_002 st _ _ =
let rec check_hn = function match headings_list_of_tree st with
| E ("/H1", cs) -> () | [] | "/H1"::_ -> ()
| E (s, cs) when is_hnum s -> merror () | _ -> merror ()
| E (_, cs) -> iter check_hn cs
in
check_hn st
(* Numbered heading levels in descending sequence are skipped (Example: <H3> (* Numbered heading levels in descending sequence are skipped (Example: <H3>
follows directly after <H1>). *) follows directly after <H1>). *)
let matterhorn_14_003 st st2 pdf = let matterhorn_14_003 st _ _ =
let rec check_nseq n = function let rec check l = function
| E (s, cs) when is_hnum s -> | [] -> ()
let num = int_of_string (implode (tl (tl (explode s)))) in | n::ns ->
if num > n + 1 && n > 0 then merror_str (Printf.sprintf "%i -> %i" n num); let nm = num n in
iter (check_nseq num) cs if nm > l + 1 then merror_str (Printf.sprintf "%i -> %i" l nm) else check nm ns
| E (_, cs) -> iter (check_nseq n) cs
in in
check_nseq 0 st check 1 (headings_list_of_tree st)
(* A node contains more than one <H> tag. *) (* A node contains more than one <H> tag. *)
let matterhorn_14_006 st st2 pdf = let matterhorn_14_006 st st2 pdf =
let found = ref false in let found = ref false in