Report mark info, and add matterhorn suspects test

This commit is contained in:
John Whitington 2024-06-18 13:11:31 +01:00
parent b9fb8ed5e2
commit 144723d8e7
3 changed files with 24 additions and 1 deletions

View File

@ -9,6 +9,7 @@ o Extract, edit and reapply document structure tree
o Split structure tree when splitting PDF to save size o Split structure tree when splitting PDF to save size
o Combine structure trees when stamping PDFs o Combine structure trees when stamping PDFs
o Report natural language on -info o Report natural language on -info
o Report mark information dictionary contents on -info
2.7 (February 2024) 2.7 (February 2024)

View File

@ -191,6 +191,19 @@ let get_viewer_pref_item name pdf =
end end
| _ -> "" | _ -> ""
(* [get_markinfo_item name pdf] reads the entry [name] (for example
   "/Marked") from the /MarkInfo dictionary of the document catalog, which is
   itself found under /Root in the trailer dictionary. The result is the
   entry's value when it is a [Pdf.Boolean]; it is [false] when the catalog,
   the /MarkInfo dictionary or the entry is missing, or when the entry is not
   a boolean. *)
let get_markinfo_item name pdf =
  let lookup key dict = Pdf.lookup_direct pdf key dict in
  match lookup "/Root" pdf.Pdf.trailerdict with
  | None -> false
  | Some catalog ->
      match lookup "/MarkInfo" catalog with
      | None -> false
      | Some markinfo ->
          match lookup name markinfo with
          | Some (Pdf.Boolean flag) -> flag
          | _ -> false
let output_info ?(json=ref [("none", `Null)]) encoding pdf = let output_info ?(json=ref [("none", `Null)]) encoding pdf =
let notjson = !json = [("none", `Null)] in let notjson = !json = [("none", `Null)] in
let getstring = getstring encoding pdf in let getstring = getstring encoding pdf in
@ -242,6 +255,12 @@ let output_info ?(json=ref [("none", `Null)]) encoding pdf =
json =| ("NonFullPageScreenMode", match (get_viewer_pref_item "/NonFullPageScreenMode" pdf) with "" -> `Null | x -> `String x); json =| ("NonFullPageScreenMode", match (get_viewer_pref_item "/NonFullPageScreenMode" pdf) with "" -> `Null | x -> `String x);
if notjson then Printf.printf "AcroForm: %s\n" (match get_catalog_item "/AcroForm" pdf with "" -> "False" | x -> x); if notjson then Printf.printf "AcroForm: %s\n" (match get_catalog_item "/AcroForm" pdf with "" -> "False" | x -> x);
json =| ("AcroForm", match (get_catalog_item "/AcroForm" pdf) with "" -> `Bool false | x -> `Bool true); json =| ("AcroForm", match (get_catalog_item "/AcroForm" pdf) with "" -> `Bool false | x -> `Bool true);
if notjson then Printf.printf "Marked: %s\n" (match get_markinfo_item "/Marked" pdf with true -> "True" | _ -> "False");
json =| ("Marked", `Bool (get_markinfo_item "/Marked" pdf));
if notjson then Printf.printf "UserProperties: %s\n" (match get_markinfo_item "/UserProperties" pdf with true -> "True" | _ -> "False");
json =| ("UserProperties", `Bool (get_markinfo_item "/UserProperties" pdf));
if notjson then Printf.printf "Suspects: %s\n" (match get_markinfo_item "/Suspects" pdf with true -> "True" | _ -> "False");
json =| ("Suspects", `Bool (get_markinfo_item "/Suspects" pdf));
if notjson then if notjson then
begin begin
Printf.printf "MediaBox: "; Printf.printf "MediaBox: ";

View File

@ -55,7 +55,10 @@ let matterhorn_01_004 pdf = todo ()
let matterhorn_01_005 pdf = todo () let matterhorn_01_005 pdf = todo ()
(* Suspects entry has a value of true. *) (* Suspects entry has a value of true. *)
let matterhorn_01_007 pdf = todo () let matterhorn_01_007 pdf =
(* Checks the /Suspects entry reached via /Root then /MarkInfo, starting from
   the trailer dictionary. [merror ()] is called only when the lookup yields
   the boolean [true]; a missing entry or any other value yields unit.
   NOTE(review): [Pdf.lookup_chain] is presumed to follow the listed keys in
   order and return [None] if any step is absent, and [merror] is defined
   elsewhere in this file -- confirm both. *)
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/MarkInfo"; "/Suspects"] with
| Some (Pdf.Boolean true) -> merror ()
| _ -> ()
(* Here, for now, we allow the ISO 32000 and ISO 32000-2 *) (* Here, for now, we allow the ISO 32000 and ISO 32000-2 *)
(* FIXME which version of PDF/UA are we doing? Can we do both? or pick? *) (* FIXME which version of PDF/UA are we doing? Can we do both? or pick? *)