From 9d3c4384e43ea1388c15df97b0e31d471cc972b3 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Wed, 12 Jun 2024 16:15:33 +0100 Subject: [PATCH] Report natural language --- Changes | 1 + cpdfmetadata.ml | 8 ++++++++ cpdfmetadata.mli | 2 ++ cpdfua.ml | 3 ++- 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Changes b/Changes index 2b0e072..773c83b 100644 --- a/Changes +++ b/Changes @@ -8,6 +8,7 @@ o Verify compliance to PDF/UA via the Matterhorn protocol o Extract, edit and reapply document structure tree o Split structure tree when splitting PDF to save size o Combine structure trees when stamping PDFs +o Report natural language on -info 2.7 (February 2024) diff --git a/cpdfmetadata.ml b/cpdfmetadata.ml index 0b6cb72..6e43839 100644 --- a/cpdfmetadata.ml +++ b/cpdfmetadata.ml @@ -442,6 +442,11 @@ let determine_subformats pdf = end; !formats +let language pdf = + match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/Lang"] with + | Some (Pdf.String x) -> Some x + | _ -> None + let output_xmp_info ?(json=ref [("none", `Null)]) encoding pdf = let notjson = !json = [("none", `Null)] in let print_out tree title namespace name = @@ -459,6 +464,9 @@ let output_xmp_info ?(json=ref [("none", `Null)]) encoding pdf = if notjson then Printf.printf "Subformats: %s\n" (combine_with_commas (determine_subformats pdf)) else json =| ("Subformats", `List (map (fun x -> `String x) (determine_subformats pdf))); + if notjson + then Printf.printf "Language: %s\n" (match language pdf with None -> "" | Some x -> "\"" ^ x ^ "\"") + else json =| ("Language", match language pdf with None -> `Null | Some x -> `String x); match get_metadata pdf with None -> () | Some metadata -> diff --git a/cpdfmetadata.mli b/cpdfmetadata.mli index ebbda15..189be8c 100644 --- a/cpdfmetadata.mli +++ b/cpdfmetadata.mli @@ -87,6 +87,8 @@ val get_viewer_pref_item : string -> Pdf.t -> string val determine_subformats : Pdf.t -> string list +val language : Pdf.t -> string option + val adobe : string val xmp : string val dc : string diff --git a/cpdfua.ml b/cpdfua.ml index d76b2fd..8d3f77d 100644 --- a/cpdfua.ml +++ b/cpdfua.ml @@ -164,7 +164,8 @@ let matterhorn_10_001 pdf = unimpl () (* Natural language for text in page content cannot be determined. *) -let matterhorn_11_001 pdf = todo () +let matterhorn_11_001 pdf = + unimpl () (* Natural language for text in Alt, ActualText and E attributes cannot be determined. *)