Report natural language
This commit is contained in:
parent
9a76c291ae
commit
9d3c4384e4
1
Changes
1
Changes
|
@ -8,6 +8,7 @@ o Verify compliance to PDF/UA via the Matterhorn protocol
|
|||
o Extract, edit and reapply document structure tree
|
||||
o Split structure tree when splitting PDF to save size
|
||||
o Combine structure trees when stamping PDFs
|
||||
o Report natural language on -info
|
||||
|
||||
2.7 (February 2024)
|
||||
|
||||
|
|
|
@ -442,6 +442,11 @@ let determine_subformats pdf =
|
|||
end;
|
||||
!formats
|
||||
|
||||
(* The document's natural language: the string found by following /Root and
   then /Lang from the trailer dictionary. [None] if the lookup fails or the
   entry found is not a string. *)
let language pdf =
  let lang_entry =
    Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/Lang"]
  in
    match lang_entry with
    | Some (Pdf.String lang) -> Some lang
    | Some _ | None -> None
|
||||
|
||||
let output_xmp_info ?(json=ref [("none", `Null)]) encoding pdf =
|
||||
let notjson = !json = [("none", `Null)] in
|
||||
let print_out tree title namespace name =
|
||||
|
@ -459,6 +464,9 @@ let output_xmp_info ?(json=ref [("none", `Null)]) encoding pdf =
|
|||
if notjson
|
||||
then Printf.printf "Subformats: %s\n" (combine_with_commas (determine_subformats pdf))
|
||||
else json =| ("Subformats", `List (map (fun x -> `String x) (determine_subformats pdf)));
|
||||
if notjson
|
||||
then Printf.printf "Language: %s\n" (match language pdf with None -> "" | Some x -> "\"" ^ x ^ "\"")
|
||||
else json =| ("Language", match language pdf with None -> `Null | Some x -> `String x);
|
||||
match get_metadata pdf with
|
||||
None -> ()
|
||||
| Some metadata ->
|
||||
|
|
|
@ -87,6 +87,8 @@ val get_viewer_pref_item : string -> Pdf.t -> string
|
|||
|
||||
val determine_subformats : Pdf.t -> string list
|
||||
|
||||
(** [language pdf] is [Some s] when following /Root then /Lang from the
    trailer dictionary of [pdf] yields a string [s], and [None] otherwise. *)
val language : Pdf.t -> string option
|
||||
|
||||
val adobe : string
|
||||
val xmp : string
|
||||
val dc : string
|
||||
|
|
|
@ -164,7 +164,8 @@ let matterhorn_10_001 pdf =
|
|||
unimpl ()
|
||||
|
||||
(* Natural language for text in page content cannot be determined. *)
|
||||
(* Placeholder for this checkpoint: ignores [pdf] and simply calls
   [unimpl ()]. This is the only binding of the name; a preceding
   [todo ()] binding was immediately shadowed by it and is not kept. *)
let matterhorn_11_001 pdf =
  unimpl ()
|
||||
|
||||
(* Natural language for text in Alt, ActualText and E attributes cannot be
|
||||
determined. *)
|
||||
|
|
Loading…
Reference in New Issue