Report natural language
This commit is contained in:
parent
9a76c291ae
commit
9d3c4384e4
1
Changes
1
Changes
|
@ -8,6 +8,7 @@ o Verify compliance to PDF/UA via the Matterhorn protocol
|
||||||
o Extract, edit and reapply document structure tree
|
o Extract, edit and reapply document structure tree
|
||||||
o Split structure tree when splitting PDF to save size
|
o Split structure tree when splitting PDF to save size
|
||||||
o Combine structure trees when stamping PDFs
|
o Combine structure trees when stamping PDFs
|
||||||
|
o Report natural language on -info
|
||||||
|
|
||||||
2.7 (February 2024)
|
2.7 (February 2024)
|
||||||
|
|
||||||
|
|
|
@ -442,6 +442,11 @@ let determine_subformats pdf =
|
||||||
end;
|
end;
|
||||||
!formats
|
!formats
|
||||||
|
|
||||||
|
let language pdf =
|
||||||
|
match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/Lang"] with
|
||||||
|
| Some (Pdf.String x) -> Some x
|
||||||
|
| _ -> None
|
||||||
|
|
||||||
let output_xmp_info ?(json=ref [("none", `Null)]) encoding pdf =
|
let output_xmp_info ?(json=ref [("none", `Null)]) encoding pdf =
|
||||||
let notjson = !json = [("none", `Null)] in
|
let notjson = !json = [("none", `Null)] in
|
||||||
let print_out tree title namespace name =
|
let print_out tree title namespace name =
|
||||||
|
@ -459,6 +464,9 @@ let output_xmp_info ?(json=ref [("none", `Null)]) encoding pdf =
|
||||||
if notjson
|
if notjson
|
||||||
then Printf.printf "Subformats: %s\n" (combine_with_commas (determine_subformats pdf))
|
then Printf.printf "Subformats: %s\n" (combine_with_commas (determine_subformats pdf))
|
||||||
else json =| ("Subformats", `List (map (fun x -> `String x) (determine_subformats pdf)));
|
else json =| ("Subformats", `List (map (fun x -> `String x) (determine_subformats pdf)));
|
||||||
|
if notjson
|
||||||
|
then Printf.printf "Language: %s\n" (match language pdf with None -> "" | Some x -> "\"" ^ x ^ "\"")
|
||||||
|
else json =| ("Language", match language pdf with None -> `Null | Some x -> `String x);
|
||||||
match get_metadata pdf with
|
match get_metadata pdf with
|
||||||
None -> ()
|
None -> ()
|
||||||
| Some metadata ->
|
| Some metadata ->
|
||||||
|
|
|
@ -87,6 +87,8 @@ val get_viewer_pref_item : string -> Pdf.t -> string
|
||||||
|
|
||||||
val determine_subformats : Pdf.t -> string list
|
val determine_subformats : Pdf.t -> string list
|
||||||
|
|
||||||
|
val language : Pdf.t -> string option
|
||||||
|
|
||||||
val adobe : string
|
val adobe : string
|
||||||
val xmp : string
|
val xmp : string
|
||||||
val dc : string
|
val dc : string
|
||||||
|
|
|
@ -164,7 +164,8 @@ let matterhorn_10_001 pdf =
|
||||||
unimpl ()
|
unimpl ()
|
||||||
|
|
||||||
(* Natural language for text in page content cannot be determined. *)
|
(* Natural language for text in page content cannot be determined. *)
|
||||||
let matterhorn_11_001 pdf = todo ()
|
let matterhorn_11_001 pdf =
|
||||||
|
unimpl ()
|
||||||
|
|
||||||
(* Natural language for text in Alt, ActualText and E attributes cannot be
|
(* Natural language for text in Alt, ActualText and E attributes cannot be
|
||||||
determined. *)
|
determined. *)
|
||||||
|
|
Loading…
Reference in New Issue