mirror of
				https://github.com/johnwhitington/cpdf-source.git
				synced 2025-06-05 22:09:39 +02:00 
			
		
		
		
	Report natural language
This commit is contained in:
		
							
								
								
									
										1
									
								
								Changes
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								Changes
									
									
									
									
									
								
							| @@ -8,6 +8,7 @@ o Verify compliance to PDF/UA via the Matterhorn protocol | ||||
| o Extract, edit and reapply document structure tree | ||||
| o Split structure tree when splitting PDF to save size | ||||
| o Combine structure trees when stamping PDFs | ||||
| o Report natural language on -info | ||||
|  | ||||
| 2.7 (February 2024) | ||||
|  | ||||
|   | ||||
| @@ -442,6 +442,11 @@ let determine_subformats pdf = | ||||
|           end; | ||||
|           !formats | ||||
|  | ||||
| (* [language pdf] looks up the /Lang entry under /Root, starting from the | ||||
|    trailer dictionary of [pdf]. Returns [Some s] when that entry is a | ||||
|    [Pdf.String s]; returns [None] when the lookup finds nothing or finds an | ||||
|    object of any other kind. *) | ||||
| let language pdf = | ||||
|   let lang_entry = | ||||
|     Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/Lang"] | ||||
|   in | ||||
|   match lang_entry with | ||||
|   | Some (Pdf.String lang) -> Some lang | ||||
|   | Some _ | None -> None | ||||
|  | ||||
| let output_xmp_info ?(json=ref [("none", `Null)]) encoding pdf = | ||||
|   let notjson = !json = [("none", `Null)] in | ||||
|   let print_out tree title namespace name = | ||||
| @@ -459,6 +464,9 @@ let output_xmp_info ?(json=ref [("none", `Null)]) encoding pdf = | ||||
|     if notjson | ||||
|       then Printf.printf "Subformats: %s\n" (combine_with_commas (determine_subformats pdf)) | ||||
|       else json =| ("Subformats", `List (map (fun x -> `String x) (determine_subformats pdf))); | ||||
|     if notjson | ||||
|       then Printf.printf "Language: %s\n" (match language pdf with None -> "" | Some x -> "\"" ^ x ^ "\"") | ||||
|       else json =| ("Language", match language pdf with None -> `Null | Some x -> `String x); | ||||
|     match get_metadata pdf with | ||||
|       None -> () | ||||
|     | Some metadata -> | ||||
|   | ||||
| @@ -87,6 +87,8 @@ val get_viewer_pref_item : string -> Pdf.t -> string | ||||
|  | ||||
| val determine_subformats : Pdf.t -> string list | ||||
|  | ||||
| val language : Pdf.t -> string option | ||||
|  | ||||
| val adobe : string | ||||
| val xmp : string | ||||
| val dc : string | ||||
|   | ||||
| @@ -164,7 +164,8 @@ let matterhorn_10_001 pdf = | ||||
|   unimpl () | ||||
|  | ||||
| (* Natural language for text in page content cannot be determined. *) | ||||
| let matterhorn_11_001 pdf = todo () | ||||
| let matterhorn_11_001 pdf = | ||||
|   unimpl () | ||||
|  | ||||
| (* Natural language for text in Alt, ActualText and E attributes cannot be | ||||
|    determined. *) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user