mirror of
				https://github.com/johnwhitington/cpdf-source.git
				synced 2025-06-05 22:09:39 +02:00 
			
		
		
		
	Adds -print-struct-tree
This commit is contained in:
		| @@ -221,6 +221,7 @@ type op = | ||||
|   | Verify of string | ||||
|   | MarkAs of string | ||||
|   | RemoveMark of string | ||||
|   | PrintStructTree | ||||
|   | ExtractStructTree | ||||
|   | ReplaceStructTree of string | ||||
|   | SetLanguage of string | ||||
| @@ -369,6 +370,7 @@ let string_of_op = function | ||||
|   | Verify _ -> "Verify" | ||||
|   | MarkAs _ -> "MarkAs" | ||||
|   | RemoveMark _ -> "RemoveMark" | ||||
|   | PrintStructTree -> "PrintStructTree" | ||||
|   | ExtractStructTree -> "ExtractStructTree" | ||||
|   | ReplaceStructTree _ -> "ReplaceStructTree" | ||||
|   | SetLanguage _ -> "SetLanguage" | ||||
| @@ -896,6 +898,7 @@ let banned banlist = function | ||||
|   | OCGRename | OCGList | OCGOrderAll | PrintFontEncoding _ | TableOfContents | Typeset _ | Composition _ | ||||
|   | TextWidth _ | SetAnnotations _ | CopyAnnotations _ | ExtractStream _ | PrintObj _ | ||||
|   | Verify _ | MarkAs _ | RemoveMark _ | ExtractStructTree | ReplaceStructTree _ | SetLanguage _ | ||||
|   | PrintStructTree | ||||
|      -> false (* Always allowed *) | ||||
|   (* Combine pages is not allowed because we would not know where to get the | ||||
|   -recrypt from -- the first or second file? *) | ||||
| @@ -2825,6 +2828,7 @@ and specs = | ||||
|    ("-verify-single", Arg.String (fun s -> args.verify_single <- Some s), "Verify a single test"); | ||||
|    ("-mark-as", Arg.String (fun s -> setop (MarkAs s) ()), "Mark as conforming to a standard"); | ||||
|    ("-remove-mark", Arg.String (fun s -> setop (RemoveMark s) ()), "Remove conformance mark"); | ||||
|    ("-print-struct-tree", Arg.Unit (fun () -> setop PrintStructTree ()), "Print structure tree"); | ||||
|    ("-extract-struct-tree", Arg.Unit (fun () -> setop ExtractStructTree ()), "Extract structure tree in JSON format"); | ||||
|    ("-replace-struct-tree", Arg.String (fun s -> setop (ReplaceStructTree s) ()), "Replace structure tree from JSON"); | ||||
|    (* These items are undocumented *) | ||||
| @@ -4516,6 +4520,9 @@ let go () = | ||||
|             write_pdf false pdf | ||||
|       | _ -> error "Unknown standard" | ||||
|       end | ||||
|   | Some PrintStructTree -> | ||||
|       let pdf = get_single_pdf args.op true in | ||||
|         Cpdfua.print_struct_tree pdf | ||||
|   | Some ExtractStructTree -> | ||||
|       let pdf = get_single_pdf args.op true in | ||||
|       let json = Cpdfua.extract_struct_tree pdf in | ||||
|   | ||||
							
								
								
									
										29
									
								
								cpdfua.ml
									
									
									
									
									
								
							
							
						
						
									
										29
									
								
								cpdfua.ml
									
									
									
									
									
								
							| @@ -54,7 +54,17 @@ let read_a pdf stnode = | ||||
|     let at = | ||||
|       match Pdf.lookup_direct pdf "/ActualText" stnode with | Some _ -> ["/ActualText"] | None -> [] | ||||
|     in | ||||
|       from_a @ id @ at @ alt | ||||
|     let pageref = | ||||
|       match Pdf.direct pdf stnode with | ||||
|       | Pdf.Dictionary d -> | ||||
|           begin match lookup "/Pg" d with | ||||
|           | Some (Pdf.Indirect i) -> | ||||
|               ["_" ^ string_of_int i] | ||||
|           | _ -> [] | ||||
|           end | ||||
|       | _ -> [] | ||||
|     in | ||||
|       from_a @ id @ at @ alt @ pageref | ||||
|  | ||||
| let rec read_st_inner pdf stnode = | ||||
|   let s = | ||||
| @@ -1508,3 +1518,20 @@ let replace_struct_tree pdf json = | ||||
|       | _ -> error "Top level JSON wrong. Must be list with 0 first." | ||||
|   with | ||||
|     e -> error (Printf.sprintf "replace_struct_tree: %s" (Printexc.to_string e)) | ||||
|  | ||||
| let print_struct_tree pdf = | ||||
|   let page_lookup = | ||||
|     hashtable_of_dictionary (combine (Pdf.page_reference_numbers pdf) (ilist 1 (Pdfpage.endpage pdf))) | ||||
|   in | ||||
|   let get_page attrs = | ||||
|     match option_map (fun x -> match explode x with '_'::more -> Some (implode more) | _ -> None) attrs with | ||||
|     | [i] -> string_of_int (try Hashtbl.find page_lookup (int_of_string i) with _ -> 0) | ||||
|     | _ -> "0" | ||||
|   in | ||||
|     let st = read_st2 pdf in | ||||
|       match st with E2 ("/StructTreeRoot", _, []) -> () | _ -> | ||||
|         flprint | ||||
|           (Cpdfprinttree.to_string | ||||
|             ~get_name:(fun (E2 (x, a, _)) -> if int_of_string (get_page a) > 0 then x ^ " (" ^ get_page a ^ ")" else x) | ||||
|             ~get_children:(fun (E2 (_, _, cs)) -> cs) | ||||
|             st) | ||||
|   | ||||
| @@ -9,6 +9,8 @@ val mark2 : int -> Pdf.t -> unit | ||||
|  | ||||
| val remove_mark : Pdf.t -> unit | ||||
|  | ||||
| val print_struct_tree : Pdf.t -> unit | ||||
|  | ||||
| val extract_struct_tree : Pdf.t -> Cpdfyojson.Safe.t | ||||
|  | ||||
| val replace_struct_tree : Pdf.t -> Cpdfyojson.Safe.t -> unit | ||||
|   | ||||
		Reference in New Issue
	
	Block a user