Adds -print-struct-tree
This commit is contained in:
parent
5cf1896703
commit
7824bd40f9
|
@ -221,6 +221,7 @@ type op =
|
||||||
| Verify of string
|
| Verify of string
|
||||||
| MarkAs of string
|
| MarkAs of string
|
||||||
| RemoveMark of string
|
| RemoveMark of string
|
||||||
|
| PrintStructTree
|
||||||
| ExtractStructTree
|
| ExtractStructTree
|
||||||
| ReplaceStructTree of string
|
| ReplaceStructTree of string
|
||||||
| SetLanguage of string
|
| SetLanguage of string
|
||||||
|
@ -369,6 +370,7 @@ let string_of_op = function
|
||||||
| Verify _ -> "Verify"
|
| Verify _ -> "Verify"
|
||||||
| MarkAs _ -> "MarkAs"
|
| MarkAs _ -> "MarkAs"
|
||||||
| RemoveMark _ -> "RemoveMark"
|
| RemoveMark _ -> "RemoveMark"
|
||||||
|
| PrintStructTree -> "PrintStructTree"
|
||||||
| ExtractStructTree -> "ExtractStructTree"
|
| ExtractStructTree -> "ExtractStructTree"
|
||||||
| ReplaceStructTree _ -> "ReplaceStructTree"
|
| ReplaceStructTree _ -> "ReplaceStructTree"
|
||||||
| SetLanguage _ -> "SetLanguage"
|
| SetLanguage _ -> "SetLanguage"
|
||||||
|
@ -895,7 +897,8 @@ let banned banlist = function
|
||||||
| AddPageLabels | RemovePageLabels | OutputJSON | OCGCoalesce
|
| AddPageLabels | RemovePageLabels | OutputJSON | OCGCoalesce
|
||||||
| OCGRename | OCGList | OCGOrderAll | PrintFontEncoding _ | TableOfContents | Typeset _ | Composition _
|
| OCGRename | OCGList | OCGOrderAll | PrintFontEncoding _ | TableOfContents | Typeset _ | Composition _
|
||||||
| TextWidth _ | SetAnnotations _ | CopyAnnotations _ | ExtractStream _ | PrintObj _
|
| TextWidth _ | SetAnnotations _ | CopyAnnotations _ | ExtractStream _ | PrintObj _
|
||||||
| Verify _ | MarkAs _ | RemoveMark _ | ExtractStructTree | ReplaceStructTree _ | SetLanguage _
|
| Verify _ | MarkAs _ | RemoveMark _ | ExtractStructTree | ReplaceStructTree _ | SetLanguage _
|
||||||
|
| PrintStructTree
|
||||||
-> false (* Always allowed *)
|
-> false (* Always allowed *)
|
||||||
(* Combine pages is not allowed because we would not know where to get the
|
(* Combine pages is not allowed because we would not know where to get the
|
||||||
-recrypt from -- the first or second file? *)
|
-recrypt from -- the first or second file? *)
|
||||||
|
@ -2825,6 +2828,7 @@ and specs =
|
||||||
("-verify-single", Arg.String (fun s -> args.verify_single <- Some s), "Verify a single test");
|
("-verify-single", Arg.String (fun s -> args.verify_single <- Some s), "Verify a single test");
|
||||||
("-mark-as", Arg.String (fun s -> setop (MarkAs s) ()), "Mark as conforming to a standard");
|
("-mark-as", Arg.String (fun s -> setop (MarkAs s) ()), "Mark as conforming to a standard");
|
||||||
("-remove-mark", Arg.String (fun s -> setop (RemoveMark s) ()), "Remove conformance mark");
|
("-remove-mark", Arg.String (fun s -> setop (RemoveMark s) ()), "Remove conformance mark");
|
||||||
|
("-print-struct-tree", Arg.Unit (fun () -> setop PrintStructTree ()), "Print structure tree");
|
||||||
("-extract-struct-tree", Arg.Unit (fun () -> setop ExtractStructTree ()), "Extract structure tree in JSON format");
|
("-extract-struct-tree", Arg.Unit (fun () -> setop ExtractStructTree ()), "Extract structure tree in JSON format");
|
||||||
("-replace-struct-tree", Arg.String (fun s -> setop (ReplaceStructTree s) ()), "Replace structure tree from JSON");
|
("-replace-struct-tree", Arg.String (fun s -> setop (ReplaceStructTree s) ()), "Replace structure tree from JSON");
|
||||||
(* These items are undocumented *)
|
(* These items are undocumented *)
|
||||||
|
@ -4516,6 +4520,9 @@ let go () =
|
||||||
write_pdf false pdf
|
write_pdf false pdf
|
||||||
| _ -> error "Unknown standard"
|
| _ -> error "Unknown standard"
|
||||||
end
|
end
|
||||||
|
| Some PrintStructTree ->
|
||||||
|
let pdf = get_single_pdf args.op true in
|
||||||
|
Cpdfua.print_struct_tree pdf
|
||||||
| Some ExtractStructTree ->
|
| Some ExtractStructTree ->
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
let json = Cpdfua.extract_struct_tree pdf in
|
let json = Cpdfua.extract_struct_tree pdf in
|
||||||
|
|
29
cpdfua.ml
29
cpdfua.ml
|
@ -54,7 +54,17 @@ let read_a pdf stnode =
|
||||||
let at =
|
let at =
|
||||||
match Pdf.lookup_direct pdf "/ActualText" stnode with | Some _ -> ["/ActualText"] | None -> []
|
match Pdf.lookup_direct pdf "/ActualText" stnode with | Some _ -> ["/ActualText"] | None -> []
|
||||||
in
|
in
|
||||||
from_a @ id @ at @ alt
|
let pageref =
|
||||||
|
match Pdf.direct pdf stnode with
|
||||||
|
| Pdf.Dictionary d ->
|
||||||
|
begin match lookup "/Pg" d with
|
||||||
|
| Some (Pdf.Indirect i) ->
|
||||||
|
["_" ^ string_of_int i]
|
||||||
|
| _ -> []
|
||||||
|
end
|
||||||
|
| _ -> []
|
||||||
|
in
|
||||||
|
from_a @ id @ at @ alt @ pageref
|
||||||
|
|
||||||
let rec read_st_inner pdf stnode =
|
let rec read_st_inner pdf stnode =
|
||||||
let s =
|
let s =
|
||||||
|
@ -1508,3 +1518,20 @@ let replace_struct_tree pdf json =
|
||||||
| _ -> error "Top level JSON wrong. Must be list with 0 first."
|
| _ -> error "Top level JSON wrong. Must be list with 0 first."
|
||||||
with
|
with
|
||||||
e -> error (Printf.sprintf "replace_struct_tree: %s" (Printexc.to_string e))
|
e -> error (Printf.sprintf "replace_struct_tree: %s" (Printexc.to_string e))
|
||||||
|
|
||||||
|
let print_struct_tree pdf =
|
||||||
|
let page_lookup =
|
||||||
|
hashtable_of_dictionary (combine (Pdf.page_reference_numbers pdf) (ilist 1 (Pdfpage.endpage pdf)))
|
||||||
|
in
|
||||||
|
let get_page attrs =
|
||||||
|
match option_map (fun x -> match explode x with '_'::more -> Some (implode more) | _ -> None) attrs with
|
||||||
|
| [i] -> string_of_int (try Hashtbl.find page_lookup (int_of_string i) with _ -> 0)
|
||||||
|
| _ -> "0"
|
||||||
|
in
|
||||||
|
let st = read_st2 pdf in
|
||||||
|
match st with E2 ("/StructTreeRoot", _, []) -> () | _ ->
|
||||||
|
flprint
|
||||||
|
(Cpdfprinttree.to_string
|
||||||
|
~get_name:(fun (E2 (x, a, _)) -> if int_of_string (get_page a) > 0 then x ^ " (" ^ get_page a ^ ")" else x)
|
||||||
|
~get_children:(fun (E2 (_, _, cs)) -> cs)
|
||||||
|
st)
|
||||||
|
|
|
@ -9,6 +9,8 @@ val mark2 : int -> Pdf.t -> unit
|
||||||
|
|
||||||
val remove_mark : Pdf.t -> unit
|
val remove_mark : Pdf.t -> unit
|
||||||
|
|
||||||
|
val print_struct_tree : Pdf.t -> unit
|
||||||
|
|
||||||
val extract_struct_tree : Pdf.t -> Cpdfyojson.Safe.t
|
val extract_struct_tree : Pdf.t -> Cpdfyojson.Safe.t
|
||||||
|
|
||||||
val replace_struct_tree : Pdf.t -> Cpdfyojson.Safe.t -> unit
|
val replace_struct_tree : Pdf.t -> Cpdfyojson.Safe.t -> unit
|
||||||
|
|
Loading…
Reference in New Issue