diff --git a/cpdfmanual.pdf b/cpdfmanual.pdf index b887b6a..4cecd1f 100644 Binary files a/cpdfmanual.pdf and b/cpdfmanual.pdf differ diff --git a/cpdfmanual.tex b/cpdfmanual.tex index 8226c1e..e7b0851 100644 --- a/cpdfmanual.tex +++ b/cpdfmanual.tex @@ -5202,9 +5202,7 @@ To enable the generation of structure information, we may add \texttt{-draw-stru \begin{verbatim} $cpdf -print-struct-tree out.pdf /StructTreeRoot -└── - └── /P (1) - └──\end{verbatim} +└── /P (1)\end{verbatim} \noindent To prevent such automatic tagging, relying only on manual tags, use \texttt{-no-auto-tags}. The effect may be reversed at any point with \texttt{-auto-tags}. Unless told otherwise, Cpdf auto-tags text as paragraphs /P, and images as /Figure. @@ -5229,13 +5227,9 @@ There are two types of tag we can add manually. One kind is used to tag individu \begin{verbatim} /StructTreeRoot -└── - ├── /H1 (1) - │ └── - ├── /P (1) - │ └── - └── /P (1) - └── +├── /H1 (1) +├── /P (1) +└── /P (1) \end{verbatim} @@ -5344,22 +5338,20 @@ We can print an abbreviated form of the structure tree to standard output: \begin{framed} \begin{verbatim} /StructTreeRoot -└── - └── /Document - ├── /Sect - │ ├── /P (1) - │ │ ├── /Span (1) - │ │ └── - │ └── /Figure (1) - ├── /Sect - │ ├── /H1 (2) - │ └── /TOC - │ ├── /TOCI - │ │ └── /P - │ │ └── /Link (2) - . . - . . - . +└── /Document + ├── /Sect + │ ├── /P (1) + │ │ └── /Span (1) + │ └── /Figure (1) + ├── /Sect + │ ├── /H1 (2) + │ └── /TOC + │ ├── /TOCI + │ │ └── /P + │ │ └── /Link (2) + . . + . . + . . 
\end{verbatim} \end{framed} \end{minipage} diff --git a/cpdfua.ml b/cpdfua.ml index 5cdc3b5..e717ab8 100644 --- a/cpdfua.ml +++ b/cpdfua.ml @@ -135,7 +135,9 @@ let read_st_basic pdf = | Pdf.Dictionary d -> begin match lookup "/StructTreeRoot" d with | None -> E2 ("/StructTreeRoot", [], []) - | Some st -> E2 ("/StructTreeRoot", [], [read_st_inner pdf st]) + | Some st -> (* Relabel the node read from /StructTreeRoot, keeping its attributes and children, rather than nesting it as the single child of a fresh root node. *) + match read_st_inner pdf st with + | E2 (_, a, b) -> E2 ("/StructTreeRoot", a, b) end | _ -> error "read_st no root" @@ -1577,6 +1579,11 @@ let replace_struct_tree pdf json = with e -> error (Printf.sprintf "replace_struct_tree: %s" (Printexc.to_string e)) +(* Rebuild a tree bottom-up: the children are processed recursively first, then any resulting child that has an empty name and no children is filtered out (assumes [lose] drops the elements satisfying the predicate - confirm against its definition). The node passed in is itself always kept, whatever its name. *) let rec remove_empty = function + E2 (n, attrs, cs) -> + let cs' = map remove_empty cs in + E2 (n, attrs, lose (function E2 ("", _, []) -> true | _ -> false) cs') + let print_struct_tree pdf = let page_lookup = hashtable_of_dictionary (combine (Pdf.page_reference_numbers pdf) (ilist 1 (Pdfpage.endpage pdf))) @@ -1592,7 +1599,7 @@ let print_struct_tree pdf = (Cpdfprinttree.to_string ~get_name:(fun (E2 (x, a, _)) -> if int_of_string (get_page a) > 0 then x ^ " (" ^ get_page a ^ ")" else x) ~get_children:(fun (E2 (_, _, cs)) -> cs) - st) + (remove_empty st)) (* print the tree with unnamed, childless nodes stripped *) let cpdfua_args title = [ "-create-pdf";