Flatter E2, hence better -print-struct-tree
This commit is contained in:
parent
0cff79e049
commit
4e9fdbe21c
BIN
cpdfmanual.pdf
BIN
cpdfmanual.pdf
Binary file not shown.
|
@ -5202,9 +5202,7 @@ To enable the generation of structure information, we may add \texttt{-draw-stru
|
|||
\begin{verbatim}
|
||||
$cpdf -print-struct-tree out.pdf
|
||||
/StructTreeRoot
|
||||
└──
|
||||
└── /P (1)
|
||||
└──\end{verbatim}
|
||||
└── /P (1)\end{verbatim}
|
||||
|
||||
\noindent To prevent such automatic tagging, relying only on manual tags, use \texttt{-no-auto-tags}. The effect may be reversed at any point with \texttt{-auto-tags}. Unless told otherwise, Cpdf auto-tags text as paragraphs /P, and images as /Figure.
|
||||
|
||||
|
@ -5229,13 +5227,9 @@ There are two types of tag we can add manually. One kind is used to tag individu
|
|||
|
||||
\begin{verbatim}
|
||||
/StructTreeRoot
|
||||
└──
|
||||
├── /H1 (1)
|
||||
│ └──
|
||||
├── /P (1)
|
||||
│ └──
|
||||
└── /P (1)
|
||||
└──
|
||||
├── /H1 (1)
|
||||
├── /P (1)
|
||||
└── /P (1)
|
||||
\end{verbatim}
|
||||
|
||||
|
||||
|
@ -5344,22 +5338,20 @@ We can print an abbreviated form of the structure tree to standard output:
|
|||
\begin{framed}
|
||||
\begin{verbatim}
|
||||
/StructTreeRoot
|
||||
└──
|
||||
└── /Document
|
||||
├── /Sect
|
||||
│ ├── /P (1)
|
||||
│ │ ├── /Span (1)
|
||||
│ │ └──
|
||||
│ └── /Figure (1)
|
||||
├── /Sect
|
||||
│ ├── /H1 (2)
|
||||
│ └── /TOC
|
||||
│ ├── /TOCI
|
||||
│ │ └── /P
|
||||
│ │ └── /Link (2)
|
||||
. .
|
||||
. .
|
||||
.
|
||||
└── /Document
|
||||
├── /Sect
|
||||
│ ├── /P (1)
|
||||
│ │ ├── /Span (1)
|
||||
│ └── /Figure (1)
|
||||
├── /Sect
|
||||
│ ├── /H1 (2)
|
||||
│ └── /TOC
|
||||
│ ├── /TOCI
|
||||
│ │ └── /P
|
||||
│ │ └── /Link (2)
|
||||
. .
|
||||
. .
|
||||
. .
|
||||
\end{verbatim}
|
||||
\end{framed}
|
||||
\end{minipage}
|
||||
|
|
11
cpdfua.ml
11
cpdfua.ml
|
@ -135,7 +135,9 @@ let read_st_basic pdf =
|
|||
| Pdf.Dictionary d ->
|
||||
begin match lookup "/StructTreeRoot" d with
|
||||
| None -> E2 ("/StructTreeRoot", [], [])
|
||||
| Some st -> E2 ("/StructTreeRoot", [], [read_st_inner pdf st])
|
||||
| Some st ->
|
||||
match read_st_inner pdf st with
|
||||
| E2 (_, a, b) -> E2 ("/StructTreeRoot", a, b)
|
||||
end
|
||||
| _ -> error "read_st no root"
|
||||
|
||||
|
@ -1577,6 +1579,11 @@ let replace_struct_tree pdf json =
|
|||
with
|
||||
e -> error (Printf.sprintf "replace_struct_tree: %s" (Printexc.to_string e))
|
||||
|
||||
(* [remove_empty t] returns a copy of the tree [t] with placeholder leaves
   pruned. The children of each node are processed first (bottom-up); then
   any child that is an [E2] whose name is the empty string and whose child
   list is empty after that processing is dropped. The attributes of such a
   child are not inspected. The node passed in is itself always kept, with
   its name and attributes unchanged.
   NOTE(review): assumes [lose p l] discards the elements of [l] that satisfy
   [p], and that [map] is the usual list map; both are defined outside this
   view -- confirm. *)
let rec remove_empty = function
|
||||
E2 (n, attrs, cs) ->
|
||||
let cs' = map remove_empty cs in
|
||||
E2 (n, attrs, lose (function E2 ("", _, []) -> true | _ -> false) cs')
|
||||
|
||||
let print_struct_tree pdf =
|
||||
let page_lookup =
|
||||
hashtable_of_dictionary (combine (Pdf.page_reference_numbers pdf) (ilist 1 (Pdfpage.endpage pdf)))
|
||||
|
@ -1592,7 +1599,7 @@ let print_struct_tree pdf =
|
|||
(Cpdfprinttree.to_string
|
||||
~get_name:(fun (E2 (x, a, _)) -> if int_of_string (get_page a) > 0 then x ^ " (" ^ get_page a ^ ")" else x)
|
||||
~get_children:(fun (E2 (_, _, cs)) -> cs)
|
||||
st)
|
||||
(remove_empty st))
|
||||
|
||||
let cpdfua_args title =
|
||||
[ "-create-pdf";
|
||||
|
|
Loading…
Reference in New Issue