Flatter E2, hence better -print-struct-tree
This commit is contained in:
parent
0cff79e049
commit
4e9fdbe21c
BIN
cpdfmanual.pdf
BIN
cpdfmanual.pdf
Binary file not shown.
|
@ -5202,9 +5202,7 @@ To enable the generation of structure information, we may add \texttt{-draw-stru
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
$cpdf -print-struct-tree out.pdf
|
$cpdf -print-struct-tree out.pdf
|
||||||
/StructTreeRoot
|
/StructTreeRoot
|
||||||
└──
|
└── /P (1)\end{verbatim}
|
||||||
└── /P (1)
|
|
||||||
└──\end{verbatim}
|
|
||||||
|
|
||||||
\noindent To prevent such automatic tagging, relying only on manual tags, use \texttt{-no-auto-tags}. The effect may be reversed at any point with \texttt{-auto-tags}. Unless told otherwise, Cpdf auto-tags text as paragraphs /P, and images as /Figure.
|
\noindent To prevent such automatic tagging, relying only on manual tags, use \texttt{-no-auto-tags}. The effect may be reversed at any point with \texttt{-auto-tags}. Unless told otherwise, Cpdf auto-tags text as paragraphs /P, and images as /Figure.
|
||||||
|
|
||||||
|
@ -5229,13 +5227,9 @@ There are two types of tag we can add manually. One kind is used to tag individu
|
||||||
|
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
/StructTreeRoot
|
/StructTreeRoot
|
||||||
└──
|
├── /H1 (1)
|
||||||
├── /H1 (1)
|
├── /P (1)
|
||||||
│ └──
|
└── /P (1)
|
||||||
├── /P (1)
|
|
||||||
│ └──
|
|
||||||
└── /P (1)
|
|
||||||
└──
|
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
|
||||||
|
|
||||||
|
@ -5344,22 +5338,20 @@ We can print an abbreviated form of the structure tree to standard output:
|
||||||
\begin{framed}
|
\begin{framed}
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
/StructTreeRoot
|
/StructTreeRoot
|
||||||
└──
|
└── /Document
|
||||||
└── /Document
|
├── /Sect
|
||||||
├── /Sect
|
│ ├── /P (1)
|
||||||
│ ├── /P (1)
|
│ │ ├── /Span (1)
|
||||||
│ │ ├── /Span (1)
|
│ └── /Figure (1)
|
||||||
│ │ └──
|
├── /Sect
|
||||||
│ └── /Figure (1)
|
│ ├── /H1 (2)
|
||||||
├── /Sect
|
│ └── /TOC
|
||||||
│ ├── /H1 (2)
|
│ ├── /TOCI
|
||||||
│ └── /TOC
|
│ │ └── /P
|
||||||
│ ├── /TOCI
|
│ │ └── /Link (2)
|
||||||
│ │ └── /P
|
. .
|
||||||
│ │ └── /Link (2)
|
. .
|
||||||
. .
|
. .
|
||||||
. .
|
|
||||||
.
|
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
\end{framed}
|
\end{framed}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
|
|
11
cpdfua.ml
11
cpdfua.ml
|
@ -135,7 +135,9 @@ let read_st_basic pdf =
|
||||||
| Pdf.Dictionary d ->
|
| Pdf.Dictionary d ->
|
||||||
begin match lookup "/StructTreeRoot" d with
|
begin match lookup "/StructTreeRoot" d with
|
||||||
| None -> E2 ("/StructTreeRoot", [], [])
|
| None -> E2 ("/StructTreeRoot", [], [])
|
||||||
| Some st -> E2 ("/StructTreeRoot", [], [read_st_inner pdf st])
|
| Some st ->
|
||||||
|
match read_st_inner pdf st with
|
||||||
|
| E2 (_, a, b) -> E2 ("/StructTreeRoot", a, b)
|
||||||
end
|
end
|
||||||
| _ -> error "read_st no root"
|
| _ -> error "read_st no root"
|
||||||
|
|
||||||
|
@ -1577,6 +1579,11 @@ let replace_struct_tree pdf json =
|
||||||
with
|
with
|
||||||
e -> error (Printf.sprintf "replace_struct_tree: %s" (Printexc.to_string e))
|
e -> error (Printf.sprintf "replace_struct_tree: %s" (Printexc.to_string e))
|
||||||
|
|
||||||
|
(* [remove_empty] rebuilds an [E2] tree, keeping each node's name and
   attributes as they are. Children are processed recursively first; the
   resulting child list is then passed through [lose] with a predicate that
   matches nodes whose name is the empty string and whose child list is empty.
   NOTE(review): presumably [lose p l] discards the elements of [l] for which
   [p] holds, so such empty, childless nodes are pruned -- confirm against the
   definition of [lose]. The root node passed in is never removed itself. *)
let rec remove_empty = function
|
||||||
|
E2 (n, attrs, cs) ->
|
||||||
|
let cs' = map remove_empty cs in
|
||||||
|
E2 (n, attrs, lose (function E2 ("", _, []) -> true | _ -> false) cs')
|
||||||
|
|
||||||
let print_struct_tree pdf =
|
let print_struct_tree pdf =
|
||||||
let page_lookup =
|
let page_lookup =
|
||||||
hashtable_of_dictionary (combine (Pdf.page_reference_numbers pdf) (ilist 1 (Pdfpage.endpage pdf)))
|
hashtable_of_dictionary (combine (Pdf.page_reference_numbers pdf) (ilist 1 (Pdfpage.endpage pdf)))
|
||||||
|
@ -1592,7 +1599,7 @@ let print_struct_tree pdf =
|
||||||
(Cpdfprinttree.to_string
|
(Cpdfprinttree.to_string
|
||||||
~get_name:(fun (E2 (x, a, _)) -> if int_of_string (get_page a) > 0 then x ^ " (" ^ get_page a ^ ")" else x)
|
~get_name:(fun (E2 (x, a, _)) -> if int_of_string (get_page a) > 0 then x ^ " (" ^ get_page a ^ ")" else x)
|
||||||
~get_children:(fun (E2 (_, _, cs)) -> cs)
|
~get_children:(fun (E2 (_, _, cs)) -> cs)
|
||||||
st)
|
(remove_empty st))
|
||||||
|
|
||||||
let cpdfua_args title =
|
let cpdfua_args title =
|
||||||
[ "-create-pdf";
|
[ "-create-pdf";
|
||||||
|
|
Loading…
Reference in New Issue