remove slashes from print_struct_tree
This commit is contained in:
parent
d8c91180bc
commit
6495daba8c
BIN
cpdfmanual.pdf
BIN
cpdfmanual.pdf
Binary file not shown.
|
@ -5201,8 +5201,8 @@ To enable the generation of structure information, we may add \texttt{-draw-stru
|
||||||
|
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
$cpdf -print-struct-tree out.pdf
|
$cpdf -print-struct-tree out.pdf
|
||||||
/StructTreeRoot
|
StructTreeRoot
|
||||||
└── /P (1)\end{verbatim}
|
└── P (1)\end{verbatim}
|
||||||
|
|
||||||
\noindent To prevent such automatic tagging, relying only on manual tags, use \texttt{-no-auto-tags}. The effect may be reversed at any point with \texttt{-auto-tags}. Unless told otherwise, Cpdf auto-tags text as paragraphs /P, and images as /Figure.
|
\noindent To prevent such automatic tagging, relying only on manual tags, use \texttt{-no-auto-tags}. The effect may be reversed at any point with \texttt{-auto-tags}. Unless told otherwise, Cpdf auto-tags text as paragraphs /P, and images as /Figure.
|
||||||
|
|
||||||
|
@ -5226,10 +5226,10 @@ There are two types of tag we can add manually. One kind is used to tag individu
|
||||||
\noindent And here is the structure tree:
|
\noindent And here is the structure tree:
|
||||||
|
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
/StructTreeRoot
|
StructTreeRoot
|
||||||
├── /H1 (1)
|
├── H1 (1)
|
||||||
├── /P (1)
|
├── P (1)
|
||||||
└── /P (1)
|
└── P (1)
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
|
||||||
\noindent Content tagging is flat -- every part of the content of a page is part of only one \texttt{-tag}. The logical structure of a document, however, is a tree structure -- sections contain paragraphs, and so on. To build the logical structure tree, we add structure tags using \texttt{-stag} / \texttt{-end-stag} pairs which, of course, may be nested. For example, let's put our H1 and P sections in a Section structure tag:
|
\noindent Content tagging is flat -- every part of the content of a page is part of only one \texttt{-tag}. The logical structure of a document, however, is a tree structure -- sections contain paragraphs, and so on. To build the logical structure tree, we add structure tags using \texttt{-stag} / \texttt{-end-stag} pairs which, of course, may be nested. For example, let's put our H1 and P sections in a Section structure tag:
|
||||||
|
@ -5246,11 +5246,11 @@ There are two types of tag we can add manually. One kind is used to tag individu
|
||||||
\noindent Here is the structure tree:
|
\noindent Here is the structure tree:
|
||||||
|
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
/StructTreeRoot
|
StructTreeRoot
|
||||||
└── /Section (1)
|
└── Section (1)
|
||||||
├── /H1 (1)
|
├── H1 (1)
|
||||||
├── /P (1)
|
├── P (1)
|
||||||
└── /P (1)
|
└── P (1)
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
|
||||||
\noindent Some PDF standards require that everything not marked as content (e.g. paragraph, figure) is marked as an artifact. For example, a background image which is the same on every page, or a page border. This tells PDF processors that it is not logical content.
|
\noindent Some PDF standards require that everything not marked as content (e.g. paragraph, figure) is marked as an artifact. For example, a background image which is the same on every page, or a page border. This tells PDF processors that it is not logical content.
|
||||||
|
@ -5356,18 +5356,18 @@ We can print an abbreviated form of the structure tree to standard output:
|
||||||
\begin{minipage}{\linewidth}
|
\begin{minipage}{\linewidth}
|
||||||
\begin{framed}
|
\begin{framed}
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
/StructTreeRoot
|
StructTreeRoot
|
||||||
└── /Document
|
└── Document
|
||||||
├── /Sect
|
├── Sect
|
||||||
│ ├── /P (1)
|
│ ├── P (1)
|
||||||
│ │ ├── /Span (1)
|
│ │ ├── Span (1)
|
||||||
│ └── /Figure (1)
|
│ └── Figure (1)
|
||||||
├── /Sect
|
├── Sect
|
||||||
│ ├── /H1 (2)
|
│ ├── H1 (2)
|
||||||
│ └── /TOC
|
│ └── TOC
|
||||||
│ ├── /TOCI
|
│ ├── TOCI
|
||||||
│ │ └── /P
|
│ │ └── P
|
||||||
│ │ └── /Link (2)
|
│ │ └── Link (2)
|
||||||
. .
|
. .
|
||||||
. .
|
. .
|
||||||
. .
|
. .
|
||||||
|
|
|
@ -1584,6 +1584,9 @@ let rec remove_empty = function
|
||||||
let cs' = map remove_empty cs in
|
let cs' = map remove_empty cs in
|
||||||
E2 (n, attrs, lose (function E2 ("", _, []) -> true | _ -> false) cs')
|
E2 (n, attrs, lose (function E2 ("", _, []) -> true | _ -> false) cs')
|
||||||
|
|
||||||
|
(* Strip the leading '/' from the name of every node in a tree of E2 nodes,
   e.g. "/P" becomes "P". A name which is empty, or which does not begin with
   a slash, is returned unchanged rather than losing its first character.
   Attributes are kept as they are, and children are processed recursively. *)
let rec remove_slashes = function
  E2 (n, attrs, cs) ->
    let n' =
      if String.length n > 0 && n.[0] = '/'
        then String.sub n 1 (String.length n - 1)
        else n
    in
      E2 (n', attrs, map remove_slashes cs)
|
||||||
|
|
||||||
let print_struct_tree pdf =
|
let print_struct_tree pdf =
|
||||||
let page_lookup =
|
let page_lookup =
|
||||||
hashtable_of_dictionary (combine (Pdf.page_reference_numbers pdf) (ilist 1 (Pdfpage.endpage pdf)))
|
hashtable_of_dictionary (combine (Pdf.page_reference_numbers pdf) (ilist 1 (Pdfpage.endpage pdf)))
|
||||||
|
@ -1599,7 +1602,7 @@ let print_struct_tree pdf =
|
||||||
(Cpdfprinttree.to_string
|
(Cpdfprinttree.to_string
|
||||||
~get_name:(fun (E2 (x, a, _)) -> if int_of_string (get_page a) > 0 then x ^ " (" ^ get_page a ^ ")" else x)
|
~get_name:(fun (E2 (x, a, _)) -> if int_of_string (get_page a) > 0 then x ^ " (" ^ get_page a ^ ")" else x)
|
||||||
~get_children:(fun (E2 (_, _, cs)) -> cs)
|
~get_children:(fun (E2 (_, _, cs)) -> cs)
|
||||||
(remove_empty st))
|
(remove_empty (remove_slashes st)))
|
||||||
|
|
||||||
let cpdfua_args title =
|
let cpdfua_args title =
|
||||||
[ "-create-pdf";
|
[ "-create-pdf";
|
||||||
|
|
Loading…
Reference in New Issue