Flatter E2, hence better -print-struct-tree

This commit is contained in:
John Whitington 2024-09-23 17:45:22 +01:00
parent 0cff79e049
commit 4e9fdbe21c
3 changed files with 27 additions and 28 deletions

Binary file not shown.

View File

@ -5202,9 +5202,7 @@ To enable the generation of structure information, we may add \texttt{-draw-stru
\begin{verbatim}
$cpdf -print-struct-tree out.pdf
/StructTreeRoot
└──
└── /P (1)
└──\end{verbatim}
└── /P (1)\end{verbatim}
\noindent To prevent such automatic tagging, relying only on manual tags, use \texttt{-no-auto-tags}. The effect may be reversed at any point with \texttt{-auto-tags}. Unless told otherwise, Cpdf auto-tags text as paragraphs /P, and images as /Figure.
@ -5229,13 +5227,9 @@ There are two types of tag we can add manually. One kind is used to tag individu
\begin{verbatim}
/StructTreeRoot
└──
├── /H1 (1)
│ └──
├── /P (1)
│ └──
└── /P (1)
└──
\end{verbatim}
@ -5344,12 +5338,10 @@ We can print an abbreviated form of the structure tree to standard output:
\begin{framed}
\begin{verbatim}
/StructTreeRoot
└──
└── /Document
├── /Sect
│ ├── /P (1)
│ │ ├── /Span (1)
│ │ └──
│ └── /Figure (1)
├── /Sect
│ ├── /H1 (2)
@ -5359,7 +5351,7 @@ We can print an abbreviated form of the structure tree to standard output:
│ │ └── /Link (2)
. .
. .
.
. .
\end{verbatim}
\end{framed}
\end{minipage}

View File

@ -135,7 +135,9 @@ let read_st_basic pdf =
| Pdf.Dictionary d ->
begin match lookup "/StructTreeRoot" d with
| None -> E2 ("/StructTreeRoot", [], [])
| Some st -> E2 ("/StructTreeRoot", [], [read_st_inner pdf st])
| Some st ->
match read_st_inner pdf st with
| E2 (_, a, b) -> E2 ("/StructTreeRoot", a, b)
end
| _ -> error "read_st no root"
@ -1577,6 +1579,11 @@ let replace_struct_tree pdf json =
with
e -> error (Printf.sprintf "replace_struct_tree: %s" (Printexc.to_string e))
(* Prune placeholder nodes from a structure tree: any child whose name is the
   empty string and which has no children left is dropped. Children are
   processed before they are tested, so an unnamed node whose own children
   were all pruned is removed as well. The node passed in is always kept;
   only its descendants are filtered. Names and attributes are untouched.
   Applied to the tree in print_struct_tree before it is rendered. *)
let rec remove_empty (E2 (name, attrs, children)) =
  (* A node with no name and nothing beneath it. *)
  let is_blank_leaf = function
    | E2 ("", _, []) -> true
    | _ -> false
  in
  let pruned = lose is_blank_leaf (map remove_empty children) in
    E2 (name, attrs, pruned)
let print_struct_tree pdf =
let page_lookup =
hashtable_of_dictionary (combine (Pdf.page_reference_numbers pdf) (ilist 1 (Pdfpage.endpage pdf)))
@ -1592,7 +1599,7 @@ let print_struct_tree pdf =
(Cpdfprinttree.to_string
~get_name:(fun (E2 (x, a, _)) -> if int_of_string (get_page a) > 0 then x ^ " (" ^ get_page a ^ ")" else x)
~get_children:(fun (E2 (_, _, cs)) -> cs)
st)
(remove_empty st))
let cpdfua_args title =
[ "-create-pdf";