Fix tags, artifacts

This commit is contained in:
John Whitington 2024-09-25 14:35:21 +01:00
parent b676ab4923
commit 01fcd13251
2 changed files with 12 additions and 6 deletions

View File

@ -429,7 +429,7 @@ let rec ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num
[] []
| TextSection ops -> | TextSection ops ->
let m = mcid () in let m = mcid () in
if not dryrun then structdata := StDataMCID ("P", m, None)::!structdata; if not dryrun then structdata := StDataMCID ("/P", m, None)::!structdata;
(if struct_tree && !do_auto_tag then [Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m])] else []) (if struct_tree && !do_auto_tag then [Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m])] else [])
@ [Pdfops.Op_BT] @ [Pdfops.Op_BT]
@ ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops @ ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops
@ -459,10 +459,10 @@ let rec ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num
| Newline -> [Pdfops.Op_T'] | Newline -> [Pdfops.Op_T']
| Tag s -> | Tag s ->
let m = mcid () in let m = mcid () in
if not dryrun then structdata := StDataMCID (s, m, None)::!structdata; if not dryrun then structdata := StDataMCID ("/" ^ s, m, None)::!structdata;
[Pdfops.Op_BDC ("/" ^ s, Pdf.Dictionary ["/MCID", Pdf.Integer m])] [Pdfops.Op_BDC ("/" ^ s, Pdf.Dictionary ["/MCID", Pdf.Integer m])]
| EndTag -> [Pdfops.Op_EMC] | EndTag -> [Pdfops.Op_EMC]
| STag s -> if not dryrun then structdata =| StDataBeginTree s; [] | STag s -> if not dryrun then structdata =| StDataBeginTree ("/" ^ s); []
| EndSTag -> if not dryrun then structdata =| StDataEndTree; [] | EndSTag -> if not dryrun then structdata =| StDataEndTree; []
| BeginArtifact -> [Pdfops.Op_BMC "/BeginArtifact"] | BeginArtifact -> [Pdfops.Op_BMC "/BeginArtifact"]
| EndArtifact -> [Pdfops.Op_BMC "/EndArtifact"] | EndArtifact -> [Pdfops.Op_BMC "/EndArtifact"]
@ -569,6 +569,12 @@ let add_artifacts ops =
in in
loop [] ops loop [] ops
(* Manual artifacts are emitted by the drawing code as two placeholder
   marked-content operators, Op_BMC "/BeginArtifact" and
   Op_BMC "/EndArtifact". This pass is for use when the automatic artifacting
   pass is not run: it rewrites the opening placeholder to a real
   Op_BMC "/Artifact" and the closing placeholder to Op_EMC. All other
   operators are passed through unchanged. *)
let fixup_manual_artifacts ops =
  let fixup op =
    match op with
    | Pdfops.Op_BMC "/BeginArtifact" -> Pdfops.Op_BMC "/Artifact"
    | Pdfops.Op_BMC "/EndArtifact" -> Pdfops.Op_EMC
    | other -> other
  in
    map fixup ops
let draw_single ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf drawops = let draw_single ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf drawops =
(res ()).num <- max (res ()).num (minimum_resource_number pdf range); (res ()).num <- max (res ()).num (minimum_resource_number pdf range);
let endpage = Pdfpage.endpage pdf in let endpage = Pdfpage.endpage pdf in
@ -594,7 +600,7 @@ let draw_single ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range
map3 map3
(fun n p ops -> (fun n p ops ->
if not (mem n range) then p else if not (mem n range) then p else
let ops = if struct_tree && !do_add_artifacts then add_artifacts ops else ops in let ops = if struct_tree && !do_add_artifacts then add_artifacts ops else fixup_manual_artifacts ops in
let page = {p with Pdfpage.resources = update_resources pdf p.Pdfpage.resources} in let page = {p with Pdfpage.resources = update_resources pdf p.Pdfpage.resources} in
(if underneath then Pdfpage.prepend_operators else Pdfpage.postpend_operators) pdf ops ~fast page) (if underneath then Pdfpage.prepend_operators else Pdfpage.postpend_operators) pdf ops ~fast page)
(ilist 1 endpage) (ilist 1 endpage)
@ -692,7 +698,7 @@ let write_structure_tree pdf st =
(alt (alt
@ page @ page
@ namespace @ namespace
@ [("/S", Pdf.Name ("/" ^ kind)); @ [("/S", Pdf.Name kind);
("/P", Pdf.Indirect struct_tree_parent); ("/P", Pdf.Indirect struct_tree_parent);
("/K", Pdf.Array (map (mktree this_objnum) children))]) ("/K", Pdf.Array (map (mktree this_objnum) children))])
in in

View File

@ -5205,7 +5205,7 @@ $cpdf -print-struct-tree out.pdf
StructTreeRoot StructTreeRoot
└── P (1)\end{verbatim} └── P (1)\end{verbatim}
\noindent To prevent such automatic tagging, relying only on manual tags, use \texttt{-no-auto-tags}. The effect may be reversed at any point with \texttt{-auto-tags}. Unless told otherwise, Cpdf auto-tags text as paragraphs /P, and images as /Figure. \noindent To prevent such automatic tagging, relying only on manual tags, use \texttt{-no-auto-tags}. The effect may be reversed at any point with \texttt{-auto-tags}. Unless told otherwise, Cpdf auto-tags text as paragraphs P, and images as Figure.
There are two types of tag we can add manually. One kind is used to tag individual pieces of content. We do this with a \texttt{-tag}/\texttt{-end-tag} pair. Note that nesting is not permitted here. For example, let us tag a heading: There are two types of tag we can add manually. One kind is used to tag individual pieces of content. We do this with a \texttt{-tag}/\texttt{-end-tag} pair. Note that nesting is not permitted here. For example, let us tag a heading: