Added manual artifact control + turn off auto

This commit is contained in:
John Whitington 2024-09-16 16:37:12 +01:00
parent 18b569aeff
commit 8c2440fb7a
5 changed files with 23 additions and 22 deletions

View File

@ -2810,6 +2810,9 @@ let specs =
("-end-stag", Arg.Unit Cpdfdrawcontrol.endstag, " End structure branch");
("-auto-tags", Arg.Unit (fun _ -> Cpdfdrawcontrol.autotags true), " Auto-tag paragraphs and figures");
("-no-auto-tags", Arg.Unit (fun _ -> Cpdfdrawcontrol.autotags false), " Don't auto-tag paragraphs and figures");
("-artifact", Arg.Unit (fun _ -> Cpdfdrawcontrol.artifact ()), " Begin an artifact");
(* Help text starts with a space, like every other entry in this list. *)
("-end-artifact", Arg.Unit (fun _ -> Cpdfdrawcontrol.endartifact ()), " End an artifact");
("-no-auto-artifacts", Arg.Unit (fun _ -> Cpdfdrawcontrol.autoartifacts false), " Don't mark untagged content as artifacts");
("-rect", Arg.String Cpdfdrawcontrol.addrect, " Draw rectangle");
("-to", Arg.String Cpdfdrawcontrol.addto, " Move to");
("-line", Arg.String Cpdfdrawcontrol.addline, " Add line to");

View File

@ -1,6 +1,8 @@
open Pdfutil
open Cpdferror
(* Flag for automatic artifact marking. [draw] assigns it from its
   [~struct_tree] argument, and Cpdfdrawcontrol.autoartifacts assigns it from
   the command line (-no-auto-artifacts stores [false]).
   NOTE(review): [draw] overwrites whatever was stored before it runs, so a
   value set during option parsing may be lost - confirm that
   -no-auto-artifacts can actually take effect. *)
let do_add_artifacts = ref false
type colspec =
NoCol
| RGB of float * float * float
@ -56,6 +58,8 @@ type drawops =
| Rise of float
| STag of string
| EndSTag
| BeginArtifact
| EndArtifact
(*let rec string_of_drawop = function
| Qq o -> "Qq (" ^ string_of_drawops o ^ ")"
@ -418,6 +422,10 @@ let rec ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num
| EndSTag ->
structdata =| StDataEndTree;
[]
| BeginArtifact ->
[Pdfops.Op_BMC "/Artifact"]
| EndArtifact ->
[Pdfops.Op_EMC]
(* Convert a list of drawops: every element is passed to ops_of_drawop with the
   same context arguments, in list order, and the resulting lists are combined
   with flatten. *)
and ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page drawops =
  let convert = ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num page in
  let per_drawop = map convert drawops in
  flatten per_drawop
@ -562,28 +570,6 @@ type st =
| StItem of {kind : string; pageobjnum : int option; alt : string option; children : st list}
(* Build a tree from the MCIDs and structure tree instructions gathered *)
(*let make_structure_tree pdf items =
(* Make map of page numbers to pageobjnums, and create a reference to keep track. *)
let pagenum = ref 0 in
let items_out = ref [] in
let pageobjnums =
let objnums = Pdf.page_reference_numbers pdf in
combine (indx objnums) objnums
in
(* Process the items, making the st list tree data structure *)
let process = function
| StDataMCID (n, mcid, alt) ->
items_out =| StItem {kind = n; alt; pageobjnum = lookup !pagenum pageobjnums; children = [StMCID mcid]}
| StDataPage n ->
pagenum := n
| StDataBeginTree s ->
()
| StDataEndTree ->
()
in
iter process items;
!items_out*)
let rec make_structure_tree pageobjnums pdf pagenum = function
| [] -> []
| StDataMCID (n, mcid, alt)::t ->
@ -659,6 +645,7 @@ let write_structure_tree pdf st =
let draw ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf drawops =
(*Printf.printf "%s\n" (string_of_drawops drawops);*)
do_add_artifacts := struct_tree;
resstack := [empty_res ()];
Hashtbl.clear !fontpacks;
(res ()).time <- Cpdfstrftime.current_time ();

View File

@ -55,6 +55,10 @@ type drawops =
| Rise of float
| STag of string
| EndSTag
| BeginArtifact
| EndArtifact
(** Mutable flag for automatic artifact marking, exposed so that other modules
    can set it. Presumably, when [true], untagged content is marked as an
    artifact - TODO confirm against the implementation. *)
val do_add_artifacts : bool ref
(** Calling [draw fast underneath filename bates batespad range pdf drawops] draws on
top of all the pages in the range. *)

View File

@ -102,6 +102,10 @@ let endtag () = ()
(* Handler for the -end-stag option ("End structure branch"): hands
   Cpdfdraw.EndSTag to addop. *)
let endstag () =
addop Cpdfdraw.EndSTag
(* Handler for the -auto-tags / -no-auto-tags options. The argument is ignored
   and nothing is done. NOTE(review): looks like a placeholder - confirm. *)
let autotags b = ()
(* Store [b] in Cpdfdraw.do_add_artifacts. The -no-auto-artifacts option calls
   this with [false].
   NOTE(review): Cpdfdraw.draw assigns the same ref from its ~struct_tree
   argument when it starts, so the value stored here may be overwritten before
   it is read - confirm. *)
let autoartifacts b =
Cpdfdraw.do_add_artifacts := b
(* Handler for the -artifact option: pass a BeginArtifact drawing operation to
   addop. Cpdfdraw.ops_of_drawop turns it into a BMC /Artifact operator. *)
let artifact () =
  addop Cpdfdraw.BeginArtifact

(* Handler for the -end-artifact option: pass the matching EndArtifact drawing
   operation to addop. Cpdfdraw.ops_of_drawop turns it into an EMC operator. *)
let endartifact () =
  addop Cpdfdraw.EndArtifact
let addrect s =
let x, y, w, h = Cpdfcoord.parse_rectangle (Pdf.empty ()) s in

View File

@ -19,6 +19,9 @@ val addstag : string -> unit
(* Presumably ends a tag opened earlier - TODO confirm against the implementation. *)
val endtag : unit -> unit
(* Handler for -end-stag: end a structure branch. *)
val endstag : unit -> unit
(* Handler for -auto-tags ([true]) and -no-auto-tags ([false]). *)
val autotags : bool -> unit
(* Handler for -artifact: begin an artifact. *)
val artifact : unit -> unit
(* Handler for -end-artifact: end an artifact. *)
val endartifact : unit -> unit
(* Set Cpdfdraw.do_add_artifacts; -no-auto-artifacts passes [false]. *)
val autoartifacts : bool -> unit
(* Handler for -rect: draw a rectangle described by the string argument. *)
val addrect : string -> unit
(* Handler for -to: move to the position described by the string argument. *)
val addto : string -> unit
(* Handler for -line: add a line to the position described by the string argument. *)
val addline : string -> unit