From 8c2440fb7a183ce3a6fe858ba5ff5485f648704b Mon Sep 17 00:00:00 2001 From: John Whitington Date: Mon, 16 Sep 2024 16:37:12 +0100 Subject: [PATCH] Added manual artifact control + turn off auto --- cpdfcommand.ml | 3 +++ cpdfdraw.ml | 31 +++++++++---------------------- cpdfdraw.mli | 4 ++++ cpdfdrawcontrol.ml | 4 ++++ cpdfdrawcontrol.mli | 3 +++ 5 files changed, 23 insertions(+), 22 deletions(-) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 801f323..34db148 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -2810,6 +2810,9 @@ let specs = ("-end-stag", Arg.Unit Cpdfdrawcontrol.endstag, " End structure branch"); ("-auto-tags", Arg.Unit (fun _ -> Cpdfdrawcontrol.autotags true), " Auto-tag paragraphs and figures"); ("-no-auto-tags", Arg.Unit (fun _ -> Cpdfdrawcontrol.autotags false), " Don't auto-tag paragraphs and figures"); + ("-artifact", Arg.Unit (fun _ -> Cpdfdrawcontrol.artifact ()), " Begin an artifact"); + ("-end-artifact", Arg.Unit (fun _ -> Cpdfdrawcontrol.endartifact ()), " End an artifact"); + ("-no-auto-artifacts", Arg.Unit (fun _ -> Cpdfdrawcontrol.autoartifacts false), " Don't mark untagged content as artifacts"); ("-rect", Arg.String Cpdfdrawcontrol.addrect, " Draw rectangle"); ("-to", Arg.String Cpdfdrawcontrol.addto, " Move to"); ("-line", Arg.String Cpdfdrawcontrol.addline, " Add line to"); diff --git a/cpdfdraw.ml b/cpdfdraw.ml index d018b4b..87380c8 100644 --- a/cpdfdraw.ml +++ b/cpdfdraw.ml @@ -1,6 +1,8 @@ open Pdfutil open Cpdferror +let do_add_artifacts = ref false + type colspec = NoCol | RGB of float * float * float @@ -56,6 +58,8 @@ type drawops = | Rise of float | STag of string | EndSTag + | BeginArtifact + | EndArtifact (*let rec string_of_drawop = function | Qq o -> "Qq (" ^ string_of_drawops o ^ ")" @@ -418,6 +422,10 @@ let rec ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num | EndSTag -> structdata =| StDataEndTree; [] + | BeginArtifact -> + [Pdfops.Op_BMC "/Artifact"] + | EndArtifact -> + [Pdfops.Op_EMC] 
and ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page drawops = flatten (map (ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num page) drawops) @@ -562,28 +570,6 @@ type st = | StItem of {kind : string; pageobjnum : int option; alt : string option; children : st list} (* Build a tree from the MCIDs and structure tree instructions gathered *) -(*let make_structure_tree pdf items = - (* Make map of page numbers to pageobjnums, and create a reference to keep track. *) - let pagenum = ref 0 in - let items_out = ref [] in - let pageobjnums = - let objnums = Pdf.page_reference_numbers pdf in - combine (indx objnums) objnums - in - (* Process the items, making the st list tree data structure *) - let process = function - | StDataMCID (n, mcid, alt) -> - items_out =| StItem {kind = n; alt; pageobjnum = lookup !pagenum pageobjnums; children = [StMCID mcid]} - | StDataPage n -> - pagenum := n - | StDataBeginTree s -> - () - | StDataEndTree -> - () - in - iter process items; - !items_out*) - let rec make_structure_tree pageobjnums pdf pagenum = function | [] -> [] | StDataMCID (n, mcid, alt)::t -> @@ -659,6 +645,7 @@ let write_structure_tree pdf st = let draw ~struct_tree ~fast ~underneath ~filename ~bates ~batespad range pdf drawops = (*Printf.printf "%s\n" (string_of_drawops drawops);*) + do_add_artifacts := struct_tree; resstack := [empty_res ()]; Hashtbl.clear !fontpacks; (res ()).time <- Cpdfstrftime.current_time (); diff --git a/cpdfdraw.mli b/cpdfdraw.mli index 0208a62..759a72a 100644 --- a/cpdfdraw.mli +++ b/cpdfdraw.mli @@ -55,6 +55,10 @@ type drawops = | Rise of float | STag of string | EndSTag + | BeginArtifact + | EndArtifact + +val do_add_artifacts : bool ref (** Calling [draw fast underneath filename bates batespad range pdf drawops] draws on top of all the pages in the range. 
*) diff --git a/cpdfdrawcontrol.ml b/cpdfdrawcontrol.ml index b3ea128..806e2e5 100644 --- a/cpdfdrawcontrol.ml +++ b/cpdfdrawcontrol.ml @@ -102,6 +102,10 @@ let endtag () = () let endstag () = addop Cpdfdraw.EndSTag let autotags b = () +let autoartifacts b = + Cpdfdraw.do_add_artifacts := b +let artifact () = () +let endartifact () = () let addrect s = let x, y, w, h = Cpdfcoord.parse_rectangle (Pdf.empty ()) s in diff --git a/cpdfdrawcontrol.mli b/cpdfdrawcontrol.mli index 789fba7..dac3f48 100644 --- a/cpdfdrawcontrol.mli +++ b/cpdfdrawcontrol.mli @@ -19,6 +19,9 @@ val addstag : string -> unit val endtag : unit -> unit val endstag : unit -> unit val autotags : bool -> unit +val artifact : unit -> unit +val endartifact : unit -> unit +val autoartifacts : bool -> unit val addrect : string -> unit val addto : string -> unit val addline : string -> unit