From fa831170c3b60b15048c6219ddc6b7bbd7362512 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Mon, 23 Sep 2024 15:37:27 +0100 Subject: [PATCH] Redacting with process_struct_trees --- cpdfcommand.ml | 2 +- cpdfpage.ml | 29 ++++++++++++++++------------- cpdfpage.mli | 2 +- cpdfpagespec.mli | 2 ++ 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index a726490..07be95e 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -4650,7 +4650,7 @@ let go () = | Some Redact -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in - write_pdf false (Cpdfpage.redact pdf range) + write_pdf false (Cpdfpage.redact ~process_struct_tree:args.process_struct_trees pdf range) (* Advise the user if a combination of command line flags makes little sense, or error out if it make no sense at all. *) diff --git a/cpdfpage.ml b/cpdfpage.ml index 4aae5e6..c53f86a 100644 --- a/cpdfpage.ml +++ b/cpdfpage.ml @@ -996,17 +996,20 @@ let copy_box f t mediabox_if_missing pdf range = pdf range -(* TODO: Trim structure tree *) -let redact pdf range = - process_pages - (Pdfpage.ppstub - (fun pnum page -> - if mem pnum range then - {page with - Pdfpage.content = []; - Pdfpage.resources = Pdf.Dictionary []; - Pdfpage.rest = Pdf.remove_dict_entry page.Pdfpage.rest "/Annots"} - else - page)) +let redact ~process_struct_tree pdf range = + let pdf = + process_pages + (Pdfpage.ppstub + (fun pnum page -> + if mem pnum range then + {page with + Pdfpage.content = []; + Pdfpage.resources = Pdf.Dictionary []; + Pdfpage.rest = Pdf.remove_dict_entry page.Pdfpage.rest "/Annots"} + else + page)) + pdf + range + in + if process_struct_tree then Pdfst.trim_structure_tree pdf (Cpdfpagespec.invert_range (Pdfpage.endpage pdf) range); pdf - range diff --git a/cpdfpage.mli b/cpdfpage.mli index 243ec16..02c4cf1 100644 --- a/cpdfpage.mli +++ b/cpdfpage.mli @@ -131,4 +131,4 @@ val alluprightonly : int list -> Pdf.t -> bool (** When a page is transformed, its patterns must be too. *) val change_pattern_matrices_page : Pdf.t -> Pdftransform.transform_matrix -> Pdfpage.t -> Pdfpage.t -val redact : Pdf.t -> int list -> Pdf.t +val redact : process_struct_tree:bool -> Pdf.t -> int list -> Pdf.t diff --git a/cpdfpagespec.mli b/cpdfpagespec.mli index c3d92fb..ab25f93 100644 --- a/cpdfpagespec.mli +++ b/cpdfpagespec.mli @@ -32,3 +32,5 @@ val validate_pagespec : string -> bool (** Return a string for the given range. Knows how to identify all, odd, even, x-y ranges etc. *) val string_of_pagespec : Pdf.t -> int list -> string + +val invert_range : int -> int list -> int list