Redacting with process_struct_trees

This commit is contained in:
John Whitington 2024-09-23 15:37:27 +01:00
parent 383db052d4
commit fa831170c3
4 changed files with 20 additions and 15 deletions

View File

@ -4650,7 +4650,7 @@ let go () =
| Some Redact ->
let pdf = get_single_pdf args.op false in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
write_pdf false (Cpdfpage.redact pdf range)
write_pdf false (Cpdfpage.redact ~process_struct_tree:args.process_struct_trees pdf range)
(* Advise the user if a combination of command line flags makes little sense,
or error out if it makes no sense at all. *)

View File

@ -996,17 +996,20 @@ let copy_box f t mediabox_if_missing pdf range =
pdf
range
(* TODO: Trim structure tree *)
let redact pdf range =
process_pages
(Pdfpage.ppstub
(fun pnum page ->
if mem pnum range then
{page with
Pdfpage.content = [];
Pdfpage.resources = Pdf.Dictionary [];
Pdfpage.rest = Pdf.remove_dict_entry page.Pdfpage.rest "/Annots"}
else
page))
(* [redact ~process_struct_tree pdf range] blanks every page of [pdf] whose
   page number is in [range] and returns the resulting document. Pages not in
   [range] are passed through unchanged. When [process_struct_tree] is true,
   [Pdfst.trim_structure_tree] is additionally called on the processed
   document before it is returned. *)
let redact ~process_struct_tree pdf range =
let pdf =
process_pages
(Pdfpage.ppstub
(fun pnum page ->
if mem pnum range then
(* A redacted page keeps its other fields but loses its content streams,
   its resources, and its /Annots entry. *)
{page with
Pdfpage.content = [];
Pdfpage.resources = Pdf.Dictionary [];
Pdfpage.rest = Pdf.remove_dict_entry page.Pdfpage.rest "/Annots"}
else
page))
pdf
range
in
(* The call is sequenced with [;] and its result is not used, so it is
   presumably run for its effect on [pdf] -- TODO confirm.
   NOTE(review): the inverted range is presumably the set of pages that were
   NOT redacted, i.e. the pages whose structure should be kept -- verify
   against Pdfst.trim_structure_tree and Cpdfpagespec.invert_range. *)
if process_struct_tree then Pdfst.trim_structure_tree pdf (Cpdfpagespec.invert_range (Pdfpage.endpage pdf) range);
pdf
range

View File

@ -131,4 +131,4 @@ val alluprightonly : int list -> Pdf.t -> bool
(** When a page is transformed, its patterns must be too. *)
val change_pattern_matrices_page : Pdf.t -> Pdftransform.transform_matrix -> Pdfpage.t -> Pdfpage.t
val redact : Pdf.t -> int list -> Pdf.t
(** [redact ~process_struct_tree pdf range] returns [pdf] with each page in
    [range] stripped of its content, resources and annotations. If
    [process_struct_tree] is true, the structure tree is also trimmed. *)
val redact : process_struct_tree:bool -> Pdf.t -> int list -> Pdf.t

View File

@ -32,3 +32,5 @@ val validate_pagespec : string -> bool
(** Return a string for the given range. Knows how to identify all, odd, even,
x-y ranges etc. *)
val string_of_pagespec : Pdf.t -> int list -> string
(** NOTE(review): presumably [invert_range endpage range] returns the page
    numbers from 1 to [endpage] which are not in [range] -- confirm against
    the implementation, which is not shown here. *)
val invert_range : int -> int list -> int list