From c8006e8cdb2473e937125ed02a3748fe24177323 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Sat, 21 Sep 2024 18:44:43 +0100 Subject: [PATCH] Very basic redaction --- Changes | 1 + cpdfcommand.ml | 9 ++++++++- cpdfmanual.tex | 1 + cpdfpage.ml | 15 +++++++++++++++ cpdfpage.mli | 2 ++ 5 files changed, 27 insertions(+), 1 deletion(-) diff --git a/Changes b/Changes index 5fac41b..0b33baa 100644 --- a/Changes +++ b/Changes @@ -16,6 +16,7 @@ New features: * -typeset can make PDF/UA documents * -jpeg, -png and friends can make PDF/UA documents o -stretch scales without preserving aspect ratio +o -redact removes whole pages * = Supported by a grant from NLnet diff --git a/cpdfcommand.ml b/cpdfcommand.ml index fbd2399..a726490 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -228,6 +228,7 @@ type op = | ExtractStructTree | ReplaceStructTree of string | SetLanguage of string + | Redact let string_of_op = function | PrintFontEncoding _ -> "PrintFontEncoding" @@ -379,6 +380,7 @@ let string_of_op = function | ExtractStructTree -> "ExtractStructTree" | ReplaceStructTree _ -> "ReplaceStructTree" | SetLanguage _ -> "SetLanguage" + | Redact -> "Redact" (* Inputs: filename, pagespec. 
*) type input_kind = @@ -925,7 +927,7 @@ let banned banlist = function | Decrypt | Encrypt | CombinePages _ -> true (* Never allowed *) | AddBookmarks _ | PadBefore | PadAfter | PadEvery _ | PadMultiple _ | PadMultipleBefore _ | Merge | Split | SplitOnBookmarks _ | SplitMax _ | Spray | RotateContents _ | Rotate _ - | Rotateby _ | Upright | VFlip | HFlip | Impose _ | Chop _ | ChopHV _ -> + | Rotateby _ | Upright | VFlip | HFlip | Impose _ | Chop _ | ChopHV _ | Redact -> mem Pdfcrypt.NoAssemble banlist | TwoUp | TwoUpStack | RemoveBookmarks | AddRectangle | RemoveText| Draft | Shift | ShiftBoxes | Scale | ScaleToFit|Stretch|RemoveAttachedFiles| @@ -2907,6 +2909,7 @@ let specs = ("-print-struct-tree", Arg.Unit (fun () -> setop PrintStructTree ()), " Print structure tree"); ("-extract-struct-tree", Arg.Unit (fun () -> setop ExtractStructTree ()), " Extract structure tree in JSON format"); ("-replace-struct-tree", Arg.String (fun s -> setop (ReplaceStructTree s) ()), " Replace structure tree from JSON"); + ("-redact", Arg.Unit (fun () -> setop Redact ()), " Redact entire pages"); (* These items are undocumented *) ("-debug", Arg.Unit setdebug, ""); ("-debug-crypt", Arg.Unit (fun () -> args.debugcrypt <- true), ""); @@ -4644,6 +4647,10 @@ let go () = let pdf = get_single_pdf args.op false in Cpdfmetadata.set_language pdf s; write_pdf false pdf + | Some Redact -> + let pdf = get_single_pdf args.op false in + let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in + write_pdf false (Cpdfpage.redact pdf range) (* Advise the user if a combination of command line flags makes little sense, or error out if it make no sense at all. *) diff --git a/cpdfmanual.tex b/cpdfmanual.tex index 74ab3eb..78b4c3e 100644 --- a/cpdfmanual.tex +++ b/cpdfmanual.tex @@ -1,4 +1,5 @@ %Document -stretch +%Document -redact \documentclass{book} % Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf, % dotnetcpdflibmanual.pdf, jcpdflibmanual.pdf jscpdflibmanual.pdf etc. 
diff --git a/cpdfpage.ml b/cpdfpage.ml index 83e6451..4aae5e6 100644 --- a/cpdfpage.ml +++ b/cpdfpage.ml @@ -995,3 +995,18 @@ let copy_box f t mediabox_if_missing pdf range = else page)) pdf range + +(* [redact pdf range] blanks pages rather than deleting them: through [process_pages] and [Pdfpage.ppstub], each page whose number [pnum] is a member of [range] gets an empty content list, an empty resources dictionary, and has the /Annots entry removed from [rest]; every other page is passed through unchanged. Only /Annots is stripped from [rest] here, so any other page-level entries are carried over as they are. NOTE(review): confirm whether further entries should also be dropped for redaction. TODO: Trim structure tree *) +let redact pdf range = + process_pages + (Pdfpage.ppstub + (fun pnum page -> + if mem pnum range then + {page with + Pdfpage.content = []; + Pdfpage.resources = Pdf.Dictionary []; + Pdfpage.rest = Pdf.remove_dict_entry page.Pdfpage.rest "/Annots"} + else + page)) + pdf + range diff --git a/cpdfpage.mli b/cpdfpage.mli index e139882..243ec16 100644 --- a/cpdfpage.mli +++ b/cpdfpage.mli @@ -130,3 +130,5 @@ val alluprightonly : int list -> Pdf.t -> bool (** When a page is transformed, its patterns must be too. *) val change_pattern_matrices_page : Pdf.t -> Pdftransform.transform_matrix -> Pdfpage.t -> Pdfpage.t + +(** [redact pdf range] blanks each page of [pdf] whose number is in [range]: its content and resources are emptied and its /Annots entry is removed. The pages themselves are kept. The structure tree is not yet trimmed. *) val redact : Pdf.t -> int list -> Pdf.t