Very basic redaction

This commit is contained in:
John Whitington 2024-09-21 18:44:43 +01:00
parent 4e9bae2596
commit c8006e8cdb
5 changed files with 27 additions and 1 deletions

View File

@ -16,6 +16,7 @@ New features:
* -typeset can make PDF/UA documents
* -jpeg, -png and friends can make PDF/UA documents
o -stretch scales without preserving aspect ratio
o -redact removes the contents of whole pages
* = Supported by a grant from NLnet

View File

@ -228,6 +228,7 @@ type op =
| ExtractStructTree
| ReplaceStructTree of string
| SetLanguage of string
| Redact
let string_of_op = function
| PrintFontEncoding _ -> "PrintFontEncoding"
@ -379,6 +380,7 @@ let string_of_op = function
| ExtractStructTree -> "ExtractStructTree"
| ReplaceStructTree _ -> "ReplaceStructTree"
| SetLanguage _ -> "SetLanguage"
| Redact -> "Redact"
(* Inputs: filename, pagespec. *)
type input_kind =
@ -925,7 +927,7 @@ let banned banlist = function
| Decrypt | Encrypt | CombinePages _ -> true (* Never allowed *)
| AddBookmarks _ | PadBefore | PadAfter | PadEvery _ | PadMultiple _ | PadMultipleBefore _
| Merge | Split | SplitOnBookmarks _ | SplitMax _ | Spray | RotateContents _ | Rotate _
| Rotateby _ | Upright | VFlip | HFlip | Impose _ | Chop _ | ChopHV _ ->
| Rotateby _ | Upright | VFlip | HFlip | Impose _ | Chop _ | ChopHV _ | Redact ->
mem Pdfcrypt.NoAssemble banlist
| TwoUp | TwoUpStack | RemoveBookmarks | AddRectangle | RemoveText|
Draft | Shift | ShiftBoxes | Scale | ScaleToFit|Stretch|RemoveAttachedFiles|
@ -2907,6 +2909,7 @@ let specs =
("-print-struct-tree", Arg.Unit (fun () -> setop PrintStructTree ()), " Print structure tree");
("-extract-struct-tree", Arg.Unit (fun () -> setop ExtractStructTree ()), " Extract structure tree in JSON format");
("-replace-struct-tree", Arg.String (fun s -> setop (ReplaceStructTree s) ()), " Replace structure tree from JSON");
("-redact", Arg.Unit (fun () -> setop Redact ()), " Redact entire pages");
(* These items are undocumented *)
("-debug", Arg.Unit setdebug, "");
("-debug-crypt", Arg.Unit (fun () -> args.debugcrypt <- true), "");
@ -4644,6 +4647,10 @@ let go () =
let pdf = get_single_pdf args.op false in
Cpdfmetadata.set_language pdf s;
write_pdf false pdf
| Some Redact ->
let pdf = get_single_pdf args.op false in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
write_pdf false (Cpdfpage.redact pdf range)
(* Advise the user if a combination of command line flags makes little sense,
or error out if it makes no sense at all. *)

View File

@ -1,4 +1,5 @@
%Document -stretch
%Document -redact
\documentclass{book}
% Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf,
% dotnetcpdflibmanual.pdf, jcpdflibmanual.pdf jscpdflibmanual.pdf etc.

View File

@ -995,3 +995,18 @@ let copy_box f t mediabox_if_missing pdf range =
else page))
pdf
range
(* Redact the pages in the range: each such page has its content emptied, its
   resources replaced by an empty dictionary, and any /Annots entry removed
   from the rest of its page dictionary. Any other page handed to the page
   function is passed through unchanged. *)
(* TODO: Trim structure tree *)
let redact pdf range =
  let blank_page page =
    let rest_without_annots =
      Pdf.remove_dict_entry page.Pdfpage.rest "/Annots"
    in
      {page with
         Pdfpage.content = [];
         Pdfpage.resources = Pdf.Dictionary [];
         Pdfpage.rest = rest_without_annots}
  in
  let redact_if_selected pnum page =
    match mem pnum range with
    | true -> blank_page page
    | false -> page
  in
    process_pages (Pdfpage.ppstub redact_if_selected) pdf range

View File

@ -130,3 +130,5 @@ val alluprightonly : int list -> Pdf.t -> bool
(** When a page is transformed, its patterns must be too. *)
val change_pattern_matrices_page : Pdf.t -> Pdftransform.transform_matrix -> Pdfpage.t -> Pdfpage.t
(** [redact pdf range] redacts whole pages: for each page number in [range],
    the page's content is emptied, its resources are replaced by an empty
    dictionary, and its [/Annots] entry is removed. The structure tree is not
    yet trimmed. *)
val redact : Pdf.t -> int list -> Pdf.t