Very basic redaction

This commit is contained in:
John Whitington 2024-09-21 18:44:43 +01:00
parent 4e9bae2596
commit c8006e8cdb
5 changed files with 27 additions and 1 deletions

View File

@ -16,6 +16,7 @@ New features:
* -typeset can make PDF/UA documents * -typeset can make PDF/UA documents
* -jpeg, -png and friends can make PDF/UA documents * -jpeg, -png and friends can make PDF/UA documents
o -stretch scales without preserving aspect ratio o -stretch scales without preserving aspect ratio
o -redact removes whole pages
* = Supported by a grant from NLnet * = Supported by a grant from NLnet

View File

@ -228,6 +228,7 @@ type op =
| ExtractStructTree | ExtractStructTree
| ReplaceStructTree of string | ReplaceStructTree of string
| SetLanguage of string | SetLanguage of string
| Redact
let string_of_op = function let string_of_op = function
| PrintFontEncoding _ -> "PrintFontEncoding" | PrintFontEncoding _ -> "PrintFontEncoding"
@ -379,6 +380,7 @@ let string_of_op = function
| ExtractStructTree -> "ExtractStructTree" | ExtractStructTree -> "ExtractStructTree"
| ReplaceStructTree _ -> "ReplaceStructTree" | ReplaceStructTree _ -> "ReplaceStructTree"
| SetLanguage _ -> "SetLanguage" | SetLanguage _ -> "SetLanguage"
| Redact -> "Redact"
(* Inputs: filename, pagespec. *) (* Inputs: filename, pagespec. *)
type input_kind = type input_kind =
@ -925,7 +927,7 @@ let banned banlist = function
| Decrypt | Encrypt | CombinePages _ -> true (* Never allowed *) | Decrypt | Encrypt | CombinePages _ -> true (* Never allowed *)
| AddBookmarks _ | PadBefore | PadAfter | PadEvery _ | PadMultiple _ | PadMultipleBefore _ | AddBookmarks _ | PadBefore | PadAfter | PadEvery _ | PadMultiple _ | PadMultipleBefore _
| Merge | Split | SplitOnBookmarks _ | SplitMax _ | Spray | RotateContents _ | Rotate _ | Merge | Split | SplitOnBookmarks _ | SplitMax _ | Spray | RotateContents _ | Rotate _
| Rotateby _ | Upright | VFlip | HFlip | Impose _ | Chop _ | ChopHV _ -> | Rotateby _ | Upright | VFlip | HFlip | Impose _ | Chop _ | ChopHV _ | Redact ->
mem Pdfcrypt.NoAssemble banlist mem Pdfcrypt.NoAssemble banlist
| TwoUp | TwoUpStack | RemoveBookmarks | AddRectangle | RemoveText| | TwoUp | TwoUpStack | RemoveBookmarks | AddRectangle | RemoveText|
Draft | Shift | ShiftBoxes | Scale | ScaleToFit|Stretch|RemoveAttachedFiles| Draft | Shift | ShiftBoxes | Scale | ScaleToFit|Stretch|RemoveAttachedFiles|
@ -2907,6 +2909,7 @@ let specs =
("-print-struct-tree", Arg.Unit (fun () -> setop PrintStructTree ()), " Print structure tree"); ("-print-struct-tree", Arg.Unit (fun () -> setop PrintStructTree ()), " Print structure tree");
("-extract-struct-tree", Arg.Unit (fun () -> setop ExtractStructTree ()), " Extract structure tree in JSON format"); ("-extract-struct-tree", Arg.Unit (fun () -> setop ExtractStructTree ()), " Extract structure tree in JSON format");
("-replace-struct-tree", Arg.String (fun s -> setop (ReplaceStructTree s) ()), " Replace structure tree from JSON"); ("-replace-struct-tree", Arg.String (fun s -> setop (ReplaceStructTree s) ()), " Replace structure tree from JSON");
("-redact", Arg.Unit (fun () -> setop Redact ()), " Redact entire pages");
(* These items are undocumented *) (* These items are undocumented *)
("-debug", Arg.Unit setdebug, ""); ("-debug", Arg.Unit setdebug, "");
("-debug-crypt", Arg.Unit (fun () -> args.debugcrypt <- true), ""); ("-debug-crypt", Arg.Unit (fun () -> args.debugcrypt <- true), "");
@ -4644,6 +4647,10 @@ let go () =
let pdf = get_single_pdf args.op false in let pdf = get_single_pdf args.op false in
Cpdfmetadata.set_language pdf s; Cpdfmetadata.set_language pdf s;
write_pdf false pdf write_pdf false pdf
| Some Redact ->
let pdf = get_single_pdf args.op false in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
write_pdf false (Cpdfpage.redact pdf range)
(* Advise the user if a combination of command line flags makes little sense, (* Advise the user if a combination of command line flags makes little sense,
or error out if it makes no sense at all. *) or error out if it makes no sense at all. *)

View File

@ -1,4 +1,5 @@
%Document -stretch %Document -stretch
%Document -redact
\documentclass{book} \documentclass{book}
% Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf, % Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf,
% dotnetcpdflibmanual.pdf, jcpdflibmanual.pdf jscpdflibmanual.pdf etc. % dotnetcpdflibmanual.pdf, jcpdflibmanual.pdf jscpdflibmanual.pdf etc.

View File

@ -995,3 +995,18 @@ let copy_box f t mediabox_if_missing pdf range =
else page)) else page))
pdf pdf
range range
(* Redact whole pages. Each page whose number appears in [range] has its
   content streams dropped, its resources replaced by an empty dictionary and
   its /Annots entry removed from the remaining page dictionary. Pages whose
   number is not in [range] are handed back by the page function untouched.
   The result is whatever [process_pages] builds from the rewritten pages. *)
(* TODO: Trim structure tree *)
let redact pdf range =
  (* A copy of [page] with everything visible stripped out. *)
  let blank page =
    {page with
       Pdfpage.content = [];
       Pdfpage.resources = Pdf.Dictionary [];
       Pdfpage.rest = Pdf.remove_dict_entry page.Pdfpage.rest "/Annots"}
  in
  (* Only pages named in [range] are blanked. *)
  let redact_page pnum page =
    if mem pnum range then blank page else page
  in
    process_pages (Pdfpage.ppstub redact_page) pdf range

View File

@ -130,3 +130,5 @@ val alluprightonly : int list -> Pdf.t -> bool
(** When a page is transformed, its patterns must be too. *) (** When a page is transformed, its patterns must be too. *)
val change_pattern_matrices_page : Pdf.t -> Pdftransform.transform_matrix -> Pdfpage.t -> Pdfpage.t val change_pattern_matrices_page : Pdf.t -> Pdftransform.transform_matrix -> Pdfpage.t -> Pdfpage.t
(** [redact pdf range] redacts whole pages: each page whose number is in
    [range] has its content and resources emptied and its /Annots entry
    removed. The structure tree is not yet trimmed. *)
val redact : Pdf.t -> int list -> Pdf.t