beginning -replace-dict-entry

This commit is contained in:
John Whitington 2021-10-28 17:06:46 +01:00
parent 66e4224872
commit 6a52728e9d
5 changed files with 49 additions and 8 deletions

View File

@ -1,5 +1,7 @@
2.5 (Upcoming 2022)
o Extend -remove-dict-entry to allow search
o New -replace-dict-entry function to search & replace e.g. URLs
o Output annotations in JSON form with -list-annotations-json
o Basic merging of AcroForms
o Output bookmarks in JSON format with -list-bookmarks-json

13
cpdf.ml
View File

@ -2621,15 +2621,20 @@ let list_page_annotations encoding pdf num page =
iter (print_annotation encoding pdf num) (map (Pdf.direct pdf) annots)
| _ -> ()
let annotations_json_page pdf page =
let annotations_json_page pdf page pagenum =
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
| Some (Pdf.Array annots) ->
map (Cpdfjson.json_of_object pdf (fun _ -> ()) false false) (map (Pdf.direct pdf) annots)
map
(fun annot ->
`List [`Int pagenum; Cpdfjson.json_of_object pdf (fun _ -> ()) false false annot])
(map (Pdf.direct pdf) annots)
| _ -> []
let list_annotations_json pdf =
let module J = Cpdfyojson.Safe in
let json = `List (flatten (map (annotations_json_page pdf) (Pdfpage.pages_of_pagetree pdf))) in
let pages = Pdfpage.pages_of_pagetree pdf in
let pagenums = indx pages in
let json = `List (flatten (map2 (annotations_json_page pdf) pages pagenums)) in
J.pretty_to_channel stdout json
let list_annotations ~json encoding pdf =
@ -4241,6 +4246,8 @@ let remove_dict_entry pdf key =
pdf;
pdf.Pdf.trailerdict <- Pdf.remove_dict_entry pdf.Pdf.trailerdict key
(* Search-and-replace of dictionary entries. Placeholder only: all four
   arguments are accepted and ignored, and the document is left untouched. *)
let replace_dict_entry _pdf _key _value _search = ()
let remove_clipping_ops pdf resources content =
let ops = Pdfops.parse_operators pdf resources content in
let rec process a = function

View File

@ -389,6 +389,8 @@ val stamp_as_xobject : Pdf.t -> int list -> Pdf.t -> Pdf.t * string
val remove_dict_entry : Pdf.t -> string -> unit
(** [replace_dict_entry pdf key value search] is the entry point for the
    -replace-dict-entry operation: [key] is the dictionary key given on the
    command line, [value] is the string given with -replace-dict-entry-value,
    and [search] is the optional string given with -dict-entry-search.
    NOTE(review): how [value] is interpreted and how [search] restricts the
    replacement cannot be determined from this signature -- confirm against
    the implementation. *)
val replace_dict_entry : Pdf.t -> string -> string -> string option -> unit
val remove_clipping : Pdf.t -> int list -> Pdf.t
val image_resolution : Pdf.t -> int list -> float -> (int * string * int * int * float * float) list

View File

@ -180,6 +180,7 @@ type op =
| PrintPageLabels
| Revisions
| RemoveDictEntry of string
| ReplaceDictEntry of string
| ListSpotColours
| RemoveClipping
| SetMetadataDate of string
@ -299,6 +300,7 @@ let string_of_op = function
| PrintPageLabels -> "PrintPageLabels"
| Revisions -> "Revisions"
| RemoveDictEntry _ -> "RemoveDictEntry"
| ReplaceDictEntry _ -> "ReplaceDictEntry"
| ListSpotColours -> "ListSpotColours"
| RemoveClipping -> "RemoveClipping"
| Trim -> "Trim"
@ -463,7 +465,9 @@ type args =
mutable impose_margin : float;
mutable impose_spacing : float;
mutable impose_linewidth : float;
mutable format_json : bool}
mutable format_json : bool;
mutable replace_dict_entry_value : string;
mutable dict_entry_search : string option}
let args =
{op = None;
@ -577,7 +581,9 @@ let args =
impose_margin = 0.;
impose_spacing = 0.;
impose_linewidth = 0.;
format_json = false}
format_json = false;
replace_dict_entry_value = "";
dict_entry_search = None}
let reset_arguments () =
args.op <- None;
@ -676,7 +682,9 @@ let reset_arguments () =
args.impose_margin <- 0.;
args.impose_spacing <- 0.;
args.impose_linewidth <- 0.;
args.format_json <- false
args.format_json <- false;
args.replace_dict_entry_value <- "";
args.dict_entry_search <- None
(* Do not reset original_filename or cpdflin or was_encrypted or
* was_decrypted_with_owner or recrypt or producer or creator or path_to_* or
* gs_malformed or gs_quiet, since we want these to work across ANDs. Or
@ -735,7 +743,7 @@ let banned banlist = function
| RemoveId | OpenAtPageFit _ | OpenAtPage _ | SetPageLayout _
| ShowBoxes | TrimMarks | CreateMetadata | SetMetadataDate _ | SetVersion _
| SetAuthor _|SetTitle _|SetSubject _|SetKeywords _|SetCreate _
| SetModify _|SetCreator _|SetProducer _|RemoveDictEntry _ | SetMetadata _
| SetModify _|SetCreator _|SetProducer _|RemoveDictEntry _ | ReplaceDictEntry _ | SetMetadata _
| ExtractText | ExtractImages | ExtractFontFile
| AddPageLabels | RemovePageLabels | OutputJSON | OCGCoalesce
| OCGRename | OCGList | OCGOrderAll
@ -1588,6 +1596,15 @@ let setimposespacing f =
(* Store the given line width in the global argument record. *)
let setimposelinewidth width =
  args.impose_linewidth <- width
(* Handler for -replace-dict-entry: select the ReplaceDictEntry operation,
   carrying the dictionary key supplied on the command line. *)
let setreplacedictentry key =
  setop (ReplaceDictEntry key) ()
(* Handler for -replace-dict-entry-value: remember the replacement value in
   the global argument record. *)
let setreplacedictentryvalue replacement =
  args.replace_dict_entry_value <- replacement
(* Handler for -dict-entry-search: remember the search string. The field is
   an option so that "no search given" (None) stays distinguishable. *)
let setdictentrysearch needle =
  args.dict_entry_search <- Some needle
(* Print the required command-line form to standard error and terminate the
   program with exit status 1. *)
let whingemalformed () =
  let usage =
    "Command line must be of exactly the form\ncpdf <infile> -gs <path> -gs-malformed-force -o <outfile>\n"
  in
  prerr_string usage;
  exit 1
@ -2228,6 +2245,15 @@ and specs =
("-remove-dict-entry",
Arg.String setremovedictentry,
" Remove an entry from all dictionaries");
(* -replace-dict-entry takes the dictionary key; the replacement value and the
   optional search string are supplied separately via
   -replace-dict-entry-value and -dict-entry-search. *)
("-replace-dict-entry",
Arg.String setreplacedictentry,
" Replace an entry in all dictionaries");
("-replace-dict-entry-value",
Arg.String setreplacedictentryvalue,
" Replacement value for -replace-dict-entry");
("-dict-entry-search",
Arg.String setdictentrysearch,
" Search string for -remove-dict-entry and -replace-dict-entry");
("-producer",
Arg.String setproduceraswego,
" Change the /Producer entry in the /Info dictionary");
@ -4104,6 +4130,10 @@ let go () =
let pdf = get_single_pdf args.op true in
Cpdf.remove_dict_entry pdf key;
write_pdf false pdf
| Some (ReplaceDictEntry key) ->
let pdf = get_single_pdf args.op true in
Cpdf.replace_dict_entry pdf key args.replace_dict_entry_value args.dict_entry_search;
write_pdf false pdf
| Some ListSpotColours ->
let pdf = get_single_pdf args.op false in
list_spot_colours pdf

View File

@ -4,7 +4,7 @@
%Document -impose and friends (inc. 0-w, 0-h for long ones, how lines scale etc., undefined if pages different sizes)
%Document -bookmarks-json including mentioning UTF8
%Document -list-annotations-json
%Document -replace-dict-entry and search extension to -remove-dict-entry
\documentclass{book}
% Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc.
\usepackage{comment}\excludecomment{cpdflib}\excludecomment{pycpdflib}