From 6a52728e9dc47b9bb8c9fee28d58f6333d429fbc Mon Sep 17 00:00:00 2001 From: John Whitington Date: Thu, 28 Oct 2021 17:06:46 +0100 Subject: [PATCH] beginning -replace-dict-entry --- Changes | 2 ++ cpdf.ml | 13 ++++++++++--- cpdf.mli | 2 ++ cpdfcommand.ml | 38 ++++++++++++++++++++++++++++++++++---- cpdfmanual.tex | 2 +- 5 files changed, 49 insertions(+), 8 deletions(-) diff --git a/Changes b/Changes index 1a753b4..127f7ab 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,7 @@ 2.5 (Upcoming 2022) +o Extend -remove-dict-entry to allow search +o New -replace-dict-entry function to search & replace e.g. URLs o Output annotations in JSON form with -list-annotations-json o Basic merging of AcroForms o Output bookmarks in JSON format with -list-bookmarks-json diff --git a/cpdf.ml b/cpdf.ml index d41d65f..de03383 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -2621,15 +2621,20 @@ let list_page_annotations encoding pdf num page = iter (print_annotation encoding pdf num) (map (Pdf.direct pdf) annots) | _ -> () -let annotations_json_page pdf page = +let annotations_json_page pdf page pagenum = match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with | Some (Pdf.Array annots) -> - map (Cpdfjson.json_of_object pdf (fun _ -> ()) false false) (map (Pdf.direct pdf) annots) + map + (fun annot -> + `List [`Int pagenum; Cpdfjson.json_of_object pdf (fun _ -> ()) false false annot]) + (map (Pdf.direct pdf) annots) | _ -> [] let list_annotations_json pdf = let module J = Cpdfyojson.Safe in - let json = `List (flatten (map (annotations_json_page pdf) (Pdfpage.pages_of_pagetree pdf))) in + let pages = Pdfpage.pages_of_pagetree pdf in + let pagenums = indx pages in + let json = `List (flatten (map2 (annotations_json_page pdf) pages pagenums)) in J.pretty_to_channel stdout json let list_annotations ~json encoding pdf = @@ -4241,6 +4246,8 @@ let remove_dict_entry pdf key = pdf; pdf.Pdf.trailerdict <- Pdf.remove_dict_entry pdf.Pdf.trailerdict key +let replace_dict_entry pdf key value search = () + let 
remove_clipping_ops pdf resources content = let ops = Pdfops.parse_operators pdf resources content in let rec process a = function diff --git a/cpdf.mli b/cpdf.mli index 4a8c2cb..a78126b 100644 --- a/cpdf.mli +++ b/cpdf.mli @@ -389,6 +389,8 @@ val stamp_as_xobject : Pdf.t -> int list -> Pdf.t -> Pdf.t * string val remove_dict_entry : Pdf.t -> string -> unit +val replace_dict_entry : Pdf.t -> string -> string -> string option -> unit + val remove_clipping : Pdf.t -> int list -> Pdf.t val image_resolution : Pdf.t -> int list -> float -> (int * string * int * int * float * float) list diff --git a/cpdfcommand.ml b/cpdfcommand.ml index e7dbeb2..7b80f36 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -180,6 +180,7 @@ type op = | PrintPageLabels | Revisions | RemoveDictEntry of string + | ReplaceDictEntry of string | ListSpotColours | RemoveClipping | SetMetadataDate of string @@ -299,6 +300,7 @@ let string_of_op = function | PrintPageLabels -> "PrintPageLabels" | Revisions -> "Revisions" | RemoveDictEntry _ -> "RemoveDictEntry" + | ReplaceDictEntry _ -> "ReplaceDictEntry" | ListSpotColours -> "ListSpotColours" | RemoveClipping -> "RemoveClipping" | Trim -> "Trim" @@ -463,7 +465,9 @@ type args = mutable impose_margin : float; mutable impose_spacing : float; mutable impose_linewidth : float; - mutable format_json : bool} + mutable format_json : bool; + mutable replace_dict_entry_value : string; + mutable dict_entry_search : string option} let args = {op = None; @@ -577,7 +581,9 @@ let args = impose_margin = 0.; impose_spacing = 0.; impose_linewidth = 0.; - format_json = false} + format_json = false; + replace_dict_entry_value = ""; + dict_entry_search = None} let reset_arguments () = args.op <- None; @@ -676,7 +682,9 @@ let reset_arguments () = args.impose_margin <- 0.; args.impose_spacing <- 0.; args.impose_linewidth <- 0.; - args.format_json <- false + args.format_json <- false; + args.replace_dict_entry_value <- ""; + args.dict_entry_search <- None (* Do not reset 
original_filename or cpdflin or was_encrypted or * was_decrypted_with_owner or recrypt or producer or creator or path_to_* or * gs_malformed or gs_quiet, since we want these to work across ANDs. Or @@ -735,7 +743,7 @@ let banned banlist = function | RemoveId | OpenAtPageFit _ | OpenAtPage _ | SetPageLayout _ | ShowBoxes | TrimMarks | CreateMetadata | SetMetadataDate _ | SetVersion _ | SetAuthor _|SetTitle _|SetSubject _|SetKeywords _|SetCreate _ - | SetModify _|SetCreator _|SetProducer _|RemoveDictEntry _ | SetMetadata _ + | SetModify _|SetCreator _|SetProducer _|RemoveDictEntry _ | ReplaceDictEntry _ | SetMetadata _ | ExtractText | ExtractImages | ExtractFontFile | AddPageLabels | RemovePageLabels | OutputJSON | OCGCoalesce | OCGRename | OCGList | OCGOrderAll @@ -1588,6 +1596,15 @@ let setimposespacing f = let setimposelinewidth f = args.impose_linewidth <- f +let setreplacedictentry s = + setop (ReplaceDictEntry s) () + +let setreplacedictentryvalue s = + args.replace_dict_entry_value <- s + +let setdictentrysearch s = + args.dict_entry_search <- Some s + let whingemalformed () = prerr_string "Command line must be of exactly the form\ncpdf -gs -gs-malformed-force -o \n"; exit 1 @@ -2228,6 +2245,15 @@ and specs = ("-remove-dict-entry", Arg.String setremovedictentry, " Remove an entry from all dictionaries"); + ("-replace-dict-entry", + Arg.String setreplacedictentry, + " Replace an entry in all dictionaries"); + ("-replace-dict-entry-value", + Arg.String setreplacedictentryvalue, + " Replacement value for -replace-dict-entry"); + ("-dict-entry-search", + Arg.String setdictentrysearch, + " Search string for -remove-dict-entry and -replace-dict-entry"); ("-producer", Arg.String setproduceraswego, " Change the /Producer entry in the /Info dictionary"); @@ -4104,6 +4130,10 @@ let go () = let pdf = get_single_pdf args.op true in Cpdf.remove_dict_entry pdf key; write_pdf false pdf + | Some (ReplaceDictEntry key) -> + let pdf = get_single_pdf args.op true in + 
Cpdf.replace_dict_entry pdf key args.replace_dict_entry_value args.dict_entry_search; + write_pdf false pdf | Some ListSpotColours -> let pdf = get_single_pdf args.op false in list_spot_colours pdf diff --git a/cpdfmanual.tex b/cpdfmanual.tex index a70c2f0..0525260 100644 --- a/cpdfmanual.tex +++ b/cpdfmanual.tex @@ -4,7 +4,7 @@ %Document -impose and friends (inc. 0-w, 0-h for long ones, how lines scale etc., undefined if pages different sizes) %Document -bookmarks-json including mentioning UTF8 %Document -list-annotations-json - +%Document -replace-dict-entry and search extension to -remove-dict-entry \documentclass{book} % Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc. \usepackage{comment}\excludecomment{cpdflib}\excludecomment{pycpdflib}