diff --git a/Changes b/Changes index 127f7ab..218aaf3 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,6 @@ 2.5 (Upcoming 2022) +o New -print-dict-entry operation prints values for a given key o Extend -remove-dict-entry to allow search o New -replace-dict-entry function to search & replace e.g URLs o Output annotations in JSON form with -list-annotations-json diff --git a/cpdf.ml b/cpdf.ml index af64fdf..43f044a 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -4239,19 +4239,50 @@ let remove_all_text range pdf = (* 1. Extend remove_dict_entry with search term 2. Implement replace_dict_entry by analogy to remove_dict_entry *) -let rec remove_dict_entry_single_object f pdf = function - | (Pdf.Dictionary d) -> f (Pdf.recurse_dict (remove_dict_entry_single_object f pdf) d) +let rec dict_entry_single_object f pdf = function + | (Pdf.Dictionary d) -> f (Pdf.recurse_dict (dict_entry_single_object f pdf) d) | (Pdf.Stream {contents = (Pdf.Dictionary dict, data)}) -> - f (Pdf.Stream {contents = (Pdf.recurse_dict (remove_dict_entry_single_object f pdf) dict, data)}) - | Pdf.Array a -> Pdf.recurse_array (remove_dict_entry_single_object f pdf) a + f (Pdf.Stream {contents = (Pdf.recurse_dict (dict_entry_single_object f pdf) dict, data)}) + | Pdf.Array a -> Pdf.recurse_array (dict_entry_single_object f pdf) a | x -> x -let remove_dict_entry pdf key = - let f d = Pdf.remove_dict_entry d key in - Pdf.objselfmap (remove_dict_entry_single_object f pdf) pdf; - pdf.Pdf.trailerdict <- remove_dict_entry_single_object f pdf pdf.Pdf.trailerdict +(* FIXME are we sure that functional values can never appear in the equality here? *) +let remove_dict_entry pdf key search = + let f d = + match search with + | None -> Pdf.remove_dict_entry d key + | Some s -> + match Pdf.lookup_direct pdf key d with + | Some v when v = s -> Pdf.remove_dict_entry d key + | _ -> d + in + Pdf.objselfmap (dict_entry_single_object f pdf) pdf; + pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict -let replace_dict_entry pdf key value search = () +let replace_dict_entry pdf key value search = + let f d = + match search with + | None -> Pdf.replace_dict_entry d key value + | Some s -> + match Pdf.lookup_direct pdf key d with + | Some v when v = s -> Pdf.replace_dict_entry d key value + | _ -> d + in + Pdf.objselfmap (dict_entry_single_object f pdf) pdf; + pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict + +(* FIXME no need to self map here, since nothing changes *) +let print_dict_entry pdf key = + let f d = + match Pdf.lookup_direct pdf key d with + | Some v -> + (* We use a double newline as a separator. *) + Printf.printf "%s\n\n" (Cpdfyojson.Safe.to_string (Cpdfjson.json_of_object pdf (fun _ -> ()) false false v)); + d + | None -> d + in + Pdf.objselfmap (dict_entry_single_object f pdf) pdf; + pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict let remove_clipping_ops pdf resources content = let ops = Pdfops.parse_operators pdf resources content in diff --git a/cpdf.mli b/cpdf.mli index a78126b..ab554c8 100644 --- a/cpdf.mli +++ b/cpdf.mli @@ -387,9 +387,11 @@ val ocg_order_all : Pdf.t -> unit val stamp_as_xobject : Pdf.t -> int list -> Pdf.t -> Pdf.t * string -val remove_dict_entry : Pdf.t -> string -> unit +val remove_dict_entry : Pdf.t -> string -> Pdf.pdfobject option -> unit -val replace_dict_entry : Pdf.t -> string -> string -> string option -> unit +val replace_dict_entry : Pdf.t -> string -> Pdf.pdfobject -> Pdf.pdfobject option -> unit + +val print_dict_entry : Pdf.t -> string -> unit val remove_clipping : Pdf.t -> int list -> Pdf.t diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 7b80f36..35746dd 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -181,6 +181,7 @@ type op = | Revisions | RemoveDictEntry of string | ReplaceDictEntry of string + | PrintDictEntry of string | ListSpotColours | RemoveClipping | SetMetadataDate of string @@ -201,6 +202,7 @@ type op = | StampAsXObject of string let string_of_op = function + | PrintDictEntry _ -> "PrintDictEntry" | Impose _ -> "Impose" | CopyFont _ -> "CopyFont" | CountPages -> "CountPages" @@ -466,8 +468,8 @@ type args = mutable impose_spacing : float; mutable impose_linewidth : float; mutable format_json : bool; - mutable replace_dict_entry_value : string; - mutable dict_entry_search : string option} + mutable replace_dict_entry_value : Pdf.pdfobject; + mutable dict_entry_search : Pdf.pdfobject option} let args = {op = None; @@ -582,7 +584,7 @@ let args = impose_spacing = 0.; impose_linewidth = 0.; format_json = false; - replace_dict_entry_value = ""; + replace_dict_entry_value = Pdf.Null; dict_entry_search = None} let reset_arguments () = @@ -683,7 +685,7 @@ let reset_arguments () = args.impose_spacing <- 0.; args.impose_linewidth <- 0.; args.format_json <- false; - args.replace_dict_entry_value <- ""; + args.replace_dict_entry_value <- Pdf.Null; args.dict_entry_search <- None (* Do not reset original_filename or cpdflin or was_encrypted or * was_decrypted_with_owner or recrypt or producer or creator or path_to_* or @@ -743,7 +745,7 @@ let banned banlist = function | RemoveId | OpenAtPageFit _ | OpenAtPage _ | SetPageLayout _ | ShowBoxes | TrimMarks | CreateMetadata | SetMetadataDate _ | SetVersion _ | SetAuthor _|SetTitle _|SetSubject _|SetKeywords _|SetCreate _ - | SetModify _|SetCreator _|SetProducer _|RemoveDictEntry _ | ReplaceDictEntry _ | SetMetadata _ + | SetModify _|SetCreator _|SetProducer _|RemoveDictEntry _ | ReplaceDictEntry _ | PrintDictEntry _ | SetMetadata _ | ExtractText | ExtractImages | ExtractFontFile | AddPageLabels | RemovePageLabels | OutputJSON | OCGCoalesce | OCGRename | OCGList | OCGOrderAll @@ -1599,11 +1601,22 @@ let setimposelinewidth f = let setreplacedictentry s = setop (ReplaceDictEntry s) () +let setprintdictentry s = + setop (PrintDictEntry s) () + let setreplacedictentryvalue s = - args.replace_dict_entry_value <- s + try + let pdfobj = Cpdfjson.object_of_json (Cpdfyojson.Safe.from_string s) in + args.replace_dict_entry_value <- pdfobj + with + e -> error (Printf.sprintf "Failed to parse replacement value: %s\n" (Printexc.to_string e)) let setdictentrysearch s = - args.dict_entry_search <- Some s + try + let pdfobj = Cpdfjson.object_of_json (Cpdfyojson.Safe.from_string s) in + args.dict_entry_search <- Some pdfobj + with + e -> error (Printf.sprintf "Failed to parse search term: %s\n" (Printexc.to_string e)) let whingemalformed () = prerr_string "Command line must be of exactly the form\ncpdf -gs -gs-malformed-force -o \n"; @@ -2254,6 +2267,9 @@ and specs = ("-dict-entry-search", Arg.String setdictentrysearch, " Search string for -remove-dict-entry and -replace-dict-entry"); + ("-print-dict-entry", + Arg.String setprintdictentry, + " Print dictionary values of a given key"); ("-producer", Arg.String setproduceraswego, " Change the /Producer entry in the /Info dictionary"); @@ -4128,12 +4144,15 @@ let go () = (map Pdfpagelabels.string_of_pagelabel (Pdfpagelabels.read pdf)) | Some (RemoveDictEntry key) -> let pdf = get_single_pdf args.op true in - Cpdf.remove_dict_entry pdf key; + Cpdf.remove_dict_entry pdf key args.dict_entry_search; write_pdf false pdf | Some (ReplaceDictEntry key) -> let pdf = get_single_pdf args.op true in Cpdf.replace_dict_entry pdf key args.replace_dict_entry_value args.dict_entry_search; write_pdf false pdf + | Some (PrintDictEntry key) -> + let pdf = get_single_pdf args.op true in + Cpdf.print_dict_entry pdf key | Some ListSpotColours -> let pdf = get_single_pdf args.op false in list_spot_colours pdf diff --git a/cpdfmanual.tex b/cpdfmanual.tex index 0525260..c039da7 100644 --- a/cpdfmanual.tex +++ b/cpdfmanual.tex @@ -4,7 +4,7 @@ %Document -impose and friends (inc. 0-w, 0-h for long ones, how lines scale etc., undefined if pages different sizes) %Document -bookmarks-json including mentioning UTF8 %Document -list-annotations-json -%Document -replace-dict-entry and search extension to -remove-dict-entry +%Document -replace-dict-entry and search extension to -remove-dict-entry, and -print-dict-entry \documentclass{book} % Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc. \usepackage{comment}\excludecomment{cpdflib}\excludecomment{pycpdflib}