This commit is contained in:
John Whitington 2021-10-29 15:09:21 +01:00
parent 3cb9e52ae3
commit 3142ae5251
5 changed files with 73 additions and 20 deletions

View File

@ -1,5 +1,6 @@
2.5 (Upcoming 2022)
o New -print-dict-entry operation prints values for a given key
o Extend -remove-dict-entry to allow search
o New -replace-dict-entry function to search & replace e.g. URLs
o Output annotations in JSON form with -list-annotations-json

49
cpdf.ml
View File

@ -4239,19 +4239,50 @@ let remove_all_text range pdf =
(* 1. Extend remove_dict_entry with search term
2. Implement replace_dict_entry by analogy to remove_dict_entry *)
(* Walk a single PDF object and apply [f] to each dictionary found in it.
   - A [Pdf.Dictionary] is first passed through [Pdf.recurse_dict] with this
     same function, and [f] is then applied to the result.
   - A [Pdf.Stream] has its dictionary treated the same way; the stream data
     is carried over untouched and [f] receives the rebuilt stream.
   - A [Pdf.Array] is passed through [Pdf.recurse_array]; [f] is not applied
     to the array itself.
   - Any other object is returned unchanged.
   [pdf] is only threaded through the recursive calls.
   NOTE(review): this listing shows both the older name
   [remove_dict_entry_single_object] and the newer name
   [dict_entry_single_object] for the same lines. *)
let rec remove_dict_entry_single_object f pdf = function
| (Pdf.Dictionary d) -> f (Pdf.recurse_dict (remove_dict_entry_single_object f pdf) d)
let rec dict_entry_single_object f pdf = function
| (Pdf.Dictionary d) -> f (Pdf.recurse_dict (dict_entry_single_object f pdf) d)
| (Pdf.Stream {contents = (Pdf.Dictionary dict, data)}) ->
f (Pdf.Stream {contents = (Pdf.recurse_dict (remove_dict_entry_single_object f pdf) dict, data)})
| Pdf.Array a -> Pdf.recurse_array (remove_dict_entry_single_object f pdf) a
f (Pdf.Stream {contents = (Pdf.recurse_dict (dict_entry_single_object f pdf) dict, data)})
| Pdf.Array a -> Pdf.recurse_array (dict_entry_single_object f pdf) a
| x -> x
(* Remove the entry [key] from the dictionaries reached by
   [remove_dict_entry_single_object]: first in each object handled by
   [Pdf.objselfmap], then in the trailer dictionary. *)
let remove_dict_entry pdf key =
  let drop_key dict = Pdf.remove_dict_entry dict key in
  let strip = remove_dict_entry_single_object drop_key pdf in
  Pdf.objselfmap strip pdf;
  pdf.Pdf.trailerdict <- strip pdf.Pdf.trailerdict
(* [remove_dict_entry pdf key search] removes the entry [key] from the
   dictionaries reached by [dict_entry_single_object], first in each object
   handled by [Pdf.objselfmap] and then in the trailer dictionary.

   - [search = None]: the entry is removed unconditionally.
   - [search = Some s]: the entry is removed only when the value returned by
     [Pdf.lookup_direct] for [key] is structurally equal to [s].

   Structural equality raises [Invalid_argument] if it reaches a functional
   value. That case is caught and treated as "no match", so one
   uncomparable object cannot abort the whole operation. *)
let remove_dict_entry pdf key search =
  let matches v s =
    try v = s with Invalid_argument _ -> false
  in
  let f d =
    match search with
    | None -> Pdf.remove_dict_entry d key
    | Some s ->
        begin match Pdf.lookup_direct pdf key d with
        | Some v when matches v s -> Pdf.remove_dict_entry d key
        | _ -> d
        end
  in
  Pdf.objselfmap (dict_entry_single_object f pdf) pdf;
  pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict
(* Placeholder: accepts its four arguments and does nothing. *)
let replace_dict_entry _pdf _key _value _search = ()
(* [replace_dict_entry pdf key value search] calls
   [Pdf.replace_dict_entry d key value] on the dictionaries reached by
   [dict_entry_single_object], first in each object handled by
   [Pdf.objselfmap] and then in the trailer dictionary.

   - [search = None]: the call is made for every dictionary visited.
   - [search = Some s]: the call is made only when the value returned by
     [Pdf.lookup_direct] for [key] is structurally equal to [s].

   Structural equality raises [Invalid_argument] if it reaches a functional
   value. That case is caught and treated as "no match", so one
   uncomparable object cannot abort the whole operation. *)
let replace_dict_entry pdf key value search =
  let matches v s =
    try v = s with Invalid_argument _ -> false
  in
  let f d =
    match search with
    | None -> Pdf.replace_dict_entry d key value
    | Some s ->
        begin match Pdf.lookup_direct pdf key d with
        | Some v when matches v s -> Pdf.replace_dict_entry d key value
        | _ -> d
        end
  in
  Pdf.objselfmap (dict_entry_single_object f pdf) pdf;
  pdf.Pdf.trailerdict <- dict_entry_single_object f pdf pdf.Pdf.trailerdict
(* Print, as JSON on standard output, each value found under [key] in the
   dictionaries reached by [dict_entry_single_object]. Dictionaries are
   handed back unchanged.
   FIXME no need to self map here, since nothing changes *)
let print_dict_entry pdf key =
  let show_value dict =
    begin match Pdf.lookup_direct pdf key dict with
    | None -> ()
    | Some found ->
        let json = Cpdfjson.json_of_object pdf (fun _ -> ()) false false found in
        (* A double newline separates successive values. *)
        Printf.printf "%s\n\n" (Cpdfyojson.Safe.to_string json)
    end;
    dict
  in
  let visit = dict_entry_single_object show_value pdf in
  Pdf.objselfmap visit pdf;
  pdf.Pdf.trailerdict <- visit pdf.Pdf.trailerdict
let remove_clipping_ops pdf resources content =
let ops = Pdfops.parse_operators pdf resources content in

View File

@ -387,9 +387,11 @@ val ocg_order_all : Pdf.t -> unit
val stamp_as_xobject : Pdf.t -> int list -> Pdf.t -> Pdf.t * string
(* Remove dictionary entries stored under the given key. In the
   three-argument form, [Some v] restricts removal to entries whose value
   equals [v]; [None] removes them unconditionally. *)
val remove_dict_entry : Pdf.t -> string -> unit
val remove_dict_entry : Pdf.t -> string -> Pdf.pdfobject option -> unit
(* Replace the value stored under the given key with a new value. The final
   optional argument restricts the replacement to entries whose current
   value equals it. *)
val replace_dict_entry : Pdf.t -> string -> string -> string option -> unit
val replace_dict_entry : Pdf.t -> string -> Pdf.pdfobject -> Pdf.pdfobject option -> unit
(* Print, in JSON form on standard output, the values stored under the
   given key. *)
val print_dict_entry : Pdf.t -> string -> unit
val remove_clipping : Pdf.t -> int list -> Pdf.t

View File

@ -181,6 +181,7 @@ type op =
| Revisions
| RemoveDictEntry of string
| ReplaceDictEntry of string
| PrintDictEntry of string
| ListSpotColours
| RemoveClipping
| SetMetadataDate of string
@ -201,6 +202,7 @@ type op =
| StampAsXObject of string
let string_of_op = function
| PrintDictEntry _ -> "PrintDictEntry"
| Impose _ -> "Impose"
| CopyFont _ -> "CopyFont"
| CountPages -> "CountPages"
@ -466,8 +468,8 @@ type args =
mutable impose_spacing : float;
mutable impose_linewidth : float;
mutable format_json : bool;
mutable replace_dict_entry_value : string;
mutable dict_entry_search : string option}
mutable replace_dict_entry_value : Pdf.pdfobject;
mutable dict_entry_search : Pdf.pdfobject option}
let args =
{op = None;
@ -582,7 +584,7 @@ let args =
impose_spacing = 0.;
impose_linewidth = 0.;
format_json = false;
replace_dict_entry_value = "";
replace_dict_entry_value = Pdf.Null;
dict_entry_search = None}
let reset_arguments () =
@ -683,7 +685,7 @@ let reset_arguments () =
args.impose_spacing <- 0.;
args.impose_linewidth <- 0.;
args.format_json <- false;
args.replace_dict_entry_value <- "";
args.replace_dict_entry_value <- Pdf.Null;
args.dict_entry_search <- None
(* Do not reset original_filename or cpdflin or was_encrypted or
* was_decrypted_with_owner or recrypt or producer or creator or path_to_* or
@ -743,7 +745,7 @@ let banned banlist = function
| RemoveId | OpenAtPageFit _ | OpenAtPage _ | SetPageLayout _
| ShowBoxes | TrimMarks | CreateMetadata | SetMetadataDate _ | SetVersion _
| SetAuthor _|SetTitle _|SetSubject _|SetKeywords _|SetCreate _
| SetModify _|SetCreator _|SetProducer _|RemoveDictEntry _ | ReplaceDictEntry _ | SetMetadata _
| SetModify _|SetCreator _|SetProducer _|RemoveDictEntry _ | ReplaceDictEntry _ | PrintDictEntry _ | SetMetadata _
| ExtractText | ExtractImages | ExtractFontFile
| AddPageLabels | RemovePageLabels | OutputJSON | OCGCoalesce
| OCGRename | OCGList | OCGOrderAll
@ -1599,11 +1601,22 @@ let setimposelinewidth f =
(* Select the ReplaceDictEntry operation, carrying the string [s]. *)
let setreplacedictentry s =
  let operation = ReplaceDictEntry s in
  setop operation ()
(* Select the PrintDictEntry operation, carrying the string [s]. *)
let setprintdictentry s =
  let operation = PrintDictEntry s in
  setop operation ()
(* Record the replacement value in [args.replace_dict_entry_value].
   NOTE(review): this listing shows two bodies for the same function: a
   direct assignment of the raw string [s], and a version that parses [s]
   with [Cpdfyojson.Safe.from_string], converts it with
   [Cpdfjson.object_of_json] and stores the result. In the parsing version
   any exception is passed to [error] with a
   "Failed to parse replacement value" message. *)
let setreplacedictentryvalue s =
args.replace_dict_entry_value <- s
try
let pdfobj = Cpdfjson.object_of_json (Cpdfyojson.Safe.from_string s) in
args.replace_dict_entry_value <- pdfobj
with
e -> error (Printf.sprintf "Failed to parse replacement value: %s\n" (Printexc.to_string e))
(* Record the search term in [args.dict_entry_search].
   NOTE(review): this listing shows two bodies for the same function: one
   storing [Some s] with the raw string, and one that parses [s] with
   [Cpdfyojson.Safe.from_string], converts it with
   [Cpdfjson.object_of_json] and stores [Some] of the result. In the parsing
   version any exception is passed to [error] with a
   "Failed to parse search term" message. *)
let setdictentrysearch s =
args.dict_entry_search <- Some s
try
let pdfobj = Cpdfjson.object_of_json (Cpdfyojson.Safe.from_string s) in
args.dict_entry_search <- Some pdfobj
with
e -> error (Printf.sprintf "Failed to parse search term: %s\n" (Printexc.to_string e))
let whingemalformed () =
prerr_string "Command line must be of exactly the form\ncpdf <infile> -gs <path> -gs-malformed-force -o <outfile>\n";
@ -2254,6 +2267,9 @@ and specs =
("-dict-entry-search",
Arg.String setdictentrysearch,
" Search string for -remove-dict-entry and -replace-dict-entry");
("-print-dict-entry",
Arg.String setprintdictentry,
" Print dictionary values of a given key");
("-producer",
Arg.String setproduceraswego,
" Change the /Producer entry in the /Info dictionary");
@ -4128,12 +4144,15 @@ let go () =
(map Pdfpagelabels.string_of_pagelabel (Pdfpagelabels.read pdf))
| Some (RemoveDictEntry key) ->
let pdf = get_single_pdf args.op true in
Cpdf.remove_dict_entry pdf key;
Cpdf.remove_dict_entry pdf key args.dict_entry_search;
write_pdf false pdf
| Some (ReplaceDictEntry key) ->
let pdf = get_single_pdf args.op true in
Cpdf.replace_dict_entry pdf key args.replace_dict_entry_value args.dict_entry_search;
write_pdf false pdf
| Some (PrintDictEntry key) ->
let pdf = get_single_pdf args.op true in
Cpdf.print_dict_entry pdf key
| Some ListSpotColours ->
let pdf = get_single_pdf args.op false in
list_spot_colours pdf

View File

@ -4,7 +4,7 @@
%Document -impose and friends (inc. 0-w, 0-h for long ones, how lines scale etc., undefined if pages different sizes)
%Document -bookmarks-json including mentioning UTF8
%Document -list-annotations-json
%Document -replace-dict-entry and search extension to -remove-dict-entry
%Document -replace-dict-entry and search extension to -remove-dict-entry, and -print-dict-entry
\documentclass{book}
% Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc.
\usepackage{comment}\excludecomment{cpdflib}\excludecomment{pycpdflib}