From 97ceabeb0d3d28f629c4e6c816bfe7be80fae345 Mon Sep 17 00:00:00 2001
From: John Whitington
Date: Tue, 3 Jun 2025 16:03:13 +0100
Subject: [PATCH] Finishes -contains-javascript and -remove-javascript

---
 Makefile       |  2 +-
 cpdfcommand.ml | 38 ++-----------------------------------
 cpdfjs.ml      | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++
 cpdfjs.mli     |  7 +++++++
 4 files changed, 61 insertions(+), 37 deletions(-)
 create mode 100644 cpdfjs.ml
 create mode 100644 cpdfjs.mli

diff --git a/Makefile b/Makefile
index 02438ac..85d26fc 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DOC = cpdfutil cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime \
       cpdfdraft cpdfspot cpdfpagelabels cpdfcreate cpdfannot cpdfxobject \
       cpdfimpose cpdfchop cpdftweak cpdfprinttree cpdfua cpdftexttopdf \
       cpdftoc cpdfjpeg cpdfjpeg2000 cpdfpng cpdfimage cpdfdraw \
-      cpdfcomposition cpdfshape cpdfcolours cpdfdrawcontrol cpdfform \
+      cpdfcomposition cpdfshape cpdfcolours cpdfdrawcontrol cpdfform cpdfjs \
       cpdfcommand
 
 MODS = $(NONDOC) $(DOC)
diff --git a/cpdfcommand.ml b/cpdfcommand.ml
index 1b4efab..fa2c70a 100644
--- a/cpdfcommand.ml
+++ b/cpdfcommand.ml
@@ -3829,40 +3829,6 @@ let write_images device res quality boxname annots antialias downsample spec pdf
     (ilist 1 endpage);
   Sys.remove tmppdf
 
-let remove_javascript pdf =
-  (* Find /S /JavaScript and empty the /JS string. Also, Empty out any /URI (javascript). *)
-  Pdf.objselfmap (fun o -> o) pdf;
-  (* Process the /Root -> /Names -> /JavaScript *)
-  ignore (Pdf.remove_chain pdf ["/Root"; "/Names"; "/JavaScript"])
-
-let contains_javascript pdf =
-  let found = ref false in
-  (* Any dictionary with /S /JavaScript or any /URI (javascript:...) *)
-  let rec contains_javascript_single_object f pdf = function
-    | (Pdf.Dictionary d) -> f (Pdf.recurse_dict (contains_javascript_single_object f pdf) d)
-    | (Pdf.Stream {contents = (Pdf.Dictionary dict, data)}) ->
-        f (Pdf.Stream {contents = (Pdf.recurse_dict (contains_javascript_single_object f pdf) dict, data)})
-    | Pdf.Array a -> Pdf.recurse_array (contains_javascript_single_object f pdf) a
-    | x -> x
-  in
-  let f d =
-    begin match Pdf.lookup_direct pdf "/S" d with
-    | Some (Pdf.String "/JavaScript") -> set found
-    | _ -> ()
-    end;
-    begin match Pdf.lookup_direct pdf "/URI" d with
-    | Some (Pdf.String s) when String.length s >= 11 && String.sub s 0 11 = "javascript:" -> set found; d
-    | _ -> d
-    end
-  in
-  Pdf.objiter (fun _ obj -> ignore (contains_javascript_single_object f pdf obj)) pdf;
-  (* Any /Root -> /Names -> /JavaScript *)
-  begin match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/Names"; "/JavaScript"] with
-  | Some _ -> set found
-  | None -> ()
-  end;
-  print_string (Printf.sprintf "%b" !found)
-
 (* Main function *)
 let go () =
   check_bookmarks_mistake ();
@@ -5031,11 +4997,11 @@ let go () =
       write_pdf false pdf
   | Some RemoveJavaScript ->
       let pdf = get_single_pdf args.op false in
-      remove_javascript pdf;
+      Cpdfjs.remove_javascript pdf;
       write_pdf false pdf
   | Some ContainsJavaScript ->
       let pdf = get_single_pdf args.op true in
-      contains_javascript pdf
+      print_string (Printf.sprintf "%b" (Cpdfjs.contains_javascript pdf))
 
 (* Advise the user if a combination of command line flags makes little sense,
 or error out if it make no sense at all. *)
diff --git a/cpdfjs.ml b/cpdfjs.ml
new file mode 100644
index 0000000..9c6bda6
--- /dev/null
+++ b/cpdfjs.ml
@@ -0,0 +1,51 @@
+open Pdfutil
+
+(* Blank every /JS entry and every /URI starting "javascript:", then drop /Root -> /Names -> /JavaScript. Mutates pdf. *)
+let remove_javascript pdf =
+  let rec remove_javascript_single_object f pdf = function
+    | (Pdf.Dictionary d) -> f (Pdf.recurse_dict (remove_javascript_single_object f pdf) d)
+    | (Pdf.Stream {contents = (Pdf.Dictionary dict, data)}) ->
+        f (Pdf.Stream {contents = (Pdf.recurse_dict (remove_javascript_single_object f pdf) dict, data)})
+    | Pdf.Array a -> Pdf.recurse_array (remove_javascript_single_object f pdf) a
+    | x -> x
+  in
+  let f d =
+    let d =
+      match Pdf.lookup_direct pdf "/JS" d with
+      | Some _ -> Pdf.add_dict_entry d "/JS" (Pdf.String "")
+      | None -> d
+    in
+      match Pdf.lookup_direct pdf "/URI" d with
+      | Some (Pdf.String s) when String.length s >= 11 && String.sub s 0 11 = "javascript:" -> Pdf.add_dict_entry d "/URI" (Pdf.String "")
+      | _ -> d
+  in
+    Pdf.objselfmap (remove_javascript_single_object f pdf) pdf;
+    ignore (Pdf.remove_chain pdf ["/Root"; "/Names"; "/JavaScript"])
+
+(* True if any dictionary has /S /JavaScript or a /URI starting "javascript:", or /Root -> /Names -> /JavaScript exists. *)
+let contains_javascript pdf =
+  let found = ref false in
+  let rec contains_javascript_single_object f pdf = function
+    | (Pdf.Dictionary d) -> f (Pdf.recurse_dict (contains_javascript_single_object f pdf) d)
+    | (Pdf.Stream {contents = (Pdf.Dictionary dict, data)}) ->
+        f (Pdf.Stream {contents = (Pdf.recurse_dict (contains_javascript_single_object f pdf) dict, data)})
+    | Pdf.Array a -> Pdf.recurse_array (contains_javascript_single_object f pdf) a
+    | x -> x
+  in
+  let f d =
+    begin match Pdf.lookup_direct pdf "/S" d with
+    | Some (Pdf.Name "/JavaScript") -> set found (* /S holds a name object, so match Pdf.Name rather than Pdf.String *)
+    | _ -> ()
+    end;
+    begin match Pdf.lookup_direct pdf "/URI" d with
+    | Some (Pdf.String s) when String.length s >= 11 && String.sub s 0 11 = "javascript:" -> set found; d
+    | _ -> d
+    end
+  in
+  Pdf.objiter (fun _ obj -> ignore (contains_javascript_single_object f pdf obj)) pdf;
+  (* Any /Root -> /Names -> /JavaScript *)
+  begin match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/Names"; "/JavaScript"] with
+  | Some _ -> set found
+  | None -> ()
+  end;
+  !found
diff --git a/cpdfjs.mli b/cpdfjs.mli
new file mode 100644
index 0000000..15c9245
--- /dev/null
+++ b/cpdfjs.mli
@@ -0,0 +1,7 @@
+(** JavaScript *)
+
+(** True if a document contains JavaScript *)
+val contains_javascript : Pdf.t -> bool
+
+(** Remove JavaScript from a document *)
+val remove_javascript : Pdf.t -> unit