Finishes -contains-javascript and -remove-javascript

This commit is contained in:
John Whitington
2025-06-03 16:03:13 +01:00
parent ad25ad22c9
commit 97ceabeb0d
4 changed files with 61 additions and 37 deletions

View File

@ -8,7 +8,7 @@ DOC = cpdfutil cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime \
cpdfdraft cpdfspot cpdfpagelabels cpdfcreate cpdfannot cpdfxobject \
cpdfimpose cpdfchop cpdftweak cpdfprinttree cpdfua cpdftexttopdf \
cpdftoc cpdfjpeg cpdfjpeg2000 cpdfpng cpdfimage cpdfdraw \
cpdfcomposition cpdfshape cpdfcolours cpdfdrawcontrol cpdfform \
cpdfcomposition cpdfshape cpdfcolours cpdfdrawcontrol cpdfform cpdfjs \
cpdfcommand
MODS = $(NONDOC) $(DOC)

View File

@ -3829,40 +3829,6 @@ let write_images device res quality boxname annots antialias downsample spec pdf
(ilist 1 endpage);
Sys.remove tmppdf
(* Placeholder JavaScript removal. The intended behaviour is to blank the /JS
   string of every /S /JavaScript dictionary and every javascript: /URI, but
   the per-object pass below is still an identity map; only the
   /Root -> /Names -> /JavaScript chain is actually handed to
   Pdf.remove_chain. *)
let remove_javascript pdf =
  let javascript_names_chain = ["/Root"; "/Names"; "/JavaScript"] in
  (* Per-object pass: currently maps every object to itself. *)
  Pdf.objselfmap (fun obj -> obj) pdf;
  (* Document-level JavaScript reached from the catalog. *)
  ignore (Pdf.remove_chain pdf javascript_names_chain)
(* Print "true" or "false" according to whether [pdf] appears to contain
   JavaScript, i.e. whether any of the following holds:
   - some dictionary or stream dictionary has /S /JavaScript;
   - some /URI string begins with the "javascript:" scheme;
   - /Root -> /Names -> /JavaScript exists. *)
let contains_javascript pdf =
  let found = ref false in
  (* URI schemes are case-insensitive (RFC 3986, section 3.1), so
     "JavaScript:..." must be caught as well as "javascript:...". *)
  let is_javascript_uri s =
    let scheme = "javascript:" in
    let n = String.length scheme in
    String.length s >= n && String.lowercase_ascii (String.sub s 0 n) = scheme
  in
  (* Inspect one dictionary or stream; hand it back unchanged so that it can
     be used with the rebuilding traversal below. *)
  let inspect d =
    begin match Pdf.lookup_direct pdf "/S" d with
    (* The action type is a PDF name object. The string form is still
       accepted so that malformed files keep being reported. *)
    | Some (Pdf.Name "/JavaScript" | Pdf.String "/JavaScript") -> found := true
    | _ -> ()
    end;
    begin match Pdf.lookup_direct pdf "/URI" d with
    | Some (Pdf.String s) when is_javascript_uri s -> found := true
    | _ -> ()
    end;
    d
  in
  (* Children first, then the enclosing dictionary or stream dictionary. *)
  let rec walk = function
    | Pdf.Dictionary d -> inspect (Pdf.recurse_dict walk d)
    | Pdf.Stream {contents = (Pdf.Dictionary dict, data)} ->
        inspect (Pdf.Stream {contents = (Pdf.recurse_dict walk dict, data)})
    | Pdf.Array a -> Pdf.recurse_array walk a
    | x -> x
  in
  Pdf.objiter (fun _ obj -> ignore (walk obj)) pdf;
  (* Any /Root -> /Names -> /JavaScript *)
  begin match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/Names"; "/JavaScript"] with
  | Some _ -> found := true
  | None -> ()
  end;
  print_string (Printf.sprintf "%b" !found)
(* Main function *)
let go () =
check_bookmarks_mistake ();
@ -5031,11 +4997,11 @@ let go () =
write_pdf false pdf
| Some RemoveJavaScript ->
let pdf = get_single_pdf args.op false in
remove_javascript pdf;
Cpdfjs.remove_javascript pdf;
write_pdf false pdf
| Some ContainsJavaScript ->
let pdf = get_single_pdf args.op true in
contains_javascript pdf
print_string (Printf.sprintf "%b" (Cpdfjs.contains_javascript pdf))
(* Advise the user if a combination of command line flags makes little sense,
or error out if it makes no sense at all. *)

51
cpdfjs.ml Normal file
View File

@ -0,0 +1,51 @@
open Pdfutil
(* Neutralise JavaScript in [pdf], in place:
   - every dictionary or stream dictionary with a /JS entry has that entry
     replaced by the empty string;
   - every /URI string that begins with the "javascript:" scheme is replaced
     by the empty string;
   - the /Root -> /Names -> /JavaScript chain is passed to Pdf.remove_chain. *)
let remove_javascript pdf =
  (* URI schemes are case-insensitive (RFC 3986, section 3.1), so
     "JavaScript:..." must be blanked as well as "javascript:...". *)
  let is_javascript_uri s =
    let scheme = "javascript:" in
    let n = String.length scheme in
    String.length s >= n && String.lowercase_ascii (String.sub s 0 n) = scheme
  in
  (* Blank the offending entries of a single dictionary or stream. *)
  let sanitise d =
    let d =
      match Pdf.lookup_direct pdf "/JS" d with
      | Some _ -> Pdf.add_dict_entry d "/JS" (Pdf.String "")
      | None -> d
    in
    match Pdf.lookup_direct pdf "/URI" d with
    | Some (Pdf.String s) when is_javascript_uri s ->
        Pdf.add_dict_entry d "/URI" (Pdf.String "")
    | _ -> d
  in
  (* Bottom-up walk: children are rewritten first, then [sanitise] is applied
     to each dictionary and stream dictionary. Arrays are only traversed. *)
  let rec walk = function
    | Pdf.Dictionary d -> sanitise (Pdf.recurse_dict walk d)
    | Pdf.Stream {contents = (Pdf.Dictionary dict, data)} ->
        sanitise (Pdf.Stream {contents = (Pdf.recurse_dict walk dict, data)})
    | Pdf.Array a -> Pdf.recurse_array walk a
    | x -> x
  in
  Pdf.objselfmap walk pdf;
  ignore (Pdf.remove_chain pdf ["/Root"; "/Names"; "/JavaScript"])
(* True if [pdf] appears to contain JavaScript, i.e. if any of these holds:
   - some dictionary or stream dictionary has /S /JavaScript, unless its /JS
     entry is the empty string (which is what remove_javascript leaves
     behind, so a sanitised file is not reported);
   - some /URI string begins with the "javascript:" scheme;
   - /Root -> /Names -> /JavaScript exists. *)
let contains_javascript pdf =
  let found = ref false in
  (* URI schemes are case-insensitive (RFC 3986, section 3.1), so
     "JavaScript:..." must be caught as well as "javascript:...". *)
  let is_javascript_uri s =
    let scheme = "javascript:" in
    let n = String.length scheme in
    String.length s >= n && String.lowercase_ascii (String.sub s 0 n) = scheme
  in
  (* Inspect the entries of one dictionary or stream. *)
  let inspect d =
    begin match Pdf.lookup_direct pdf "/S" d with
    (* The action type is a PDF name object. The string form is still
       accepted so that malformed files keep being reported. *)
    | Some (Pdf.Name "/JavaScript" | Pdf.String "/JavaScript") ->
        begin match Pdf.lookup_direct pdf "/JS" d with
        | Some (Pdf.String "") -> () (* script already blanked *)
        | _ -> found := true
        end
    | _ -> ()
    end;
    match Pdf.lookup_direct pdf "/URI" d with
    | Some (Pdf.String s) when is_javascript_uri s -> found := true
    | _ -> ()
  in
  (* Read-only walk over nested dictionaries, stream dictionaries and arrays;
     nothing is rebuilt since the objects are only inspected. *)
  let rec walk obj =
    match obj with
    | Pdf.Dictionary entries ->
        List.iter (fun (_, v) -> walk v) entries;
        inspect obj
    | Pdf.Stream {contents = (Pdf.Dictionary entries, _)} ->
        List.iter (fun (_, v) -> walk v) entries;
        inspect obj
    | Pdf.Array elts -> List.iter walk elts
    | _ -> ()
  in
  Pdf.objiter (fun _ obj -> walk obj) pdf;
  (* Any /Root -> /Names -> /JavaScript *)
  begin match Pdf.lookup_chain pdf pdf.Pdf.trailerdict ["/Root"; "/Names"; "/JavaScript"] with
  | Some _ -> found := true
  | None -> ()
  end;
  !found

7
cpdfjs.mli Normal file
View File

@ -0,0 +1,7 @@
(** JavaScript *)
(** [contains_javascript pdf] is [true] if [pdf] contains JavaScript. The
    implementation looks for dictionaries carrying an [/S] entry of
    [/JavaScript], for [/URI] strings beginning with ["javascript:"], and for
    a [/Root -> /Names -> /JavaScript] entry. The document is not modified. *)
val contains_javascript : Pdf.t -> bool
(** [remove_javascript pdf] removes JavaScript from [pdf], modifying it in
    place: every [/JS] entry and every [/URI] string beginning with
    ["javascript:"] is replaced by the empty string, and the
    [/Root -> /Names -> /JavaScript] chain is removed via [Pdf.remove_chain]. *)
val remove_javascript : Pdf.t -> unit