From 1048bdf1dfe2d7141355891ea70476a75da656f2 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Sat, 18 Dec 2021 16:14:31 +0000 Subject: [PATCH] more --- Makefile | 2 +- cpdf.ml | 174 ------------------------------------------------- cpdf.mli | 14 ---- cpdfcommand.ml | 54 +++++++++++++-- cpdfocg.ml | 134 +++++++++++++++++++++++++++++++++++++ cpdfocg.mli | 9 +++ 6 files changed, 192 insertions(+), 195 deletions(-) create mode 100644 cpdfocg.ml create mode 100644 cpdfocg.mli diff --git a/Makefile b/Makefile index ca91a5f..2c60f7b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ MODS = cpdfyojson cpdfxmlm \ cpdfunicodedata cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \ cpdfpagespec cpdfposition cpdf cpdfpresent cpdffont cpdftype \ - cpdftexttopdf cpdftoc cpdfpad cpdfcommand + cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfcommand SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml diff --git a/cpdf.ml b/cpdf.ml index 5676d98..8da0b53 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -59,46 +59,6 @@ let report_pdf_size pdf = flush stdout; close_in fh -(* Prefer a) the one given with -cpdflin b) a local cpdflin, c) otherwise assume -installed at a system place *) -let find_cpdflin provided = - match provided with - Some x -> x - | None -> - let dotslash = match Sys.os_type with "Win32" -> "" | _ -> "./" in - if Sys.file_exists "cpdflin" then (dotslash ^ "cpdflin") else - if Sys.file_exists "cpdflin.exe" then (dotslash ^ "cpdflin.exe") else - match Sys.os_type with - "Win32" -> "cpdflin.exe" - | _ -> "cpdflin" - -(* Call cpdflin, given the (temp) input name, the output name, and the location -of the cpdflin binary. Returns the exit code. 
*) -let call_cpdflin cpdflin temp output best_password = - let command = - cpdflin ^ " --linearize " ^ " --password=" ^ best_password ^ " " ^ - Filename.quote temp ^ " " ^ Filename.quote output - in - match Sys.os_type with - "Win32" -> - (* On windows, don't use LD_LIBRARY_PATH - it will happen automatically *) - if !debug then prerr_endline command; - Sys.command command - | _ -> - (* On other platforms, if -cpdflin was provided, or cpdflin was in the - current folder, set up LD_LIBRARY_PATH: *) - match cpdflin with - "cpdflin" -> - if !debug then prerr_endline command; - Sys.command command - | _ -> - let command = - "DYLD_FALLBACK_LIBRARY_PATH=" ^ Filename.dirname cpdflin ^ " " ^ - "LD_LIBRARY_PATH=" ^ Filename.dirname cpdflin ^ " " ^ - command - in - if !debug then prerr_endline command; - Sys.command command (* Recompress anything which isn't compressed, unless it's metadata. *) let recompress_stream pdf = function @@ -3856,140 +3816,6 @@ let append_page_content_page fast s before pdf n page = let append_page_content s before fast range pdf = process_pages (ppstub (append_page_content_page fast s before pdf)) pdf range - -(* 1. Get list of indirects of all OCGs from the /OCProperties, and their textual names - * 2. Calculate a change list to coalesce them - * 3. Remove any changed ones from the /OCGs and /Order and /ON and /OFF in /OCProperties - * 4. Do the changes to all indirect references in the whole pdf *) -(*FIXME Pre-existing nulls - what to do? 
*) -let ocg_coalesce pdf = - match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with - None -> () - | Some ocpdict -> - let number_name_pairs = - match Pdf.lookup_direct pdf "/OCGs" ocpdict with - Some (Pdf.Array ocgs) -> - begin let numbers = - map (function Pdf.Indirect i -> i | _ -> failwith "Malformed /OCG entry") ocgs - in - let names = - map - (fun i -> - try - begin match Pdf.lookup_obj pdf i with - Pdf.Dictionary d -> - begin match Pdf.lookup_direct pdf "/Name" (Pdf.Dictionary d) with - Some (Pdf.String s) -> s - | _ -> failwith "ocg: missing name" - end - | _ -> - failwith "ocg: not a dictionary" - end - with _ -> failwith "OCG object missing") - numbers - in - combine numbers names - end - | _ -> failwith "Malformed or missing /OCGs" - in - (*iter (fun (num, name) -> Printf.printf "%i = %s\n" num name) number_name_pairs;*) - let changes = - let cf (_, name) (_, name') = compare name name' in - let sets = collate cf (List.stable_sort cf number_name_pairs) in - flatten (option_map (function [] -> None | (hnum, _)::t -> Some (map (function (tnum, _) -> (tnum, hnum)) t)) sets) - in - (*Printf.printf "\nChanges are:\n"; - List.iter (fun (f, t) -> Printf.printf "%i -> %i\n" f t) changes;*) - let new_ocproperties = - let remove_from_array key nums dict = - match Pdf.lookup_direct pdf key dict with - | Some (Pdf.Array elts) -> - let elts' = option_map (function Pdf.Indirect i -> if mem i nums then None else Some (Pdf.Indirect i) | _ -> None) elts in - Pdf.add_dict_entry dict key (Pdf.Array elts') - | _ -> dict - in - let remove_from_array_inside_d key nums dict = - match Pdf.lookup_direct pdf "/D" dict with - | Some (Pdf.Dictionary ddict) -> - begin match Pdf.lookup_direct pdf key (Pdf.Dictionary ddict) with - | Some (Pdf.Array elts) -> - let elts' = option_map (function Pdf.Indirect i -> if mem i nums then None else Some (Pdf.Indirect i) | _ -> None) elts in - Pdf.add_dict_entry dict "/D" (Pdf.add_dict_entry (Pdf.Dictionary ddict) key 
(Pdf.Array elts')) - | _ -> dict - end - | _ -> failwith "No /D dict in OCGProperties" - in - let nums = map fst changes in - (*Printf.printf "\nto remove:\n"; - List.iter (Printf.printf "%i ") nums;*) - remove_from_array "/OCGs" nums - (remove_from_array_inside_d "/ON" nums - (remove_from_array_inside_d "/OFF" nums - (remove_from_array_inside_d "/Order" nums ocpdict))) - in - (*flprint (Pdfwrite.string_of_pdf new_ocproperties);*) - let ocp_objnum = Pdf.addobj pdf new_ocproperties in - let new_catalog = Pdf.addobj pdf (Pdf.add_dict_entry (Pdf.catalog_of_pdf pdf) "/OCProperties" (Pdf.Indirect ocp_objnum)) in - pdf.Pdf.trailerdict <- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect new_catalog); - pdf.Pdf.root <- new_catalog; - Pdf.objselfmap (Pdf.renumber_object_parsed pdf (hashtable_of_dictionary changes)) pdf - -let ocg_get_list pdf = - let l = ref [] in - begin match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with - None -> () - | Some ocpdict -> - match Pdf.lookup_direct pdf "/OCGs" ocpdict with - Some (Pdf.Array elts) -> - iter - (function - Pdf.Indirect i -> - (match Pdf.lookup_direct pdf "/Name" (Pdf.lookup_obj pdf i) with - Some (Pdf.String s) -> l := s::!l | _ -> ()) - | _ -> ()) - elts - | _ -> () - end; - rev !l - -let ocg_list pdf = - List.iter (Printf.printf "%s\n") (ocg_get_list pdf) - -let ocg_rename f t pdf = - Pdf.objselfmap - (function - Pdf.Dictionary d -> - begin match Pdf.lookup_direct pdf "/Type" (Pdf.Dictionary d) with - Some (Pdf.Name "/OCG") -> - begin match Pdf.lookup_direct pdf "/Name" (Pdf.Dictionary d) with - Some (Pdf.String s) when s = f -> - Pdf.add_dict_entry (Pdf.Dictionary d) "/Name" (Pdf.String t) - | _ -> Pdf.Dictionary d - end - | _ -> Pdf.Dictionary d - end - | x -> x - ) - pdf - -let ocg_order_all pdf = - match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with - None -> () - | Some ocpdict -> - match Pdf.lookup_direct pdf "/OCGs" ocpdict with - Some (Pdf.Array elts) -> - begin 
match Pdf.lookup_direct pdf "/D" ocpdict with - Some (Pdf.Dictionary d) -> - let newd = Pdf.add_dict_entry (Pdf.Dictionary d) "/Order" (Pdf.Array elts) in - let new_ocproperties = Pdf.add_dict_entry ocpdict "/D" newd in - let ocp_objnum = Pdf.addobj pdf new_ocproperties in - let new_catalog = Pdf.addobj pdf (Pdf.add_dict_entry (Pdf.catalog_of_pdf pdf) "/OCProperties" (Pdf.Indirect ocp_objnum)) in - pdf.Pdf.trailerdict <- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect new_catalog); - pdf.Pdf.root <- new_catalog - | _ -> () - end - | _ -> () - (* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes. * * We use different dash lengths and colours to help distinguish coincident diff --git a/cpdf.mli b/cpdf.mli index 7262dcf..3f4efc4 100644 --- a/cpdf.mli +++ b/cpdf.mli @@ -353,26 +353,12 @@ val remove_all_text : int list -> Pdf.t -> Pdf.t val process_xobjects : Pdf.t -> Pdfpage.t -> (Pdf.t -> Pdf.pdfobject -> Pdf.pdfobject list -> Pdf.pdfobject list) -> unit -val find_cpdflin : string option -> string - -val call_cpdflin : string -> string -> string -> string -> int - val debug : bool ref val extract_text : float option -> Pdf.t -> int list -> string val append_page_content : string -> bool -> bool -> int list -> Pdf.t -> Pdf.t -val ocg_coalesce : Pdf.t -> unit - -val ocg_get_list : Pdf.t -> string list - -val ocg_list : Pdf.t -> unit - -val ocg_rename : string -> string -> Pdf.t -> unit - -val ocg_order_all : Pdf.t -> unit - val stamp_as_xobject : Pdf.t -> int list -> Pdf.t -> Pdf.t * string val remove_dict_entry : Pdf.t -> string -> Pdf.pdfobject option -> unit diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 1347405..07f0bf6 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -73,6 +73,7 @@ let parse_pagespec_allow_empty pdf spec = try Cpdfpagespec.parse_pagespec pdf spec with Pdf.PDFError ("Page range specifies no pages") -> [] + (* Operations. 
*) type op = | CopyFont of string @@ -708,6 +709,47 @@ let reset_arguments () = * squeeze options: a little odd, but we want it to happen on eventual * output. *) +(* Prefer a) the one given with -cpdflin b) a local cpdflin, c) otherwise assume +installed at a system place *) +let find_cpdflin provided = + match provided with + Some x -> x + | None -> + let dotslash = match Sys.os_type with "Win32" -> "" | _ -> "./" in + if Sys.file_exists "cpdflin" then (dotslash ^ "cpdflin") else + if Sys.file_exists "cpdflin.exe" then (dotslash ^ "cpdflin.exe") else + match Sys.os_type with + "Win32" -> "cpdflin.exe" + | _ -> "cpdflin" + +(* Call cpdflin, given the (temp) input name, the output name, and the location +of the cpdflin binary. Returns the exit code of Sys.command. The password is quoted with Filename.quote in the same way as the two filenames, so that spaces or shell metacharacters in it cannot split or alter the command line. NOTE(review): the cpdflin path itself is still interpolated unquoted - confirm whether paths containing spaces need to be supported. *) +let call_cpdflin cpdflin temp output best_password = + let command = + cpdflin ^ " --linearize " ^ " --password=" ^ Filename.quote best_password ^ " " ^ + Filename.quote temp ^ " " ^ Filename.quote output + in + match Sys.os_type with + "Win32" -> + (* On windows, don't use LD_LIBRARY_PATH - it will happen automatically *) + if args.debug then prerr_endline command; + Sys.command command + | _ -> + (* On other platforms, if -cpdflin was provided, or cpdflin was in the + current folder, set up LD_LIBRARY_PATH: *) + match cpdflin with + "cpdflin" -> + if args.debug then prerr_endline command; + Sys.command command + | _ -> + let command = + "DYLD_FALLBACK_LIBRARY_PATH=" ^ Filename.dirname cpdflin ^ " " ^ + "LD_LIBRARY_PATH=" ^ Filename.dirname cpdflin ^ " " ^ + command + in + if args.debug then prerr_endline command; + Sys.command command + let get_pagespec () = match args.inputs with | (_, ps, _, _, _, _)::_ -> ps @@ -2745,7 +2787,7 @@ let really_write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf outna end; begin if will_linearize then - let cpdflin = Cpdf.find_cpdflin args.cpdflin in + let cpdflin = find_cpdflin args.cpdflin in match args.inputs with [] -> raise (Pdf.PDFError "no input in recryption") | (_, _, 
user_pw, owner_pw, _, _)::_ -> @@ -2753,7 +2795,7 @@ let really_write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf outna if owner_pw <> "" then owner_pw else user_pw in let code = - Cpdf.call_cpdflin cpdflin outname' outname best_password + call_cpdflin cpdflin outname' outname best_password in if code > 0 then begin @@ -3837,18 +3879,18 @@ let go () = write_json args.out pdf | Some OCGCoalesce -> let pdf = get_single_pdf args.op false in - Cpdf.ocg_coalesce pdf; + Cpdfocg.ocg_coalesce pdf; write_pdf false pdf | Some OCGList -> let pdf = get_single_pdf args.op true in - Cpdf.ocg_list pdf + Cpdfocg.ocg_list pdf | Some OCGRename -> let pdf = get_single_pdf args.op false in - Cpdf.ocg_rename args.ocgrenamefrom args.ocgrenameto pdf; + Cpdfocg.ocg_rename args.ocgrenamefrom args.ocgrenameto pdf; write_pdf false pdf | Some OCGOrderAll -> let pdf = get_single_pdf args.op false in - Cpdf.ocg_order_all pdf; + Cpdfocg.ocg_order_all pdf; write_pdf false pdf | Some (StampAsXObject stamp) -> let stamp_pdf = diff --git a/cpdfocg.ml b/cpdfocg.ml new file mode 100644 index 0000000..dcc6184 --- /dev/null +++ b/cpdfocg.ml @@ -0,0 +1,134 @@ +open Pdfutil + +(* 1. Get list of indirects of all OCGs from the /OCProperties, and their textual names + * 2. Calculate a change list to coalesce them + * 3. Remove any changed ones from the /OCGs and /Order and /ON and /OFF in /OCProperties + * 4. Do the changes to all indirect references in the whole pdf *) +(*FIXME Pre-existing nulls - what to do? 
*) +let ocg_coalesce pdf = (* Merge OCGs listed in /OCProperties /OCGs which share a /Name. Does nothing when the catalog has no /OCProperties; raises Failure on malformed OCG data. *) + match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with + None -> () + | Some ocpdict -> + let number_name_pairs = + match Pdf.lookup_direct pdf "/OCGs" ocpdict with + Some (Pdf.Array ocgs) -> + begin let numbers = + map (function Pdf.Indirect i -> i | _ -> failwith "Malformed /OCG entry") ocgs + in + let names = + map + (fun i -> + try + begin match Pdf.lookup_obj pdf i with + Pdf.Dictionary d -> + begin match Pdf.lookup_direct pdf "/Name" (Pdf.Dictionary d) with + Some (Pdf.String s) -> s + | _ -> failwith "ocg: missing name" + end + | _ -> + failwith "ocg: not a dictionary" + end + with Failure _ as e -> raise e | _ -> failwith "OCG object missing") (* Let the specific Failure diagnostics raised above through unchanged; any other exception from the lookup is still reported as a missing object. *) + numbers + in + combine numbers names + end + | _ -> failwith "Malformed or missing /OCGs" + in + (*iter (fun (num, name) -> Printf.printf "%i = %s\n" num name) number_name_pairs;*) + let changes = (* (duplicate, survivor) object number pairs: within each set of equally named OCGs, every member after the head is mapped to the head. *) + let cf (_, name) (_, name') = compare name name' in + let sets = collate cf (List.stable_sort cf number_name_pairs) in + flatten (option_map (function [] -> None | (hnum, _)::t -> Some (map (function (tnum, _) -> (tnum, hnum)) t)) sets) + in + (*Printf.printf "\nChanges are:\n"; + List.iter (fun (f, t) -> Printf.printf "%i -> %i\n" f t) changes;*) + let new_ocproperties = + let remove_from_array key nums dict = (* Drop the references in nums from the array at key. NOTE(review): elements which are not indirect references are dropped as well (the | _ -> None case) - confirm this is intended for /Order, which may hold nested arrays. *) + match Pdf.lookup_direct pdf key dict with + | Some (Pdf.Array elts) -> + let elts' = option_map (function Pdf.Indirect i -> if mem i nums then None else Some (Pdf.Indirect i) | _ -> None) elts in + Pdf.add_dict_entry dict key (Pdf.Array elts') + | _ -> dict + in + let remove_from_array_inside_d key nums dict = (* As remove_from_array, but for an array held in the /D dictionary of dict. *) + match Pdf.lookup_direct pdf "/D" dict with + | Some (Pdf.Dictionary ddict) -> + begin match Pdf.lookup_direct pdf key (Pdf.Dictionary ddict) with + | Some (Pdf.Array elts) -> + let elts' = option_map (function Pdf.Indirect i -> if mem i nums then None else Some (Pdf.Indirect i) | _ -> None) elts in + Pdf.add_dict_entry dict "/D" (Pdf.add_dict_entry (Pdf.Dictionary ddict) key 
(Pdf.Array elts')) + | _ -> dict + end + | _ -> failwith "No /D dict in /OCProperties" + in + let nums = map fst changes in + (*Printf.printf "\nto remove:\n"; + List.iter (Printf.printf "%i ") nums;*) + remove_from_array "/OCGs" nums + (remove_from_array_inside_d "/ON" nums + (remove_from_array_inside_d "/OFF" nums + (remove_from_array_inside_d "/Order" nums ocpdict))) + in + (*flprint (Pdfwrite.string_of_pdf new_ocproperties);*) + let ocp_objnum = Pdf.addobj pdf new_ocproperties in + let new_catalog = Pdf.addobj pdf (Pdf.add_dict_entry (Pdf.catalog_of_pdf pdf) "/OCProperties" (Pdf.Indirect ocp_objnum)) in + pdf.Pdf.trailerdict <- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect new_catalog); + pdf.Pdf.root <- new_catalog; + Pdf.objselfmap (Pdf.renumber_object_parsed pdf (hashtable_of_dictionary changes)) pdf (* Finally apply the duplicate -> survivor renumbering through Pdf.objselfmap. *) + +let ocg_get_list pdf = (* Names of the OCGs in /OCProperties /OCGs, in array order. Entries which are not indirect references, or which have no string /Name, are skipped. *) + let l = ref [] in + begin match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with + None -> () + | Some ocpdict -> + match Pdf.lookup_direct pdf "/OCGs" ocpdict with + Some (Pdf.Array elts) -> + iter + (function + Pdf.Indirect i -> + (match Pdf.lookup_direct pdf "/Name" (Pdf.lookup_obj pdf i) with + Some (Pdf.String s) -> l := s::!l | _ -> ()) + | _ -> ()) + elts + | _ -> () + end; + rev !l + +let ocg_list pdf = (* Print each name from ocg_get_list on its own line on standard output. *) + List.iter (Printf.printf "%s\n") (ocg_get_list pdf) + +let ocg_rename f t pdf = (* Give the /Name t to each dictionary with /Type /OCG whose /Name is the string f. *) + Pdf.objselfmap + (function + Pdf.Dictionary d -> + begin match Pdf.lookup_direct pdf "/Type" (Pdf.Dictionary d) with + Some (Pdf.Name "/OCG") -> + begin match Pdf.lookup_direct pdf "/Name" (Pdf.Dictionary d) with + Some (Pdf.String s) when s = f -> + Pdf.add_dict_entry (Pdf.Dictionary d) "/Name" (Pdf.String t) + | _ -> Pdf.Dictionary d + end + | _ -> Pdf.Dictionary d + end + | x -> x + ) + pdf + +let ocg_order_all pdf = (* Make /Order in the /D dictionary list every entry of /OCGs. Does nothing when /OCProperties, /OCGs or /D is absent. *) + match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with + None -> () + | Some ocpdict -> + match Pdf.lookup_direct pdf "/OCGs" ocpdict with + Some (Pdf.Array elts) -> + begin 
match Pdf.lookup_direct pdf "/D" ocpdict with + Some (Pdf.Dictionary d) -> (* Overwrite /Order in the /D dictionary with the elts array bound by the enclosing /OCGs match, then store the updated /OCProperties and a catalog pointing at it with Pdf.addobj and make that catalog the document root. *) + let newd = Pdf.add_dict_entry (Pdf.Dictionary d) "/Order" (Pdf.Array elts) in + let new_ocproperties = Pdf.add_dict_entry ocpdict "/D" newd in + let ocp_objnum = Pdf.addobj pdf new_ocproperties in + let new_catalog = Pdf.addobj pdf (Pdf.add_dict_entry (Pdf.catalog_of_pdf pdf) "/OCProperties" (Pdf.Indirect ocp_objnum)) in + pdf.Pdf.trailerdict <- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect new_catalog); + pdf.Pdf.root <- new_catalog + | _ -> () + end + | _ -> () diff --git a/cpdfocg.mli b/cpdfocg.mli new file mode 100644 index 0000000..8d13d01 --- /dev/null +++ b/cpdfocg.mli @@ -0,0 +1,9 @@ +val ocg_coalesce : Pdf.t -> unit (** Merge the optional content groups listed in /OCProperties /OCGs which share a /Name: references to the duplicates are redirected to one surviving group, and the duplicates are removed from /OCGs and from /ON, /OFF and /Order in /D. Does nothing if the catalog has no /OCProperties. @raise Failure if the OCG data is malformed. *) + +val ocg_get_list : Pdf.t -> string list (** The /Name strings of the entries of /OCProperties /OCGs, in array order. Entries which are not indirect references or have no string /Name are skipped; the list is empty if /OCProperties or /OCGs is absent. *) + +val ocg_list : Pdf.t -> unit (** Print the result of [ocg_get_list] to standard output, one name per line. *) + +val ocg_rename : string -> string -> Pdf.t -> unit (** [ocg_rename f t pdf] gives the /Name [t] to each dictionary with /Type /OCG whose /Name is the string [f], applied through Pdf.objselfmap. *) + +val ocg_order_all : Pdf.t -> unit (** Set /Order in the /D dictionary of /OCProperties to the whole /OCGs array. Does nothing if /OCProperties, /OCGs or /D is absent. *)