This commit is contained in:
John Whitington 2021-12-18 16:14:31 +00:00
parent e36552bffd
commit 1048bdf1df
6 changed files with 192 additions and 195 deletions

View File

@ -2,7 +2,7 @@
MODS = cpdfyojson cpdfxmlm \ MODS = cpdfyojson cpdfxmlm \
cpdfunicodedata cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \ cpdfunicodedata cpdferror cpdfjson cpdfstrftime cpdfcoord cpdfattach \
cpdfpagespec cpdfposition cpdf cpdfpresent cpdffont cpdftype \ cpdfpagespec cpdfposition cpdf cpdfpresent cpdffont cpdftype \
cpdftexttopdf cpdftoc cpdfpad cpdfcommand cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfcommand
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml

174
cpdf.ml
View File

@ -59,46 +59,6 @@ let report_pdf_size pdf =
flush stdout; flush stdout;
close_in fh close_in fh
(* Decide which cpdflin executable to run. In order of preference:
   a) the path supplied by the caller (the -cpdflin option),
   b) a cpdflin or cpdflin.exe found in the current directory,
   c) the bare program name, assumed to be installed at a system place. *)
let find_cpdflin provided =
  let on_windows = Sys.os_type = "Win32" in
  (* A local binary is addressed relative to the current directory, except on
     Windows where no prefix is added. *)
  let local name = (if on_windows then "" else "./") ^ name in
  match provided with
  | Some path -> path
  | None ->
      if Sys.file_exists "cpdflin" then local "cpdflin"
      else if Sys.file_exists "cpdflin.exe" then local "cpdflin.exe"
      else if on_windows then "cpdflin.exe"
      else "cpdflin"
(* Call cpdflin, given the location of the cpdflin binary, the (temp) input
   name, the output name, and the password to open the input with. Returns the
   exit code reported by Sys.command.

   The password and both file names are passed through Filename.quote, so
   spaces or shell metacharacters in them cannot split or alter the command.
   When debug is set, the command line is echoed to stderr before it runs. *)
let call_cpdflin cpdflin temp output best_password =
  (* Echo (in debug mode) and execute one command line. *)
  let run command =
    if !debug then prerr_endline command;
    Sys.command command
  in
  let command =
    cpdflin ^ " --linearize " ^ " --password=" ^ Filename.quote best_password ^ " " ^
    Filename.quote temp ^ " " ^ Filename.quote output
  in
  match Sys.os_type with
  | "Win32" ->
      (* On windows, don't use LD_LIBRARY_PATH - it will happen automatically *)
      run command
  | _ ->
      (* On other platforms, if -cpdflin was provided, or cpdflin was in the
         current folder, set up LD_LIBRARY_PATH: *)
      begin match cpdflin with
      | "cpdflin" -> run command
      | _ ->
          let libdir = Filename.dirname cpdflin in
          run
            ("DYLD_FALLBACK_LIBRARY_PATH=" ^ libdir ^ " " ^
             "LD_LIBRARY_PATH=" ^ libdir ^ " " ^
             command)
      end
(* Recompress anything which isn't compressed, unless it's metadata. *) (* Recompress anything which isn't compressed, unless it's metadata. *)
let recompress_stream pdf = function let recompress_stream pdf = function
@ -3856,140 +3816,6 @@ let append_page_content_page fast s before pdf n page =
let append_page_content s before fast range pdf = let append_page_content s before fast range pdf =
process_pages (ppstub (append_page_content_page fast s before pdf)) pdf range process_pages (ppstub (append_page_content_page fast s before pdf)) pdf range
(* Merge optional content groups (OCGs) which share the same /Name.

 * 1. Get list of indirects of all OCGs from the /OCProperties, and their textual names
 * 2. Calculate a change list to coalesce them
 * 3. Remove any changed ones from the /OCGs and /Order and /ON and /OFF in /OCProperties
 * 4. Do the changes to all indirect references in the whole pdf

   Does nothing when the catalog has no /OCProperties. Raises Failure when
   /OCGs is missing or malformed, when an OCG is not a dictionary with a
   string /Name, or when /OCProperties has no /D dictionary. *)
(*FIXME Pre-existing nulls - what to do? *)
let ocg_coalesce pdf =
  match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with
  | None -> ()
  | Some ocpdict ->
      (* Step 1: (object number, name) for every entry of /OCGs. *)
      let number_name_pairs =
        match Pdf.lookup_direct pdf "/OCGs" ocpdict with
        | Some (Pdf.Array ocgs) ->
            let numbers =
              map (function Pdf.Indirect i -> i | _ -> failwith "Malformed /OCG entry") ocgs
            in
            let names =
              map
                (fun i ->
                  try
                    begin match Pdf.lookup_obj pdf i with
                    | Pdf.Dictionary d ->
                        begin match Pdf.lookup_direct pdf "/Name" (Pdf.Dictionary d) with
                        | Some (Pdf.String s) -> s
                        | _ -> failwith "ocg: missing name"
                        end
                    | _ -> failwith "ocg: not a dictionary"
                    end
                  with
                  (* Let the specific diagnosis through instead of masking it. *)
                  | Failure _ as e -> raise e
                  | _ -> failwith "OCG object missing")
                numbers
            in
            combine numbers names
        | _ -> failwith "Malformed or missing /OCGs"
      in
      (*iter (fun (num, name) -> Printf.printf "%i = %s\n" num name) number_name_pairs;*)
      (* Step 2: within each group of equal names the first object is kept and
         every other one is mapped onto it, giving (from, to) pairs. *)
      let changes =
        let cf (_, name) (_, name') = compare name name' in
        let sets = collate cf (List.stable_sort cf number_name_pairs) in
        flatten
          (option_map
             (function
              | [] -> None
              | (hnum, _)::t -> Some (map (function (tnum, _) -> (tnum, hnum)) t))
             sets)
      in
      (*Printf.printf "\nChanges are:\n";
      List.iter (fun (f, t) -> Printf.printf "%i -> %i\n" f t) changes;*)
      let nums = map fst changes in
      (*Printf.printf "\nto remove:\n";
      List.iter (Printf.printf "%i ") nums;*)
      (* Flat arrays (/OCGs, /ON, /OFF): keep only references to surviving OCGs. *)
      let drop_merged elts =
        option_map
          (function
           | Pdf.Indirect i -> if mem i nums then None else Some (Pdf.Indirect i)
           | _ -> None)
          elts
      in
      (* /Order may also hold nested arrays and label strings, so only the
         merged references are removed, at any depth; everything else stays. *)
      let rec drop_merged_in_order elts =
        option_map
          (function
           | Pdf.Indirect i -> if mem i nums then None else Some (Pdf.Indirect i)
           | Pdf.Array nested -> Some (Pdf.Array (drop_merged_in_order nested))
           | other -> Some other)
          elts
      in
      (* Step 3: rebuild /OCProperties without the merged-away references. *)
      let new_ocproperties =
        let remove_from_array key dict =
          match Pdf.lookup_direct pdf key dict with
          | Some (Pdf.Array elts) -> Pdf.add_dict_entry dict key (Pdf.Array (drop_merged elts))
          | _ -> dict
        in
        let remove_from_array_inside_d prune key dict =
          match Pdf.lookup_direct pdf "/D" dict with
          | Some (Pdf.Dictionary ddict) ->
              begin match Pdf.lookup_direct pdf key (Pdf.Dictionary ddict) with
              | Some (Pdf.Array elts) ->
                  Pdf.add_dict_entry dict "/D"
                    (Pdf.add_dict_entry (Pdf.Dictionary ddict) key (Pdf.Array (prune elts)))
              | _ -> dict
              end
          | _ -> failwith "No /D dict in OCGProperties"
        in
        remove_from_array "/OCGs"
          (remove_from_array_inside_d drop_merged "/ON"
             (remove_from_array_inside_d drop_merged "/OFF"
                (remove_from_array_inside_d drop_merged_in_order "/Order" ocpdict)))
      in
      (*flprint (Pdfwrite.string_of_pdf new_ocproperties);*)
      (* Install the new /OCProperties through a fresh catalog object. *)
      let ocp_objnum = Pdf.addobj pdf new_ocproperties in
      let new_catalog =
        Pdf.addobj pdf
          (Pdf.add_dict_entry (Pdf.catalog_of_pdf pdf) "/OCProperties" (Pdf.Indirect ocp_objnum))
      in
      pdf.Pdf.trailerdict <- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect new_catalog);
      pdf.Pdf.root <- new_catalog;
      (* Step 4: redirect every reference to a merged OCG onto the kept one. *)
      Pdf.objselfmap (Pdf.renumber_object_parsed pdf (hashtable_of_dictionary changes)) pdf
(* Names of the optional content groups listed in /OCProperties /OCGs, in
   array order. Entries which are not indirect references, or whose object has
   no string /Name, are skipped. The result is empty when the catalog has no
   /OCProperties or no /OCGs array. *)
let ocg_get_list pdf =
  let name_of = function
    | Pdf.Indirect i ->
        begin match Pdf.lookup_direct pdf "/Name" (Pdf.lookup_obj pdf i) with
        | Some (Pdf.String s) -> Some s
        | _ -> None
        end
    | _ -> None
  in
  (* Accumulate in reverse; the caller below restores array order. *)
  let collect acc elt =
    match name_of elt with
    | Some s -> s :: acc
    | None -> acc
  in
  match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with
  | None -> []
  | Some ocpdict ->
      begin match Pdf.lookup_direct pdf "/OCGs" ocpdict with
      | Some (Pdf.Array elts) -> rev (List.fold_left collect [] elts)
      | _ -> []
      end
(* Print the name of each optional content group found by ocg_get_list to
   standard output, one per line. *)
let ocg_list pdf =
  let print_name name = Printf.printf "%s\n" name in
  List.iter print_name (ocg_get_list pdf)
(* Rename optional content groups: every dictionary object with /Type /OCG
   whose /Name is the string f gets /Name t instead. All other objects are
   left as they are. *)
let ocg_rename f t pdf =
  let is_ocg dict =
    match Pdf.lookup_direct pdf "/Type" dict with
    | Some (Pdf.Name "/OCG") -> true
    | _ -> false
  in
  let has_old_name dict =
    match Pdf.lookup_direct pdf "/Name" dict with
    | Some (Pdf.String s) -> s = f
    | _ -> false
  in
  let rename = function
    | Pdf.Dictionary _ as dict ->
        (* /Name is only consulted once the object is known to be an OCG. *)
        if is_ocg dict && has_old_name dict
        then Pdf.add_dict_entry dict "/Name" (Pdf.String t)
        else dict
    | other -> other
  in
  Pdf.objselfmap rename pdf
(* Make /Order in the /D dictionary of /OCProperties equal to the whole /OCGs
   array. Does nothing if /OCProperties, its /OCGs array or its /D dictionary
   is absent. The updated /OCProperties is stored as a new object, referenced
   from a new catalog object which becomes the document root. *)
let ocg_order_all pdf =
  let install_order ocpdict ocgs default_config =
    let default_config' = Pdf.add_dict_entry default_config "/Order" (Pdf.Array ocgs) in
    let ocproperties' = Pdf.add_dict_entry ocpdict "/D" default_config' in
    let ocp_ref = Pdf.Indirect (Pdf.addobj pdf ocproperties') in
    let catalog' = Pdf.add_dict_entry (Pdf.catalog_of_pdf pdf) "/OCProperties" ocp_ref in
    let catalog_num = Pdf.addobj pdf catalog' in
    pdf.Pdf.trailerdict <-
      Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalog_num);
    pdf.Pdf.root <- catalog_num
  in
  match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with
  | None -> ()
  | Some ocpdict ->
      begin match Pdf.lookup_direct pdf "/OCGs" ocpdict with
      | Some (Pdf.Array elts) ->
          begin match Pdf.lookup_direct pdf "/D" ocpdict with
          | Some (Pdf.Dictionary _ as d) -> install_order ocpdict elts d
          | _ -> ()
          end
      | _ -> ()
      end
(* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes. (* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes.
* *
* We use different dash lengths and colours to help distinguish coincident * We use different dash lengths and colours to help distinguish coincident

View File

@ -353,26 +353,12 @@ val remove_all_text : int list -> Pdf.t -> Pdf.t
val process_xobjects : Pdf.t -> Pdfpage.t -> (Pdf.t -> Pdf.pdfobject -> Pdf.pdfobject list -> Pdf.pdfobject list) -> unit val process_xobjects : Pdf.t -> Pdfpage.t -> (Pdf.t -> Pdf.pdfobject -> Pdf.pdfobject list -> Pdf.pdfobject list) -> unit
(** [find_cpdflin provided] chooses the cpdflin executable to run: the path in
    [provided] if there is one, otherwise a cpdflin or cpdflin.exe in the
    current directory, otherwise the bare program name. *)
val find_cpdflin : string option -> string
(** [call_cpdflin cpdflin temp output best_password] runs the [cpdflin] binary
    with --linearize on the input file [temp], writing to [output] and passing
    [best_password] as --password. Returns the exit code of the command. *)
val call_cpdflin : string -> string -> string -> string -> int
val debug : bool ref val debug : bool ref
val extract_text : float option -> Pdf.t -> int list -> string val extract_text : float option -> Pdf.t -> int list -> string
val append_page_content : string -> bool -> bool -> int list -> Pdf.t -> Pdf.t val append_page_content : string -> bool -> bool -> int list -> Pdf.t -> Pdf.t
(** Merge optional content groups which share the same /Name: one object per
    name is kept and references to the others are redirected to it. Does
    nothing if the catalog has no /OCProperties.
    @raise Failure if /OCGs or /D is missing or malformed. *)
val ocg_coalesce : Pdf.t -> unit
(** Names of the optional content groups listed in /OCProperties /OCGs, in
    array order. Empty if there are none. *)
val ocg_get_list : Pdf.t -> string list
(** Print the names given by [ocg_get_list] to standard output, one per line. *)
val ocg_list : Pdf.t -> unit
(** [ocg_rename f t pdf] gives the /Name [t] to every dictionary of /Type /OCG
    whose /Name is [f]. *)
val ocg_rename : string -> string -> Pdf.t -> unit
(** Set /Order in the /D dictionary of /OCProperties to the whole /OCGs array.
    Does nothing if any of these is absent. *)
val ocg_order_all : Pdf.t -> unit
val stamp_as_xobject : Pdf.t -> int list -> Pdf.t -> Pdf.t * string val stamp_as_xobject : Pdf.t -> int list -> Pdf.t -> Pdf.t * string
val remove_dict_entry : Pdf.t -> string -> Pdf.pdfobject option -> unit val remove_dict_entry : Pdf.t -> string -> Pdf.pdfobject option -> unit

View File

@ -73,6 +73,7 @@ let parse_pagespec_allow_empty pdf spec =
try Cpdfpagespec.parse_pagespec pdf spec with try Cpdfpagespec.parse_pagespec pdf spec with
Pdf.PDFError ("Page range specifies no pages") -> [] Pdf.PDFError ("Page range specifies no pages") -> []
(* Operations. *) (* Operations. *)
type op = type op =
| CopyFont of string | CopyFont of string
@ -708,6 +709,47 @@ let reset_arguments () =
* squeeze options: a little odd, but we want it to happen on eventual * squeeze options: a little odd, but we want it to happen on eventual
* output. *) * output. *)
(* Decide which cpdflin executable to run. In order of preference:
   a) the path supplied by the caller (the -cpdflin option),
   b) a cpdflin or cpdflin.exe found in the current directory,
   c) the bare program name, assumed to be installed at a system place. *)
let find_cpdflin provided =
  let on_windows = Sys.os_type = "Win32" in
  (* A local binary is addressed relative to the current directory, except on
     Windows where no prefix is added. *)
  let local name = (if on_windows then "" else "./") ^ name in
  match provided with
  | Some path -> path
  | None ->
      if Sys.file_exists "cpdflin" then local "cpdflin"
      else if Sys.file_exists "cpdflin.exe" then local "cpdflin.exe"
      else if on_windows then "cpdflin.exe"
      else "cpdflin"
(* Call cpdflin, given the location of the cpdflin binary, the (temp) input
   name, the output name, and the password to open the input with. Returns the
   exit code reported by Sys.command.

   The password and both file names are passed through Filename.quote, so
   spaces or shell metacharacters in them cannot split or alter the command.
   When args.debug is set, the command line is echoed to stderr before it
   runs. *)
let call_cpdflin cpdflin temp output best_password =
  (* Echo (in debug mode) and execute one command line. *)
  let run command =
    if args.debug then prerr_endline command;
    Sys.command command
  in
  let command =
    cpdflin ^ " --linearize " ^ " --password=" ^ Filename.quote best_password ^ " " ^
    Filename.quote temp ^ " " ^ Filename.quote output
  in
  match Sys.os_type with
  | "Win32" ->
      (* On windows, don't use LD_LIBRARY_PATH - it will happen automatically *)
      run command
  | _ ->
      (* On other platforms, if -cpdflin was provided, or cpdflin was in the
         current folder, set up LD_LIBRARY_PATH: *)
      begin match cpdflin with
      | "cpdflin" -> run command
      | _ ->
          let libdir = Filename.dirname cpdflin in
          run
            ("DYLD_FALLBACK_LIBRARY_PATH=" ^ libdir ^ " " ^
             "LD_LIBRARY_PATH=" ^ libdir ^ " " ^
             command)
      end
let get_pagespec () = let get_pagespec () =
match args.inputs with match args.inputs with
| (_, ps, _, _, _, _)::_ -> ps | (_, ps, _, _, _, _)::_ -> ps
@ -2745,7 +2787,7 @@ let really_write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf outna
end; end;
begin begin
if will_linearize then if will_linearize then
let cpdflin = Cpdf.find_cpdflin args.cpdflin in let cpdflin = find_cpdflin args.cpdflin in
match args.inputs with match args.inputs with
[] -> raise (Pdf.PDFError "no input in recryption") [] -> raise (Pdf.PDFError "no input in recryption")
| (_, _, user_pw, owner_pw, _, _)::_ -> | (_, _, user_pw, owner_pw, _, _)::_ ->
@ -2753,7 +2795,7 @@ let really_write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf outna
if owner_pw <> "" then owner_pw else user_pw if owner_pw <> "" then owner_pw else user_pw
in in
let code = let code =
Cpdf.call_cpdflin cpdflin outname' outname best_password call_cpdflin cpdflin outname' outname best_password
in in
if code > 0 then if code > 0 then
begin begin
@ -3837,18 +3879,18 @@ let go () =
write_json args.out pdf write_json args.out pdf
| Some OCGCoalesce -> | Some OCGCoalesce ->
let pdf = get_single_pdf args.op false in let pdf = get_single_pdf args.op false in
Cpdf.ocg_coalesce pdf; Cpdfocg.ocg_coalesce pdf;
write_pdf false pdf write_pdf false pdf
| Some OCGList -> | Some OCGList ->
let pdf = get_single_pdf args.op true in let pdf = get_single_pdf args.op true in
Cpdf.ocg_list pdf Cpdfocg.ocg_list pdf
| Some OCGRename -> | Some OCGRename ->
let pdf = get_single_pdf args.op false in let pdf = get_single_pdf args.op false in
Cpdf.ocg_rename args.ocgrenamefrom args.ocgrenameto pdf; Cpdfocg.ocg_rename args.ocgrenamefrom args.ocgrenameto pdf;
write_pdf false pdf write_pdf false pdf
| Some OCGOrderAll -> | Some OCGOrderAll ->
let pdf = get_single_pdf args.op false in let pdf = get_single_pdf args.op false in
Cpdf.ocg_order_all pdf; Cpdfocg.ocg_order_all pdf;
write_pdf false pdf write_pdf false pdf
| Some (StampAsXObject stamp) -> | Some (StampAsXObject stamp) ->
let stamp_pdf = let stamp_pdf =

134
cpdfocg.ml Normal file
View File

@ -0,0 +1,134 @@
open Pdfutil
(* Merge optional content groups (OCGs) which share the same /Name.

 * 1. Get list of indirects of all OCGs from the /OCProperties, and their textual names
 * 2. Calculate a change list to coalesce them
 * 3. Remove any changed ones from the /OCGs and /Order and /ON and /OFF in /OCProperties
 * 4. Do the changes to all indirect references in the whole pdf

   Does nothing when the catalog has no /OCProperties. Raises Failure when
   /OCGs is missing or malformed, when an OCG is not a dictionary with a
   string /Name, or when /OCProperties has no /D dictionary. *)
(*FIXME Pre-existing nulls - what to do? *)
let ocg_coalesce pdf =
  match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with
  | None -> ()
  | Some ocpdict ->
      (* Step 1: (object number, name) for every entry of /OCGs. *)
      let number_name_pairs =
        match Pdf.lookup_direct pdf "/OCGs" ocpdict with
        | Some (Pdf.Array ocgs) ->
            let numbers =
              map (function Pdf.Indirect i -> i | _ -> failwith "Malformed /OCG entry") ocgs
            in
            let names =
              map
                (fun i ->
                  try
                    begin match Pdf.lookup_obj pdf i with
                    | Pdf.Dictionary d ->
                        begin match Pdf.lookup_direct pdf "/Name" (Pdf.Dictionary d) with
                        | Some (Pdf.String s) -> s
                        | _ -> failwith "ocg: missing name"
                        end
                    | _ -> failwith "ocg: not a dictionary"
                    end
                  with
                  (* Let the specific diagnosis through instead of masking it. *)
                  | Failure _ as e -> raise e
                  | _ -> failwith "OCG object missing")
                numbers
            in
            combine numbers names
        | _ -> failwith "Malformed or missing /OCGs"
      in
      (*iter (fun (num, name) -> Printf.printf "%i = %s\n" num name) number_name_pairs;*)
      (* Step 2: within each group of equal names the first object is kept and
         every other one is mapped onto it, giving (from, to) pairs. *)
      let changes =
        let cf (_, name) (_, name') = compare name name' in
        let sets = collate cf (List.stable_sort cf number_name_pairs) in
        flatten
          (option_map
             (function
              | [] -> None
              | (hnum, _)::t -> Some (map (function (tnum, _) -> (tnum, hnum)) t))
             sets)
      in
      (*Printf.printf "\nChanges are:\n";
      List.iter (fun (f, t) -> Printf.printf "%i -> %i\n" f t) changes;*)
      let nums = map fst changes in
      (*Printf.printf "\nto remove:\n";
      List.iter (Printf.printf "%i ") nums;*)
      (* Flat arrays (/OCGs, /ON, /OFF): keep only references to surviving OCGs. *)
      let drop_merged elts =
        option_map
          (function
           | Pdf.Indirect i -> if mem i nums then None else Some (Pdf.Indirect i)
           | _ -> None)
          elts
      in
      (* /Order may also hold nested arrays and label strings, so only the
         merged references are removed, at any depth; everything else stays. *)
      let rec drop_merged_in_order elts =
        option_map
          (function
           | Pdf.Indirect i -> if mem i nums then None else Some (Pdf.Indirect i)
           | Pdf.Array nested -> Some (Pdf.Array (drop_merged_in_order nested))
           | other -> Some other)
          elts
      in
      (* Step 3: rebuild /OCProperties without the merged-away references. *)
      let new_ocproperties =
        let remove_from_array key dict =
          match Pdf.lookup_direct pdf key dict with
          | Some (Pdf.Array elts) -> Pdf.add_dict_entry dict key (Pdf.Array (drop_merged elts))
          | _ -> dict
        in
        let remove_from_array_inside_d prune key dict =
          match Pdf.lookup_direct pdf "/D" dict with
          | Some (Pdf.Dictionary ddict) ->
              begin match Pdf.lookup_direct pdf key (Pdf.Dictionary ddict) with
              | Some (Pdf.Array elts) ->
                  Pdf.add_dict_entry dict "/D"
                    (Pdf.add_dict_entry (Pdf.Dictionary ddict) key (Pdf.Array (prune elts)))
              | _ -> dict
              end
          | _ -> failwith "No /D dict in OCGProperties"
        in
        remove_from_array "/OCGs"
          (remove_from_array_inside_d drop_merged "/ON"
             (remove_from_array_inside_d drop_merged "/OFF"
                (remove_from_array_inside_d drop_merged_in_order "/Order" ocpdict)))
      in
      (*flprint (Pdfwrite.string_of_pdf new_ocproperties);*)
      (* Install the new /OCProperties through a fresh catalog object. *)
      let ocp_objnum = Pdf.addobj pdf new_ocproperties in
      let new_catalog =
        Pdf.addobj pdf
          (Pdf.add_dict_entry (Pdf.catalog_of_pdf pdf) "/OCProperties" (Pdf.Indirect ocp_objnum))
      in
      pdf.Pdf.trailerdict <- Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect new_catalog);
      pdf.Pdf.root <- new_catalog;
      (* Step 4: redirect every reference to a merged OCG onto the kept one. *)
      Pdf.objselfmap (Pdf.renumber_object_parsed pdf (hashtable_of_dictionary changes)) pdf
(* Names of the optional content groups listed in /OCProperties /OCGs, in
   array order. Entries which are not indirect references, or whose object has
   no string /Name, are skipped. The result is empty when the catalog has no
   /OCProperties or no /OCGs array. *)
let ocg_get_list pdf =
  let name_of = function
    | Pdf.Indirect i ->
        begin match Pdf.lookup_direct pdf "/Name" (Pdf.lookup_obj pdf i) with
        | Some (Pdf.String s) -> Some s
        | _ -> None
        end
    | _ -> None
  in
  (* Accumulate in reverse; the caller below restores array order. *)
  let collect acc elt =
    match name_of elt with
    | Some s -> s :: acc
    | None -> acc
  in
  match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with
  | None -> []
  | Some ocpdict ->
      begin match Pdf.lookup_direct pdf "/OCGs" ocpdict with
      | Some (Pdf.Array elts) -> rev (List.fold_left collect [] elts)
      | _ -> []
      end
(* Print the name of each optional content group found by ocg_get_list to
   standard output, one per line. *)
let ocg_list pdf =
  let print_name name = Printf.printf "%s\n" name in
  List.iter print_name (ocg_get_list pdf)
(* Rename optional content groups: every dictionary object with /Type /OCG
   whose /Name is the string f gets /Name t instead. All other objects are
   left as they are. *)
let ocg_rename f t pdf =
  let is_ocg dict =
    match Pdf.lookup_direct pdf "/Type" dict with
    | Some (Pdf.Name "/OCG") -> true
    | _ -> false
  in
  let has_old_name dict =
    match Pdf.lookup_direct pdf "/Name" dict with
    | Some (Pdf.String s) -> s = f
    | _ -> false
  in
  let rename = function
    | Pdf.Dictionary _ as dict ->
        (* /Name is only consulted once the object is known to be an OCG. *)
        if is_ocg dict && has_old_name dict
        then Pdf.add_dict_entry dict "/Name" (Pdf.String t)
        else dict
    | other -> other
  in
  Pdf.objselfmap rename pdf
(* Make /Order in the /D dictionary of /OCProperties equal to the whole /OCGs
   array. Does nothing if /OCProperties, its /OCGs array or its /D dictionary
   is absent. The updated /OCProperties is stored as a new object, referenced
   from a new catalog object which becomes the document root. *)
let ocg_order_all pdf =
  let install_order ocpdict ocgs default_config =
    let default_config' = Pdf.add_dict_entry default_config "/Order" (Pdf.Array ocgs) in
    let ocproperties' = Pdf.add_dict_entry ocpdict "/D" default_config' in
    let ocp_ref = Pdf.Indirect (Pdf.addobj pdf ocproperties') in
    let catalog' = Pdf.add_dict_entry (Pdf.catalog_of_pdf pdf) "/OCProperties" ocp_ref in
    let catalog_num = Pdf.addobj pdf catalog' in
    pdf.Pdf.trailerdict <-
      Pdf.add_dict_entry pdf.Pdf.trailerdict "/Root" (Pdf.Indirect catalog_num);
    pdf.Pdf.root <- catalog_num
  in
  match Pdf.lookup_direct pdf "/OCProperties" (Pdf.catalog_of_pdf pdf) with
  | None -> ()
  | Some ocpdict ->
      begin match Pdf.lookup_direct pdf "/OCGs" ocpdict with
      | Some (Pdf.Array elts) ->
          begin match Pdf.lookup_direct pdf "/D" ocpdict with
          | Some (Pdf.Dictionary _ as d) -> install_order ocpdict elts d
          | _ -> ()
          end
      | _ -> ()
      end

9
cpdfocg.mli Normal file
View File

@ -0,0 +1,9 @@
(** Merge optional content groups which share the same /Name: one object per
    name is kept and references to the others are redirected to it. Does
    nothing if the catalog has no /OCProperties.
    @raise Failure if /OCGs or /D is missing or malformed. *)
val ocg_coalesce : Pdf.t -> unit
(** Names of the optional content groups listed in /OCProperties /OCGs, in
    array order. Empty if there are none. *)
val ocg_get_list : Pdf.t -> string list
(** Print the names given by [ocg_get_list] to standard output, one per line. *)
val ocg_list : Pdf.t -> unit
(** [ocg_rename f t pdf] gives the /Name [t] to every dictionary of /Type /OCG
    whose /Name is [f]. *)
val ocg_rename : string -> string -> Pdf.t -> unit
(** Set /Order in the /D dictionary of /OCProperties to the whole /OCGs array.
    Does nothing if any of these is absent. *)
val ocg_order_all : Pdf.t -> unit