more
This commit is contained in:
parent
3b21ec6d29
commit
a6328dc973
118
cpdfcommand.ml
118
cpdfcommand.ml
|
@ -2266,30 +2266,7 @@ let pdf_of_stdin ?revision user_pw owner_pw =
|
||||||
with
|
with
|
||||||
_ -> raise (StdInBytes !rbytes)
|
_ -> raise (StdInBytes !rbytes)
|
||||||
|
|
||||||
let filenames = null_hash ()
|
let rec get_single_pdf ?(decrypt=true) ?(fail=false) op read_lazy =
|
||||||
|
|
||||||
(* This now memoizes on the name of the file to make sure we only load each
|
|
||||||
file once *)
|
|
||||||
let get_pdf_from_input_kind ((_, _, u, o, _, revision) as input) op = function
|
|
||||||
| AlreadyInMemory pdf -> pdf
|
|
||||||
| InFile s ->
|
|
||||||
if args.squeeze then
|
|
||||||
begin
|
|
||||||
let size = filesize s in
|
|
||||||
initial_file_size := size;
|
|
||||||
if !logto = None then Printf.printf "Initial file size is %i bytes\n" size
|
|
||||||
end;
|
|
||||||
begin try Hashtbl.find filenames s with
|
|
||||||
Not_found ->
|
|
||||||
let pdf = pdfread_pdf_of_file ?revision (optstring u) (optstring o) s in
|
|
||||||
args.was_encrypted <- Pdfcrypt.is_encrypted pdf;
|
|
||||||
let pdf = decrypt_if_necessary input op pdf in
|
|
||||||
Hashtbl.add filenames s pdf; pdf
|
|
||||||
end
|
|
||||||
| StdIn ->
|
|
||||||
decrypt_if_necessary input op (pdf_of_stdin ?revision u o)
|
|
||||||
|
|
||||||
let rec get_single_pdf ?(fail=false) op read_lazy =
|
|
||||||
let failout () =
|
let failout () =
|
||||||
if fail then begin
|
if fail then begin
|
||||||
(* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *)
|
(* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *)
|
||||||
|
@ -2324,7 +2301,7 @@ let rec get_single_pdf ?(fail=false) op read_lazy =
|
||||||
warn_gs ()
|
warn_gs ()
|
||||||
in
|
in
|
||||||
args.was_encrypted <- Pdfcrypt.is_encrypted pdf;
|
args.was_encrypted <- Pdfcrypt.is_encrypted pdf;
|
||||||
decrypt_if_necessary input op pdf
|
if decrypt then decrypt_if_necessary input op pdf else pdf
|
||||||
| (StdIn, x, u, o, y, revision) as input::more ->
|
| (StdIn, x, u, o, y, revision) as input::more ->
|
||||||
let pdf =
|
let pdf =
|
||||||
try pdf_of_stdin ?revision u o with
|
try pdf_of_stdin ?revision u o with
|
||||||
|
@ -2345,24 +2322,77 @@ let rec get_single_pdf ?(fail=false) op read_lazy =
|
||||||
warn_gs ()
|
warn_gs ()
|
||||||
in
|
in
|
||||||
args.was_encrypted <- Pdfcrypt.is_encrypted pdf;
|
args.was_encrypted <- Pdfcrypt.is_encrypted pdf;
|
||||||
decrypt_if_necessary input op pdf
|
if decrypt then decrypt_if_necessary input op pdf else pdf
|
||||||
| (AlreadyInMemory pdf, _, _, _, _, _)::_ -> pdf
|
| (AlreadyInMemory pdf, _, _, _, _, _)::_ -> pdf
|
||||||
| _ ->
|
| _ ->
|
||||||
raise (Arg.Bad "cpdf: No input specified.\n")
|
raise (Arg.Bad "cpdf: No input specified.\n")
|
||||||
|
|
||||||
let get_single_pdf_nodecrypt read_lazy =
|
let get_single_pdf_nodecrypt read_lazy =
|
||||||
match args.inputs with
|
get_single_pdf ~decrypt:false None read_lazy
|
||||||
| (InFile inname, _, u, o, _, revision)::_ ->
|
|
||||||
|
let filenames = null_hash ()
|
||||||
|
|
||||||
|
(* This now memoizes on the name of the file to make sure we only load each
|
||||||
|
file once *)
|
||||||
|
let rec get_pdf_from_input_kind ?(decrypt=true) ?(fail=false) ((_, x, u, o, y, revision) as input) op ik =
|
||||||
|
let failout () =
|
||||||
|
if fail then begin
|
||||||
|
(* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *)
|
||||||
|
Printf.eprintf "Failed to read gs-reconstructed PDF even though gs succeeded\n";
|
||||||
|
exit 2
|
||||||
|
end
|
||||||
|
in
|
||||||
|
let warn_gs () =
|
||||||
|
Printf.eprintf "Failed to read malformed PDF file. Consider using -gs-malformed\n";
|
||||||
|
exit 2
|
||||||
|
in
|
||||||
|
match ik with
|
||||||
|
| AlreadyInMemory pdf -> pdf
|
||||||
|
| InFile s ->
|
||||||
if args.squeeze then
|
if args.squeeze then
|
||||||
Printf.printf "Initial file size is %i bytes\n" (filesize inname);
|
begin
|
||||||
if read_lazy then
|
let size = filesize s in
|
||||||
pdfread_pdf_of_channel_lazy ?revision (optstring u) (optstring o) (open_in_bin inname)
|
initial_file_size := size;
|
||||||
|
if !logto = None then Printf.printf "Initial file size is %i bytes\n" size
|
||||||
|
end;
|
||||||
|
begin try Hashtbl.find filenames s with
|
||||||
|
Not_found ->
|
||||||
|
let pdf =
|
||||||
|
try pdfread_pdf_of_file ?revision (optstring u) (optstring o) s with
|
||||||
|
_ ->
|
||||||
|
if args.gs_malformed then
|
||||||
|
begin
|
||||||
|
failout ();
|
||||||
|
let newname = mend_pdf_file_with_ghostscript s in
|
||||||
|
get_pdf_from_input_kind ~fail:true (InFile newname, x, u, o, y, revision) op (InFile newname);
|
||||||
|
end
|
||||||
else
|
else
|
||||||
pdfread_pdf_of_file ?revision (optstring u) (optstring o) inname
|
warn_gs ()
|
||||||
| (StdIn, _, u, o, _, revision)::_ -> pdf_of_stdin ?revision u o
|
in
|
||||||
| (AlreadyInMemory pdf, _, _, _, _, _)::_ -> pdf
|
args.was_encrypted <- Pdfcrypt.is_encrypted pdf;
|
||||||
| _ ->
|
let pdf = if decrypt then decrypt_if_necessary input op pdf else pdf in
|
||||||
raise (Arg.Bad "cpdf: No input specified.\n")
|
Hashtbl.add filenames s pdf; pdf
|
||||||
|
end
|
||||||
|
| StdIn ->
|
||||||
|
let pdf =
|
||||||
|
try pdf_of_stdin ?revision u o with
|
||||||
|
StdInBytes b ->
|
||||||
|
if args.gs_malformed then
|
||||||
|
begin
|
||||||
|
failout ();
|
||||||
|
let inname = Filename.temp_file "cpdf" ".pdf" in
|
||||||
|
tempfiles := inname::!tempfiles;
|
||||||
|
let fh = open_out_bin inname in
|
||||||
|
Pdfio.bytes_to_output_channel fh b;
|
||||||
|
close_out fh;
|
||||||
|
let newname = mend_pdf_file_with_ghostscript inname in
|
||||||
|
get_pdf_from_input_kind ~fail:true (InFile newname, x, u, o, y, revision) op (InFile newname);
|
||||||
|
end
|
||||||
|
else
|
||||||
|
warn_gs ()
|
||||||
|
in
|
||||||
|
args.was_encrypted <- Pdfcrypt.is_encrypted pdf;
|
||||||
|
if decrypt then decrypt_if_necessary input op pdf else pdf
|
||||||
|
|
||||||
let rec unescape_octals prev = function
|
let rec unescape_octals prev = function
|
||||||
| [] -> rev prev
|
| [] -> rev prev
|
||||||
|
@ -3516,18 +3546,12 @@ let go () =
|
||||||
| _ -> error "extract fontfile: bad command line"
|
| _ -> error "extract fontfile: bad command line"
|
||||||
end
|
end
|
||||||
| Some CountPages ->
|
| Some CountPages ->
|
||||||
let pdf, inname, input =
|
begin match args.inputs with
|
||||||
match args.inputs with
|
[(ik, _, _, _, _, _) as input] ->
|
||||||
| (InFile inname, _, u, o, _, revision) as input::_ ->
|
let pdf = get_pdf_from_input_kind ~decrypt:false input (Some CountPages) ik in
|
||||||
pdfread_pdf_of_channel_lazy ?revision (optstring u) (optstring o) (open_in_bin inname), inname, input
|
|
||||||
| (StdIn, _, u, o, _, revision) as input::_ -> pdf_of_stdin ?revision u o, "", input
|
|
||||||
| (AlreadyInMemory pdf, _, _, _, _, _) as input::_ -> pdf, "", input
|
|
||||||
| _ -> raise (Arg.Bad "cpdf: No input specified.\n")
|
|
||||||
in
|
|
||||||
(*let pdf = decrypt_if_necessary input (Some CountPages) pdf in*)
|
|
||||||
(* 3/11/2016. We removed decryption here, because it doesn't seem necessary. Put
|
|
||||||
* back in on counterexample *)
|
|
||||||
output_page_count pdf
|
output_page_count pdf
|
||||||
|
| _ -> raise (Arg.Bad "CountPages: must have a single input file only")
|
||||||
|
end
|
||||||
| Some Revisions ->
|
| Some Revisions ->
|
||||||
let input =
|
let input =
|
||||||
match args.inputs with
|
match args.inputs with
|
||||||
|
|
Loading…
Reference in New Issue