Work on stdin for malformed files

This commit is contained in:
John Whitington 2019-07-02 14:10:42 +01:00
parent 017edbbd5e
commit 3b21ec6d29
2 changed files with 55 additions and 22 deletions

View File

@ -2246,7 +2246,11 @@ let mend_pdf_file_with_ghostscript filename =
| 0 -> Printf.eprintf "Succeeded!\n"; flush stderr; tmpout | 0 -> Printf.eprintf "Succeeded!\n"; flush stderr; tmpout
| _ -> Printf.eprintf "Could not fix malformed PDF file, even with gs\n"; flush stderr; exit 2 | _ -> Printf.eprintf "Could not fix malformed PDF file, even with gs\n"; flush stderr; exit 2
exception StdInBytes of bytes
let pdf_of_stdin ?revision user_pw owner_pw = let pdf_of_stdin ?revision user_pw owner_pw =
let rbytes = ref (mkbytes 0) in
try
let user_pw = Some user_pw let user_pw = Some user_pw
and owner_pw = if owner_pw = "" then None else Some owner_pw in and owner_pw = if owner_pw = "" then None else Some owner_pw in
let o, bytes = Pdfio.input_output_of_bytes 16384 in let o, bytes = Pdfio.input_output_of_bytes 16384 in
@ -2255,8 +2259,12 @@ let pdf_of_stdin ?revision user_pw owner_pw =
Pdf.empty () Pdf.empty ()
with with
End_of_file -> End_of_file ->
let i = Pdfio.input_of_bytes (Pdfio.extract_bytes_from_input_output o bytes) in let thebytes = Pdfio.extract_bytes_from_input_output o bytes in
rbytes := thebytes;
let i = Pdfio.input_of_bytes thebytes in
pdfread_pdf_of_input ?revision user_pw owner_pw i pdfread_pdf_of_input ?revision user_pw owner_pw i
with
_ -> raise (StdInBytes !rbytes)
let filenames = null_hash () let filenames = null_hash ()
@ -2282,6 +2290,17 @@ let get_pdf_from_input_kind ((_, _, u, o, _, revision) as input) op = function
decrypt_if_necessary input op (pdf_of_stdin ?revision u o) decrypt_if_necessary input op (pdf_of_stdin ?revision u o)
let rec get_single_pdf ?(fail=false) op read_lazy = let rec get_single_pdf ?(fail=false) op read_lazy =
let failout () =
if fail then begin
(* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *)
Printf.eprintf "Failed to read gs-reconstructed PDF even though gs succeeded\n";
exit 2
end
in
let warn_gs () =
Printf.eprintf "Failed to read malformed PDF file. Consider using -gs-malformed\n";
exit 2
in
match args.inputs with match args.inputs with
| (InFile inname, x, u, o, y, revision) as input::more -> | (InFile inname, x, u, o, y, revision) as input::more ->
if args.squeeze then if args.squeeze then
@ -2296,25 +2315,37 @@ let rec get_single_pdf ?(fail=false) op read_lazy =
_ -> _ ->
if args.gs_malformed then if args.gs_malformed then
begin begin
if fail then begin failout ();
(* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *)
Printf.eprintf "Failed to read gs-reconstructed PDF even though gs succeeded\n";
exit 2
end;
let newname = mend_pdf_file_with_ghostscript inname in let newname = mend_pdf_file_with_ghostscript inname in
args.inputs <- (InFile newname, x, u, o, y, revision)::more; args.inputs <- (InFile newname, x, u, o, y, revision)::more;
get_single_pdf ~fail:true op read_lazy get_single_pdf ~fail:true op read_lazy
end end
else else
begin warn_gs ()
Printf.eprintf "Failed to read malformed PDF file. Consider using -gs-malformed\n"; in
exit 2 args.was_encrypted <- Pdfcrypt.is_encrypted pdf;
end decrypt_if_necessary input op pdf
| (StdIn, x, u, o, y, revision) as input::more ->
let pdf =
try pdf_of_stdin ?revision u o with
StdInBytes b ->
if args.gs_malformed then
begin
failout ();
let inname = Filename.temp_file "cpdf" ".pdf" in
tempfiles := inname::!tempfiles;
let fh = open_out_bin inname in
Pdfio.bytes_to_output_channel fh b;
close_out fh;
let newname = mend_pdf_file_with_ghostscript inname in
args.inputs <- (InFile newname, x, u, o, y, revision)::more;
get_single_pdf ~fail:true op read_lazy
end
else
warn_gs ()
in in
args.was_encrypted <- Pdfcrypt.is_encrypted pdf; args.was_encrypted <- Pdfcrypt.is_encrypted pdf;
decrypt_if_necessary input op pdf decrypt_if_necessary input op pdf
| (StdIn, _, u, o, _, revision) as input::_ ->
decrypt_if_necessary input op (pdf_of_stdin ?revision u o)
| (AlreadyInMemory pdf, _, _, _, _, _)::_ -> pdf | (AlreadyInMemory pdf, _, _, _, _, _)::_ -> pdf
| _ -> | _ ->
raise (Arg.Bad "cpdf: No input specified.\n") raise (Arg.Bad "cpdf: No input specified.\n")
@ -4269,7 +4300,8 @@ let go_withargv argv =
output_pdfs := []; output_pdfs := [];
go ()) go ())
sets; sets;
flush stdout (*r for Windows *) flush stdout; (*r for Windows *)
exit 0
with with
| Arg.Bad s -> | Arg.Bad s ->
prerr_string prerr_string

View File

@ -17,6 +17,7 @@
%FIXME: Document new -list-spot-colours %FIXME: Document new -list-spot-colours
%FIXME: Document new -pad-multiple-before %FIXME: Document new -pad-multiple-before
%FIXME: Document new @N@@@ @E@@@, @S@@@ options %FIXME: Document new @N@@@ @E@@@, @S@@@ options
%FIXME: Document the rotate dance for adding rotated text
\documentclass{book} \documentclass{book}
\usepackage{palatino} \usepackage{palatino}