stderr unbuffered

This commit is contained in:
John Whitington 2021-07-23 16:26:07 +01:00
parent be6a9a3ddf
commit 8fda026ad7
5 changed files with 73 additions and 48 deletions

17
Changes
View File

@ -1,4 +1,9 @@
Version 2.4 (June 2021) 2.5 (Upcoming)
o Environment variable CPDF_REPRODUCIBLE_DATES for testing
o Effectively make stderr unbuffered
2.4 (June 2021)
o New operation -extract-images o New operation -extract-images
o New operation -output-json et al. to export PDF files in JSON format o New operation -output-json et al. to export PDF files in JSON format
@ -11,13 +16,13 @@ o Appearance streams transformed when transforming annotations
o Bookmark destination positions transformed when transforming pages o Bookmark destination positions transformed when transforming pages
o No longer depends on Bigarray or Unix modules o No longer depends on Bigarray or Unix modules
Version 2.3 (patchlevel 1, December 2019) 2.3 (patchlevel 1, December 2019)
o Fixed bug which prevented -info working on encrypted files o Fixed bug which prevented -info working on encrypted files
o Allow -shift with -add-text for additional adjustment o Allow -shift with -add-text for additional adjustment
o Prepend and postpend directly to page content streams o Prepend and postpend directly to page content streams
Version 2.3 (October 2019) 2.3 (October 2019)
o Directly set and remove Trim, Art, and Bleed boxes o Directly set and remove Trim, Art, and Bleed boxes
o Dump attachments to file o Dump attachments to file
@ -32,14 +37,14 @@ o Create PDF files from scratch
o Remove single images by name o Remove single images by name
o Add trim marks o Add trim marks
Version 2.2 (patchlevel 1) 2.2 (patchlevel 1)
o Fix for inability to read null objects in streams o Fix for inability to read null objects in streams
o Workaround for Adobe "Error 21" on re-saving encrypted files o Workaround for Adobe "Error 21" on re-saving encrypted files
o More efficient bookmark operations on files with many pages o More efficient bookmark operations on files with many pages
o New operation -hard-box to clip contents to a given box o New operation -hard-box to clip contents to a given box
Version 2.2 (March 2017) 2.2 (March 2017)
o Perform arithmetic on dimensions when specifying size or position o Perform arithmetic on dimensions when specifying size or position
o Add simple rectangles to PDF to blank things out o Add simple rectangles to PDF to blank things out
@ -57,7 +62,7 @@ o -print-page-labels
o -squeeze replaces smpdf o -squeeze replaces smpdf
o Preserve more sharing of data when doing merges and page alterations o Preserve more sharing of data when doing merges and page alterations
Version 2.1 (December 2014) 2.1 (December 2014)
o Encryption now much, much faster o Encryption now much, much faster
o Faster parsing of delayed object streams on large files o Faster parsing of delayed object streams on large files

28
cpdf.ml
View File

@ -587,7 +587,11 @@ let rec process_text time text m =
| (s, r)::t -> process_text time (string_replace_all_lazy s r text) t | (s, r)::t -> process_text time (string_replace_all_lazy s r text) t
let expand_date = function let expand_date = function
| "now" -> Cpdfstrftime.strftime "D:%Y%m%d%H%M%S" | "now" ->
begin match Sys.getenv_opt "CPDF_REPRODUCIBLE_DATES" with
| Some "true" -> Cpdfstrftime.strftime ~time:Cpdfstrftime.dummy "D:%Y%m%d%H%M%S"
| _ -> Cpdfstrftime.strftime "D:%Y%m%d%H%M%S"
end
| x -> x | x -> x
(* For uses of process_pages which don't need to deal with matrices, this (* For uses of process_pages which don't need to deal with matrices, this
@ -628,7 +632,7 @@ let protect fast pdf resources content =
let qs = length (keep (eq Pdfops.Op_q) ops) in let qs = length (keep (eq Pdfops.Op_q) ops) in
let bigqs = length (keep (eq Pdfops.Op_Q) ops) in let bigqs = length (keep (eq Pdfops.Op_Q) ops) in
let deficit = if qs > bigqs then qs - bigqs else 0 in let deficit = if qs > bigqs then qs - bigqs else 0 in
if deficit <> 0 then Printf.eprintf "Q Deficit was nonzero. Fixing. %i\n" deficit; if deficit <> 0 then Printf.eprintf "Q Deficit was nonzero. Fixing. %i\n%!" deficit;
deficit deficit
in in
let addstream ops = Pdf.addobj pdf (Pdfops.stream_of_ops ops) in let addstream ops = Pdf.addobj pdf (Pdfops.stream_of_ops ops) in
@ -884,7 +888,7 @@ let list_attached_files pdf =
| Some (Pdf.String s) -> | Some (Pdf.String s) ->
begin match Pdf.lookup_direct pdf "/FS" annot with begin match Pdf.lookup_direct pdf "/FS" annot with
| Some ((Pdf.Dictionary _) as d) -> | Some ((Pdf.Dictionary _) as d) ->
Printf.eprintf "%s\n" (Pdfwrite.string_of_pdf d); (*Printf.eprintf "%s\n%!" (Pdfwrite.string_of_pdf d);*)
begin match Pdf.lookup_direct pdf "/EF" d with begin match Pdf.lookup_direct pdf "/EF" d with
| Some ((Pdf.Dictionary _) as d) -> | Some ((Pdf.Dictionary _) as d) ->
begin match Pdf.lookup_direct pdf "/F" d with begin match Pdf.lookup_direct pdf "/F" d with
@ -2260,7 +2264,7 @@ let change_pattern_matrices_resources pdf tr resources =
end end
with with
Pdftransform.NonInvertable -> Pdftransform.NonInvertable ->
Printf.eprintf "Warning: noninvertible matrix"; Printf.eprintf "Warning: noninvertible matrix\n%!";
resources resources
let change_pattern_matrices_page pdf tr page = let change_pattern_matrices_page pdf tr page =
@ -2371,14 +2375,14 @@ let transform_annotations pdf transform rest =
Hashtbl.add seen_nums i (); Hashtbl.add seen_nums i ();
transform_xobject_in_place pdf transform i transform_xobject_in_place pdf transform i
end end
| _ -> Printf.eprintf "Malformed /AP structure b"; ()) | _ -> Printf.eprintf "Malformed /AP structure b\n%!"; ())
dict dict
| _ -> Printf.eprintf "Malformed /AP structure c"; ()) | _ -> Printf.eprintf "Malformed /AP structure c\n%!"; ())
dict dict
| _ -> Printf.eprintf "Malformed /AP structure\n"; () | _ -> Printf.eprintf "Malformed /AP structure\n%!"; ()
end;*) end;*)
Pdf.addobj_given_num pdf (i, annot) Pdf.addobj_given_num pdf (i, annot)
| _ -> Printf.eprintf "transform_annotations: not indirect") | _ -> Printf.eprintf "transform_annotations: not indirect\n%!")
annots annots
| _ -> () | _ -> ()
@ -3776,11 +3780,11 @@ let xmp_date date =
| _ -> raise Exit | _ -> raise Exit
end end
| _ -> | _ ->
Printf.eprintf "xmp_date: Malformed date string (no year): %s\n" date; Printf.eprintf "xmp_date: Malformed date string (no year): %s\n%!" date;
make_xmp_date_from_components d make_xmp_date_from_components d
end end
| _ -> | _ ->
Printf.eprintf "xmp_date: Malformed date string (no prefix): %s\n" date; Printf.eprintf "xmp_date: Malformed date string (no prefix): %s\n%!" date;
make_xmp_date_from_components d make_xmp_date_from_components d
with with
Exit -> make_xmp_date_from_components d Exit -> make_xmp_date_from_components d
@ -4587,7 +4591,9 @@ let trim_marks_page fast pdf n page =
@ [Pdfops.Op_Q] @ [Pdfops.Op_Q]
in in
Pdfpage.postpend_operators pdf ops ~fast page Pdfpage.postpend_operators pdf ops ~fast page
| _, _ -> Printf.eprintf "warning: no /TrimBox found on page %i\n" n; page | _, _ ->
(*Printf.eprintf "warning: no /TrimBox found on page %i\n%!" n;*)
page
let trim_marks ?(fast=false) pdf range = let trim_marks ?(fast=false) pdf range =
process_pages (ppstub (trim_marks_page fast pdf)) pdf range process_pages (ppstub (trim_marks_page fast pdf)) pdf range

View File

@ -58,8 +58,7 @@ let error s =
if not !stay_on_error then exit 2 else raise StayOnError if not !stay_on_error then exit 2 else raise StayOnError
let soft_error s = let soft_error s =
Printf.eprintf "%s\n" s; Printf.eprintf "%s\n%!" s;
flush stderr;
if not !stay_on_error then exit 1 else raise StayOnError if not !stay_on_error then exit 1 else raise StayOnError
let parse_pagespec pdf spec = let parse_pagespec pdf spec =
@ -796,7 +795,7 @@ let detect_duplicate_op op =
match args.op with match args.op with
None | Some Shift -> () None | Some Shift -> ()
| _ -> | _ ->
Printf.eprintf "Operation %s already specified, so cannot specify operation %s.\nUse AND from Chapter 1 of the manual to chain commands together.\n" Printf.eprintf "Operation %s already specified, so cannot specify operation %s.\nUse AND from Chapter 1 of the manual to chain commands together.\n%!"
(string_of_op (unopt args.op)) (string_of_op op); (string_of_op (unopt args.op)) (string_of_op op);
exit 1 exit 1
@ -1321,7 +1320,7 @@ let setrevision n =
(a, b, c, d, e, _)::more -> (a, b, c, d, e, _)::more ->
args.inputs <- (a, b, c, d, e, Some n) :: more args.inputs <- (a, b, c, d, e, Some n) :: more
| [] -> | [] ->
Printf.eprintf "Warning. -revision ignored. Put it after the filename.\n" Printf.eprintf "Warning. -revision ignored. Put it after the filename.\n%!"
let setoutline () = let setoutline () =
args.outline <- true args.outline <- true
@ -2261,7 +2260,7 @@ let filesize name =
(* Embed missing fonts with Ghostscript. *) (* Embed missing fonts with Ghostscript. *)
let embed_missing_fonts fi fo = let embed_missing_fonts fi fo =
if args.path_to_ghostscript = "" then begin if args.path_to_ghostscript = "" then begin
Printf.eprintf "Please supply path to gs with -gs\n"; Printf.eprintf "Please supply path to gs with -gs\n%!";
exit 2 exit 2
end; end;
let gscall = let gscall =
@ -2271,15 +2270,15 @@ let embed_missing_fonts fi fo =
in in
match Sys.command gscall with match Sys.command gscall with
| 0 -> exit 0 | 0 -> exit 0
| _ -> Printf.eprintf "Font embedding failed.\n"; exit 2 | _ -> Printf.eprintf "Font embedding failed.\n%!"; exit 2
(* Mend PDF file with Ghostscript. We use this if a file is malformed and CPDF (* Mend PDF file with Ghostscript. We use this if a file is malformed and CPDF
* cannot mend it. It is copied to a temporary file, fixed, then we return None or Some (pdf). *) * cannot mend it. It is copied to a temporary file, fixed, then we return None or Some (pdf). *)
let mend_pdf_file_with_ghostscript filename = let mend_pdf_file_with_ghostscript filename =
if args.path_to_ghostscript = "" then begin if args.path_to_ghostscript = "" then begin
Printf.eprintf "Please supply path to gs with -gs\n"; Printf.eprintf "Please supply path to gs with -gs\n%!";
end; end;
Printf.eprintf "CPDF could not mend. Attempting to mend file with gs\n"; Printf.eprintf "CPDF could not mend. Attempting to mend file with gs\n%!";
flush stderr; flush stderr;
let tmpout = Filename.temp_file "cpdf" ".pdf" in let tmpout = Filename.temp_file "cpdf" ".pdf" in
tempfiles := tmpout::!tempfiles; tempfiles := tmpout::!tempfiles;
@ -2289,8 +2288,8 @@ let mend_pdf_file_with_ghostscript filename =
" -dBATCH " ^ Filename.quote filename " -dBATCH " ^ Filename.quote filename
in in
match Sys.command gscall with match Sys.command gscall with
| 0 -> Printf.eprintf "Succeeded!\n"; flush stderr; tmpout | 0 -> Printf.eprintf "Succeeded!\n%!"; flush stderr; tmpout
| _ -> Printf.eprintf "Could not fix malformed PDF file, even with gs\n"; flush stderr; exit 2 | _ -> Printf.eprintf "Could not fix malformed PDF file, even with gs\n%!"; flush stderr; exit 2
exception StdInBytes of bytes exception StdInBytes of bytes
@ -2316,7 +2315,7 @@ let rec get_single_pdf ?(decrypt=true) ?(fail=false) op read_lazy =
let failout () = let failout () =
if fail then begin if fail then begin
(* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *) (* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *)
Printf.eprintf "Failed to read gs-reconstructed PDF even though gs succeeded\n"; Printf.eprintf "Failed to read gs-reconstructed PDF even though gs succeeded\n%!";
exit 2 exit 2
end end
in in
@ -2324,12 +2323,12 @@ let rec get_single_pdf ?(decrypt=true) ?(fail=false) op read_lazy =
begin match args.inputs with begin match args.inputs with
(InFile inname, _, _, _, _, _)::_ -> (InFile inname, _, _, _, _, _)::_ ->
begin try ignore (close_in (open_in inname)) with _ -> begin try ignore (close_in (open_in inname)) with _ ->
Printf.eprintf "File %s does not exist\n" inname; Printf.eprintf "File %s does not exist\n%!" inname;
exit 2 exit 2
end end
| _ -> () | _ -> ()
end; end;
Printf.eprintf "get_single_pdf: failed to read malformed PDF file. Consider using -gs-malformed\n"; Printf.eprintf "get_single_pdf: failed to read malformed PDF file. Consider using -gs-malformed\n%!";
exit 2 exit 2
in in
match args.inputs with match args.inputs with
@ -2399,7 +2398,7 @@ let rec get_pdf_from_input_kind ?(read_lazy=false) ?(decrypt=true) ?(fail=false)
let failout () = let failout () =
if fail then begin if fail then begin
(* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *) (* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *)
Printf.eprintf "Failed to read gs-reconstructed PDF even though gs succeeded\n"; Printf.eprintf "Failed to read gs-reconstructed PDF even though gs succeeded\n%!";
exit 2 exit 2
end end
in in
@ -2407,12 +2406,12 @@ let rec get_pdf_from_input_kind ?(read_lazy=false) ?(decrypt=true) ?(fail=false)
begin match input with begin match input with
(InFile inname, _, _, _, _, _) -> (InFile inname, _, _, _, _, _) ->
begin try ignore (close_in (open_in inname)) with _ -> begin try ignore (close_in (open_in inname)) with _ ->
Printf.eprintf "File %s does not exist\n" inname; Printf.eprintf "File %s does not exist\n%!" inname;
exit 2 exit 2
end end
| _ -> () | _ -> ()
end; end;
Printf.eprintf "get_pdf_from_input_kind: failed to read malformed PDF file. Consider using -gs-malformed\n"; Printf.eprintf "get_pdf_from_input_kind: failed to read malformed PDF file. Consider using -gs-malformed\n%!";
exit 2 exit 2
in in
match ik with match ik with
@ -2628,7 +2627,7 @@ let write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf =
with with
End_of_file -> End_of_file ->
begin try close_in temp_file; Sys.remove temp with begin try close_in temp_file; Sys.remove temp with
e -> Printf.eprintf "Failed to remove temp file %s (%s)\n" temp (Printexc.to_string e) e -> Printf.eprintf "Failed to remove temp file %s (%s)\n%!" temp (Printexc.to_string e)
end; end;
flush stdout (*r For Windows *) flush stdout (*r For Windows *)
@ -2879,14 +2878,14 @@ let write_image pdf resources name image =
begin match args.path_to_p2p with begin match args.path_to_p2p with
| "" -> | "" ->
begin match args.path_to_im with begin match args.path_to_im with
"" -> Printf.eprintf "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n" "" -> Printf.eprintf "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n%!"
| _ -> | _ ->
begin match begin match
Sys.command (Filename.quote_command args.path_to_im [pnm; png]) Sys.command (Filename.quote_command args.path_to_im [pnm; png])
with with
0 -> Sys.remove pnm 0 -> Sys.remove pnm
| _ -> | _ ->
Printf.eprintf "Call to imagemagick failed: did you specify -p2p correctly?\n"; Printf.eprintf "Call to imagemagick failed: did you specify -p2p correctly?\n%!";
Sys.remove pnm Sys.remove pnm
end end
end end
@ -2896,12 +2895,12 @@ let write_image pdf resources name image =
with with
| 0 -> Sys.remove pnm | 0 -> Sys.remove pnm
| _ -> | _ ->
Printf.eprintf "Call to pnmtopng failed: did you specify -p2p correctly?\n"; Printf.eprintf "Call to pnmtopng failed: did you specify -p2p correctly?\n%!";
Sys.remove pnm Sys.remove pnm
end end
end end
| _ -> | _ ->
Printf.eprintf "Unsupported image type when extracting image %s " name Printf.eprintf "Unsupported image type when extracting image %s %!" name
let written = ref [] let written = ref []
@ -3077,7 +3076,7 @@ let parse_whiteboxes filename =
result result
with with
e -> e ->
Printf.eprintf "%s\n" ("parse_whiteboxes " ^ Printexc.to_string e); Printf.eprintf "%s\n%!" ("parse_whiteboxes " ^ Printexc.to_string e);
raise (Failure "") raise (Failure "")
(* Make start, end pairs from a sortedrange *) (* Make start, end pairs from a sortedrange *)
@ -3138,7 +3137,7 @@ let calculate_margins filename pdf (s, e) =
(* Clean up temp files *) (* Clean up temp files *)
Sys.remove "margins.txt"; Sys.remove "margins.txt";
Sys.remove "waste.txt" Sys.remove "waste.txt"
| _ -> Printf.eprintf "Call to ghostscript failed." | _ -> Printf.eprintf "Call to ghostscript failed.\n%!"
let calculate_margins filename pdf range = let calculate_margins filename pdf range =
iter (calculate_margins filename pdf) (startends_of_range (sort compare range)) iter (calculate_margins filename pdf) (startends_of_range (sort compare range))
@ -3214,7 +3213,7 @@ let dump_attachment out pdf (_, embeddedfile) =
for x = 0 to bytes_size efdata - 1 do output_byte fh (bget efdata x) done; for x = 0 to bytes_size efdata - 1 do output_byte fh (bget efdata x) done;
close_out fh close_out fh
with with
e -> Printf.eprintf "Failed to write attachment to %s\n" filename; e -> Printf.eprintf "Failed to write attachment to %s\n%!" filename;
end end
| _ -> () | _ -> ()
@ -3709,7 +3708,7 @@ let go () =
Pdf.iter_stream Pdf.iter_stream
(function stream -> (function stream ->
try Pdfcodec.decode_pdfstream_until_unknown pdf stream with try Pdfcodec.decode_pdfstream_until_unknown pdf stream with
e -> Printf.eprintf "Decode failure: %s. Carrying on...\n" (Printexc.to_string e); ()) e -> Printf.eprintf "Decode failure: %s. Carrying on...\n%!" (Printexc.to_string e); ())
pdf; pdf;
write_pdf ~is_decompress:true false pdf write_pdf ~is_decompress:true false pdf
| Some Compress -> | Some Compress ->
@ -4251,7 +4250,7 @@ let go () =
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
calculate_margins s pdf range calculate_margins s pdf range
| _ -> | _ ->
Printf.eprintf "CSP3: Too many input files or input not a file" Printf.eprintf "CSP3: Too many input files or input not a file\n%!"
end end
| Some ExtractText -> | Some ExtractText ->
let pdf = get_single_pdf args.op true in let pdf = get_single_pdf args.op true in
@ -4371,7 +4370,7 @@ let check_command_line () =
let parse_argv () s specs anon_fun usage_msg = let parse_argv () s specs anon_fun usage_msg =
if args.debug then if args.debug then
Array.iter (Printf.eprintf "arg: %s\n") Sys.argv; Array.iter (Printf.eprintf "arg: %s\n%!") Sys.argv;
Arg.parse_argv ~current:(ref 0) s specs anon_fun usage_msg; Arg.parse_argv ~current:(ref 0) s specs anon_fun usage_msg;
check_command_line () check_command_line ()
@ -4393,7 +4392,7 @@ let expand_args argv =
let gs_malformed_force fi fo = let gs_malformed_force fi fo =
if args.path_to_ghostscript = "" then begin if args.path_to_ghostscript = "" then begin
Printf.eprintf "Please supply path to gs with -gs\n"; Printf.eprintf "Please supply path to gs with -gs\n%!";
exit 2 exit 2
end; end;
let gscall = let gscall =
@ -4403,7 +4402,7 @@ let gs_malformed_force fi fo =
in in
match Sys.command gscall with match Sys.command gscall with
| 0 -> exit 0 | 0 -> exit 0
| _ -> Printf.eprintf "Failed to mend file.\n"; exit 2 | _ -> Printf.eprintf "Failed to mend file.\n%!"; exit 2
(* FIXME: Now we call this repeatedly from interactive programs, careful to (* FIXME: Now we call this repeatedly from interactive programs, careful to
ensure that all memory is cleaned. See clearance of filenames hashtable, for ensure that all memory is cleaned. See clearance of filenames hashtable, for

View File

@ -12,6 +12,17 @@ type t =
_tm_yday : int; _tm_yday : int;
_tm_isdst : bool} _tm_isdst : bool}
(* A fixed, clock-independent time record: seconds, minutes and hours are 0,
   day of month is 1, month index is 0, weekday and day-of-year are 0, and
   the DST flag is false. Elsewhere in this change it is passed as [~time]
   to [strftime] when the CPDF_REPRODUCIBLE_DATES environment variable is
   "true", so that the expansion of "now" does not depend on the real clock.
   NOTE(review): [_tm_year] is set to 2000 here; whether the formatting
   functions treat this field as an absolute year or as an offset (as C's
   struct tm does with years since 1900) is not visible from this
   definition -- confirm against the %Y formatter before relying on the
   printed year. *)
let dummy =
{_tm_sec = 0;
_tm_min = 0;
_tm_hour = 0;
_tm_mday = 1;
_tm_mon = 0;
_tm_year = 2000;
_tm_wday = 0;
_tm_yday = 0;
_tm_isdst = false}
let strf_A t = let strf_A t =
match t._tm_wday with match t._tm_wday with
| 0 -> "Sunday" | 1 -> "Monday" | 2 -> "Tuesday" | 0 -> "Sunday" | 1 -> "Monday" | 2 -> "Tuesday"
@ -169,7 +180,7 @@ let return_date () =
let current_time () = let current_time () =
try return_date () with try return_date () with
e -> e ->
Printf.eprintf "Failed to retrieve time due to %s\n" (Printexc.to_string e); Printf.eprintf "Failed to retrieve time due to %s\n%!" (Printexc.to_string e);
{_tm_sec = 0; {_tm_sec = 0;
_tm_min = 0; _tm_min = 0;
_tm_hour = 0; _tm_hour = 0;

View File

@ -15,7 +15,11 @@ type t =
_tm_yday : int; _tm_yday : int;
_tm_isdst : bool} _tm_isdst : bool}
(** Get the time now *)
val current_time : unit -> t val current_time : unit -> t
(** If time omitted, the current time is used. *) (** A dummy time value *)
val dummy : t
(** Strftime. If time omitted, the current time is used. *)
val strftime : ?time:t -> string -> string val strftime : ?time:t -> string -> string