From 8fda026ad7cc2faaae7c86b510bf2ca68fb56290 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Fri, 23 Jul 2021 16:26:07 +0100 Subject: [PATCH] stderr unbuffered --- Changes | 17 ++++++++++----- cpdf.ml | 28 ++++++++++++++---------- cpdfcommand.ml | 57 ++++++++++++++++++++++++------------------------ cpdfstrftime.ml | 13 ++++++++++- cpdfstrftime.mli | 6 ++++- 5 files changed, 73 insertions(+), 48 deletions(-) diff --git a/Changes b/Changes index 7a47882..e9e89bc 100644 --- a/Changes +++ b/Changes @@ -1,4 +1,9 @@ -Version 2.4 (June 2021) +2.5 (Upcoming) + +o Environment variable CPDF_REPRODUCIBLE_DATES for testing +o Effectively make stderr unbuffered + +2.4 (June 2021) o New operation -extract-images o New operation -output-json et al. to export PDF files in JSON format @@ -11,13 +16,13 @@ o Appearance streams transformed when transforming annotations o Bookmark destination positions transformed when transforming pages o No longer depends on Bigarray or Unix modules -Version 2.3 (patchlevel 1, December 2019) +2.3 (patchlevel 1, December 2019) o Fixed bug which prevented -info working on encrypted files o Allow -shift with -add-text for additional adjustment o Prepend and postpend directly to page content streams -Version 2.3 (October 2019) +2.3 (October 2019) o Directly set and remove Trim, Art, and Bleed boxes o Dump attachments to file @@ -32,14 +37,14 @@ o Create PDF files from scatch o Remove single images by name o Add trim marks -Version 2.2 (patchlevel 1) +2.2 (patchlevel 1) o Fix for inability to read null objects in streams o Workaround for Adobe "Error 21" on re-saving encrypted files o More efficient bookmark operations on files with many pages o New operation -hard-box to clip contents to a given box -Version 2.2 (March 2017) +2.2 (March 2017) o Perform arithmetic on dimensions when specifying size or position o Add simple rectangles to PDF to blank things out @@ -57,7 +62,7 @@ o -print-page-labels o -squeeze replaces smpdf o Preserve more sharing 
of data when doing merges and page alterations -Version 2.1 (December 2014) +2.1 (December 2014) o Encryption now much, much faster o Faster parsing of delayed object streams on large files diff --git a/cpdf.ml b/cpdf.ml index 3f7da52..2d87686 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -587,7 +587,11 @@ let rec process_text time text m = | (s, r)::t -> process_text time (string_replace_all_lazy s r text) t let expand_date = function - | "now" -> Cpdfstrftime.strftime "D:%Y%m%d%H%M%S" + | "now" -> + begin match Sys.getenv_opt "CPDF_REPRODUCIBLE_DATES" with + | Some "true" -> Cpdfstrftime.strftime ~time:Cpdfstrftime.dummy "D:%Y%m%d%H%M%S" + | _ -> Cpdfstrftime.strftime "D:%Y%m%d%H%M%S" + end | x -> x (* For uses of process_pages which don't need to deal with matrices, this @@ -628,7 +632,7 @@ let protect fast pdf resources content = let qs = length (keep (eq Pdfops.Op_q) ops) in let bigqs = length (keep (eq Pdfops.Op_Q) ops) in let deficit = if qs > bigqs then qs - bigqs else 0 in - if deficit <> 0 then Printf.eprintf "Q Deficit was nonzero. Fixing. %i\n" deficit; + if deficit <> 0 then Printf.eprintf "Q Deficit was nonzero. Fixing. %i\n%!" deficit; deficit in let addstream ops = Pdf.addobj pdf (Pdfops.stream_of_ops ops) in @@ -884,7 +888,7 @@ let list_attached_files pdf = | Some (Pdf.String s) -> begin match Pdf.lookup_direct pdf "/FS" annot with | Some ((Pdf.Dictionary _) as d) -> - Printf.eprintf "%s\n" (Pdfwrite.string_of_pdf d); + (*Printf.eprintf "%s\n%!" 
(Pdfwrite.string_of_pdf d);*) begin match Pdf.lookup_direct pdf "/EF" d with | Some ((Pdf.Dictionary _) as d) -> begin match Pdf.lookup_direct pdf "/F" d with @@ -2260,7 +2264,7 @@ let change_pattern_matrices_resources pdf tr resources = end with Pdftransform.NonInvertable -> - Printf.eprintf "Warning: noninvertible matrix"; + Printf.eprintf "Warning: noninvertible matrix\n%!"; resources let change_pattern_matrices_page pdf tr page = @@ -2371,14 +2375,14 @@ let transform_annotations pdf transform rest = Hashtbl.add seen_nums i (); transform_xobject_in_place pdf transform i end - | _ -> Printf.eprintf "Malformed /AP structure b"; ()) + | _ -> Printf.eprintf "Malformed /AP structure b\n%!"; ()) dict - | _ -> Printf.eprintf "Malformed /AP structure c"; ()) + | _ -> Printf.eprintf "Malformed /AP structure c\n%!"; ()) dict - | _ -> Printf.eprintf "Malformed /AP structure\n"; () + | _ -> Printf.eprintf "Malformed /AP structure\n%!"; () end;*) Pdf.addobj_given_num pdf (i, annot) - | _ -> Printf.eprintf "transform_annotations: not indirect") + | _ -> Printf.eprintf "transform_annotations: not indirect\n%!") annots | _ -> () @@ -3776,11 +3780,11 @@ let xmp_date date = | _ -> raise Exit end | _ -> - Printf.eprintf "xmp_date: Malformed date string (no year): %s\n" date; + Printf.eprintf "xmp_date: Malformed date string (no year): %s\n%!" date; make_xmp_date_from_components d end | _ -> - Printf.eprintf "xmp_date: Malformed date string (no prefix): %s\n" date; + Printf.eprintf "xmp_date: Malformed date string (no prefix): %s\n%!" date; make_xmp_date_from_components d with Exit -> make_xmp_date_from_components d @@ -4587,7 +4591,9 @@ let trim_marks_page fast pdf n page = @ [Pdfops.Op_Q] in Pdfpage.postpend_operators pdf ops ~fast page - | _, _ -> Printf.eprintf "warning: no /TrimBox found on page %i\n" n; page + | _, _ -> + (*Printf.eprintf "warning: no /TrimBox found on page %i\n%!" 
n;*) + page let trim_marks ?(fast=false) pdf range = process_pages (ppstub (trim_marks_page fast pdf)) pdf range diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 24560ca..b12785b 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -58,8 +58,7 @@ let error s = if not !stay_on_error then exit 2 else raise StayOnError let soft_error s = - Printf.eprintf "%s\n" s; - flush stderr; + Printf.eprintf "%s\n%!" s; if not !stay_on_error then exit 1 else raise StayOnError let parse_pagespec pdf spec = @@ -796,7 +795,7 @@ let detect_duplicate_op op = match args.op with None | Some Shift -> () | _ -> - Printf.eprintf "Operation %s already specified, so cannot specify operation %s.\nUse AND from Chapter 1 of the manual to chain commands together.\n" + Printf.eprintf "Operation %s already specified, so cannot specify operation %s.\nUse AND from Chapter 1 of the manual to chain commands together.\n%!" (string_of_op (unopt args.op)) (string_of_op op); exit 1 @@ -1321,7 +1320,7 @@ let setrevision n = (a, b, c, d, e, _)::more -> args.inputs <- (a, b, c, d, e, Some n) :: more | [] -> - Printf.eprintf "Warning. -revision ignored. Put it after the filename.\n" + Printf.eprintf "Warning. -revision ignored. Put it after the filename.\n%!" let setoutline () = args.outline <- true @@ -2261,7 +2260,7 @@ let filesize name = (* Embed missing fonts with Ghostscript. *) let embed_missing_fonts fi fo = if args.path_to_ghostscript = "" then begin - Printf.eprintf "Please supply path to gs with -gs\n"; + Printf.eprintf "Please supply path to gs with -gs\n%!"; exit 2 end; let gscall = @@ -2271,15 +2270,15 @@ let embed_missing_fonts fi fo = in match Sys.command gscall with | 0 -> exit 0 - | _ -> Printf.eprintf "Font embedding failed.\n"; exit 2 + | _ -> Printf.eprintf "Font embedding failed.\n%!"; exit 2 (* Mend PDF file with Ghostscript. We use this if a file is malformed and CPDF * cannot mend it. It is copied to a temporary file, fixed, then we return None or Some (pdf). 
*) let mend_pdf_file_with_ghostscript filename = if args.path_to_ghostscript = "" then begin - Printf.eprintf "Please supply path to gs with -gs\n"; + Printf.eprintf "Please supply path to gs with -gs\n%!"; end; - Printf.eprintf "CPDF could not mend. Attempting to mend file with gs\n"; + Printf.eprintf "CPDF could not mend. Attempting to mend file with gs\n%!"; flush stderr; let tmpout = Filename.temp_file "cpdf" ".pdf" in tempfiles := tmpout::!tempfiles; @@ -2289,8 +2288,8 @@ let mend_pdf_file_with_ghostscript filename = " -dBATCH " ^ Filename.quote filename in match Sys.command gscall with - | 0 -> Printf.eprintf "Succeeded!\n"; flush stderr; tmpout - | _ -> Printf.eprintf "Could not fix malformed PDF file, even with gs\n"; flush stderr; exit 2 + | 0 -> Printf.eprintf "Succeeded!\n%!"; flush stderr; tmpout + | _ -> Printf.eprintf "Could not fix malformed PDF file, even with gs\n%!"; flush stderr; exit 2 exception StdInBytes of bytes @@ -2316,7 +2315,7 @@ let rec get_single_pdf ?(decrypt=true) ?(fail=false) op read_lazy = let failout () = if fail then begin (* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *) - Printf.eprintf "Failed to read gs-reconstructed PDF even though gs succeeded\n"; + Printf.eprintf "Failed to read gs-reconstructed PDF even though gs succeeded\n%!"; exit 2 end in @@ -2324,12 +2323,12 @@ let rec get_single_pdf ?(decrypt=true) ?(fail=false) op read_lazy = begin match args.inputs with (InFile inname, _, _, _, _, _)::_ -> begin try ignore (close_in (open_in inname)) with _ -> - Printf.eprintf "File %s does not exist\n" inname; + Printf.eprintf "File %s does not exist\n%!" inname; exit 2 end | _ -> () end; - Printf.eprintf "get_single_pdf: failed to read malformed PDF file. Consider using -gs-malformed\n"; + Printf.eprintf "get_single_pdf: failed to read malformed PDF file. 
Consider using -gs-malformed\n%!"; exit 2 in match args.inputs with @@ -2399,7 +2398,7 @@ let rec get_pdf_from_input_kind ?(read_lazy=false) ?(decrypt=true) ?(fail=false) let failout () = if fail then begin (* Reconstructed with ghostscript, but then we couldn't read it even then. Do not loop. *) - Printf.eprintf "Failed to read gs-reconstructed PDF even though gs succeeded\n"; + Printf.eprintf "Failed to read gs-reconstructed PDF even though gs succeeded\n%!"; exit 2 end in @@ -2407,12 +2406,12 @@ let rec get_pdf_from_input_kind ?(read_lazy=false) ?(decrypt=true) ?(fail=false) begin match input with (InFile inname, _, _, _, _, _) -> begin try ignore (close_in (open_in inname)) with _ -> - Printf.eprintf "File %s does not exist\n" inname; + Printf.eprintf "File %s does not exist\n%!" inname; exit 2 end | _ -> () end; - Printf.eprintf "get_pdf_from_input_kind: failed to read malformed PDF file. Consider using -gs-malformed\n"; + Printf.eprintf "get_pdf_from_input_kind: failed to read malformed PDF file. Consider using -gs-malformed\n%!"; exit 2 in match ik with @@ -2628,7 +2627,7 @@ let write_pdf ?(encryption = None) ?(is_decompress=false) mk_id pdf = with End_of_file -> begin try close_in temp_file; Sys.remove temp with - e -> Printf.eprintf "Failed to remove temp file %s (%s)\n" temp (Printexc.to_string e) + e -> Printf.eprintf "Failed to remove temp file %s (%s)\n%!" temp (Printexc.to_string e) end; flush stdout (*r For Windows *) @@ -2879,14 +2878,14 @@ let write_image pdf resources name image = begin match args.path_to_p2p with | "" -> begin match args.path_to_im with - "" -> Printf.eprintf "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n" + "" -> Printf.eprintf "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n%!" 
| _ -> begin match Sys.command (Filename.quote_command args.path_to_im [pnm; png]) with 0 -> Sys.remove pnm | _ -> - Printf.eprintf "Call to imagemagick failed: did you specify -p2p correctly?\n"; + Printf.eprintf "Call to imagemagick failed: did you specify -im correctly?\n%!"; Sys.remove pnm end end @@ -2896,12 +2895,12 @@ let write_image pdf resources name image = with | 0 -> Sys.remove pnm | _ -> - Printf.eprintf "Call to pnmtopng failed: did you specify -p2p correctly?\n"; + Printf.eprintf "Call to pnmtopng failed: did you specify -p2p correctly?\n%!"; Sys.remove pnm end end | _ -> - Printf.eprintf "Unsupported image type when extracting image %s " name + Printf.eprintf "Unsupported image type when extracting image %s %!" name let written = ref [] @@ -3077,7 +3076,7 @@ let parse_whiteboxes filename = result with e -> - Printf.eprintf "%s\n" ("parse_whiteboxes " ^ Printexc.to_string e); + Printf.eprintf "%s\n%!" ("parse_whiteboxes " ^ Printexc.to_string e); raise (Failure "") (* Make start, end pairs from a sortedrange *) @@ -3138,7 +3137,7 @@ let calculate_margins filename pdf (s, e) = (* Clean up temp files *) Sys.remove "margins.txt"; Sys.remove "waste.txt" - | _ -> Printf.eprintf "Call to ghostscript failed." + | _ -> Printf.eprintf "Call to ghostscript failed.\n%!" let calculate_margins filename pdf range = iter (calculate_margins filename pdf) (startends_of_range (sort compare range)) @@ -3214,7 +3213,7 @@ let dump_attachment out pdf (_, embeddedfile) = for x = 0 to bytes_size efdata - 1 do output_byte fh (bget efdata x) done; close_out fh with - e -> Printf.eprintf "Failed to write attachment to %s\n" filename; + e -> Printf.eprintf "Failed to write attachment to %s\n%!" filename; end | _ -> () @@ -3709,7 +3708,7 @@ let go () = Pdf.iter_stream (function stream -> try Pdfcodec.decode_pdfstream_until_unknown pdf stream with - e -> Printf.eprintf "Decode failure: %s. Carrying on...\n" (Printexc.to_string e); ()) + e -> Printf.eprintf "Decode failure: %s. 
Carrying on...\n%!" (Printexc.to_string e); ()) pdf; write_pdf ~is_decompress:true false pdf | Some Compress -> @@ -4251,7 +4250,7 @@ let go () = let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in calculate_margins s pdf range | _ -> - Printf.eprintf "CSP3: Too many input files or input not a file" + Printf.eprintf "CSP3: Too many input files or input not a file\n%!" end | Some ExtractText -> let pdf = get_single_pdf args.op true in @@ -4371,7 +4370,7 @@ let check_command_line () = let parse_argv () s specs anon_fun usage_msg = if args.debug then - Array.iter (Printf.eprintf "arg: %s\n") Sys.argv; + Array.iter (Printf.eprintf "arg: %s\n%!") Sys.argv; Arg.parse_argv ~current:(ref 0) s specs anon_fun usage_msg; check_command_line () @@ -4393,7 +4392,7 @@ let expand_args argv = let gs_malformed_force fi fo = if args.path_to_ghostscript = "" then begin - Printf.eprintf "Please supply path to gs with -gs\n"; + Printf.eprintf "Please supply path to gs with -gs\n%!"; exit 2 end; let gscall = @@ -4403,7 +4402,7 @@ let gs_malformed_force fi fo = in match Sys.command gscall with | 0 -> exit 0 - | _ -> Printf.eprintf "Failed to mend file.\n"; exit 2 + | _ -> Printf.eprintf "Failed to mend file.\n%!"; exit 2 (* FIXME: Now we call this repeatedly from interactive programs, careful to ensure that all memory is cleaned. 
See clearance of filenames hashtable, for diff --git a/cpdfstrftime.ml b/cpdfstrftime.ml index f0826fd..a1733a6 100644 --- a/cpdfstrftime.ml +++ b/cpdfstrftime.ml @@ -12,6 +12,17 @@ type t = _tm_yday : int; _tm_isdst : bool} +let dummy = + {_tm_sec = 0; + _tm_min = 0; + _tm_hour = 0; + _tm_mday = 1; + _tm_mon = 0; + _tm_year = 2000; + _tm_wday = 0; + _tm_yday = 0; + _tm_isdst = false} + let strf_A t = match t._tm_wday with | 0 -> "Sunday" | 1 -> "Monday" | 2 -> "Tuesday" @@ -169,7 +180,7 @@ let return_date () = let current_time () = try return_date () with e -> - Printf.eprintf "Failed to retrieve time due to %s\n" (Printexc.to_string e); + Printf.eprintf "Failed to retrieve time due to %s\n%!" (Printexc.to_string e); {_tm_sec = 0; _tm_min = 0; _tm_hour = 0; diff --git a/cpdfstrftime.mli b/cpdfstrftime.mli index ddf2076..d1cd20f 100644 --- a/cpdfstrftime.mli +++ b/cpdfstrftime.mli @@ -15,7 +15,11 @@ type t = _tm_yday : int; _tm_isdst : bool} +(** Get the time now *) val current_time : unit -> t -(** If time omitted, the current time is used. *) +(** A dummy time value *) +val dummy : t + +(** Strftime. If time omitted, the current time is used. *) val strftime : ?time:t -> string -> string