From 20b9f59b2fd96333d8e1b6f5ade562a1ad385ad7 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Sun, 24 Dec 2023 13:54:21 +0000 Subject: [PATCH] -pixel-threshold --- cpdfcommand.ml | 15 ++++++++++++--- cpdfimage.ml | 32 ++++++++++++++++++-------------- cpdfimage.mli | 2 +- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index fbe0b1f..542759b 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -525,7 +525,8 @@ type args = mutable no_warn_rotate : bool; mutable jpegquality : int; mutable jpegqualitylossless : int; - mutable onebppmethod : string} + mutable onebppmethod : string; + mutable pixel_threshold : int} let args = {op = None; @@ -651,7 +652,8 @@ let args = no_warn_rotate = false; jpegquality = 100; jpegqualitylossless = 100; - onebppmethod = ""} + onebppmethod = ""; + pixel_threshold = 25} let reset_arguments () = args.op <- None; @@ -759,6 +761,7 @@ let reset_arguments () = args.jpegquality <- 100; args.jpegqualitylossless <- 100; args.onebppmethod <- ""; + args.pixel_threshold <- 0; (* Do not reset original_filename or cpdflin or was_encrypted or was_decrypted_with_owner or recrypt or producer or creator or path_to_* or gs_malformed or gs_quiet or no-warn-rotate, since we want these to work @@ -1938,6 +1941,9 @@ let setjpegqualitylossless q = let set1bppmethod m = args.onebppmethod <- m +let setpixelthreshold i = + args.pixel_threshold <- i + (* Parse a control file, make an argv, and then make Arg parse it. *) let rec make_control_argv_and_parse filename = control_args := !control_args @ parse_control_file filename @@ -2727,6 +2733,9 @@ and specs = ("-1bpp-method", Arg.String set1bppmethod, " Set 1bpp compression method for existing images"); + ("-pixel-threshold", + Arg.Int setpixelthreshold, + " Only process images with more pixels than this"); ("-squeeze", Arg.Unit setsqueeze, " Squeeze"); @@ -4464,7 +4473,7 @@ let go () = write_pdf false (Cpdfchop.chop ~x ~y ~columns:args.impose_columns ~btt:args.impose_btt ~rtl:args.impose_rtl pdf range) | Some ProcessImages -> let pdf = get_single_pdf args.op false in - Cpdfimage.process pdf ~q:args.jpegquality ~qlossless:args.jpegqualitylossless ~onebppmethod:args.onebppmethod ~path_to_jbig2enc:args.path_to_jbig2enc ~path_to_convert:args.path_to_convert; + Cpdfimage.process pdf ~q:args.jpegquality ~qlossless:args.jpegqualitylossless ~onebppmethod:args.onebppmethod ~pixel_threshold:args.pixel_threshold ~path_to_jbig2enc:args.path_to_jbig2enc ~path_to_convert:args.path_to_convert; write_pdf false pdf (* Advise the user if a combination of command line flags makes little sense, diff --git a/cpdfimage.ml b/cpdfimage.ml index ce38af2..0bb61ed 100644 --- a/cpdfimage.ml +++ b/cpdfimage.ml @@ -490,7 +490,10 @@ let image_of_input fobj i = let pdf, pageroot = Pdfpage.add_pagetree [page] pdf in Pdfpage.add_root pageroot [] pdf -let jpeg_to_jpeg pdf ~q ~path_to_convert s dict reference = +let jpeg_to_jpeg pdf ~pixel_threshold ~q ~path_to_convert s dict reference = + let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in + let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in + if w * h < pixel_threshold then () else Pdf.getstream s; let out = Filename.temp_file "cpdf" "convertin" ^ ".jpg" in let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jpg" in @@ -531,16 +534,17 @@ let suitable_num pdf dict = | Some (Pdf.Array (Pdf.Name "/Separation"::_)) -> ~-1 | _ -> 0 -let lossless_to_jpeg pdf ~qlossless ~path_to_convert s dict reference = +let lossless_to_jpeg pdf ~pixel_threshold ~qlossless ~path_to_convert s dict reference = let bpc = Pdf.lookup_direct pdf "/BitsPerComponent" dict in let components = suitable_num pdf dict in match components, bpc with | (1 | 3 | 4 | -1), Some (Pdf.Integer 8) -> + let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in + let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in + if w * h < pixel_threshold then () else let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in Pdfcodec.decode_pdfstream_until_unknown pdf s; begin match Pdf.lookup_direct pdf "/Filter" (fst !reference) with Some _ -> () | None -> - let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in - let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in let out = Filename.temp_file "cpdf" "convertin" ^ (if suitable_num pdf dict < 4 then ".pnm" else ".cmyk") in let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jpg" in let fh = open_out_bin out in @@ -578,23 +582,23 @@ let lossless_to_jpeg pdf ~qlossless ~path_to_convert s dict reference = Sys.remove out2 end | colspace, bpc -> - let colspace = Pdf.lookup_direct pdf "/ColorSpace" dict in + (*let colspace = Pdf.lookup_direct pdf "/ColorSpace" dict in let colspace, bpc, filter = (match colspace with None -> "none" | Some x -> Pdfwrite.string_of_pdf x), (match bpc with None -> "none" | Some x -> Pdfwrite.string_of_pdf x), (match Pdf.lookup_direct pdf "/Filter" dict with None -> "none" | Some x -> Pdfwrite.string_of_pdf x) in print_string (Pdfwrite.string_of_pdf dict); - print_string (Printf.sprintf "%s (%s) [%s]\n" colspace bpc filter); + print_string (Printf.sprintf "%s (%s) [%s]\n" colspace bpc filter);*) () (* an image we cannot or do not handle *) -let recompress_1bpp_jbig2_lossless ~path_to_jbig2enc pdf s dict reference = +let recompress_1bpp_jbig2_lossless ~pixel_threshold ~path_to_jbig2enc pdf s dict reference = + let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in + let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in + if w * h < pixel_threshold then () else (* jbig2enc fails on tiny images *) let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in Pdfcodec.decode_pdfstream_until_unknown pdf s; match Pdf.lookup_direct pdf "/Filter" (fst !reference) with Some _ -> () | None -> - let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in - let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in - if w < 5 || h < 5 then () else (* jbig2enc fails on tiny images *) let out = Filename.temp_file "cpdf" "convertin" ^ ".pnm" in let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jbig2" in let fh = open_out_bin out in @@ -628,7 +632,7 @@ let recompress_1bpp_jbig2_lossless ~path_to_jbig2enc pdf s dict reference = (* JPEG to JPEG: RGB and CMYK JPEGS *) (* Lossless to JPEG: 8bpp Grey, 8bpp RGB, 8bpp CMYK including separation and ICCBased colourspaces *) (* 1 bit: anything to JBIG2 lossless (no globals) *) -let process ?q ?qlossless ?onebppmethod pdf ~path_to_jbig2enc ~path_to_convert = +let process ?q ?qlossless ?onebppmethod ~pixel_threshold pdf ~path_to_jbig2enc ~path_to_convert = let process_obj _ s = match s with | Pdf.Stream ({contents = dict, _} as reference) -> @@ -640,18 +644,18 @@ let process ?q ?qlossless ?onebppmethod pdf ~path_to_jbig2enc ~path_to_convert = with | Some (Pdf.Name "/Image"), Some (Pdf.Name "/DCTDecode" | Pdf.Array [Pdf.Name "/DCTDecode"]), _, _ -> begin match q with - | Some q -> jpeg_to_jpeg pdf ~q ~path_to_convert s dict reference + | Some q -> jpeg_to_jpeg pdf ~pixel_threshold ~q ~path_to_convert s dict reference | None -> () end | Some (Pdf.Name "/Image"), _, Some (Pdf.Integer 1), _ | Some (Pdf.Name "/Image"), _, _, Some (Pdf.Boolean true) -> begin match onebppmethod with - | Some "JBIG2" -> recompress_1bpp_jbig2_lossless ~path_to_jbig2enc pdf s dict reference + | Some "JBIG2" -> recompress_1bpp_jbig2_lossless ~pixel_threshold ~path_to_jbig2enc pdf s dict reference | _ -> () end | Some (Pdf.Name "/Image"), _, _, _ -> begin match qlossless with - | Some qlossless -> lossless_to_jpeg pdf ~qlossless ~path_to_convert s dict reference + | Some qlossless -> lossless_to_jpeg pdf ~pixel_threshold ~qlossless ~path_to_convert s dict reference | None -> () end | _ -> () (* not an image *) diff --git a/cpdfimage.mli b/cpdfimage.mli index 7cc6c7a..d919add 100644 --- a/cpdfimage.mli +++ b/cpdfimage.mli @@ -11,7 +11,7 @@ val image_resolution : Pdf.t -> int list -> float -> (int * string * int * int * (** List images in JSON format *) val images : Pdf.t -> int list -> Cpdfyojson.Safe.t -val process : ?q:int -> ?qlossless:int -> ?onebppmethod:string -> Pdf.t -> path_to_jbig2enc:string -> path_to_convert:string -> unit +val process : ?q:int -> ?qlossless:int -> ?onebppmethod:string -> pixel_threshold:int -> Pdf.t -> path_to_jbig2enc:string -> path_to_convert:string -> unit (**/**) val image_of_input : (unit -> Pdfio.bytes -> Pdf.pdfobject * (int * Pdf.pdfobject) list) -> Pdfio.input -> Pdf.t