Ints -> floats in process-image parameters

This commit is contained in:
John Whitington 2024-02-01 16:38:07 +00:00
parent 333242c25a
commit ed30ea40c0
3 changed files with 59 additions and 69 deletions

View File

@ -530,14 +530,14 @@ type args =
mutable toc_bookmark : bool; mutable toc_bookmark : bool;
mutable idir_only_pdfs : bool; mutable idir_only_pdfs : bool;
mutable no_warn_rotate : bool; mutable no_warn_rotate : bool;
mutable jpegquality : int; mutable jpegquality : float;
mutable jpegqualitylossless : int; mutable jpegqualitylossless : float;
mutable onebppmethod : string; mutable onebppmethod : string;
mutable pixel_threshold : int; mutable pixel_threshold : int;
mutable length_threshold : int; mutable length_threshold : int;
mutable percentage_threshold : int; mutable percentage_threshold : float;
mutable dpi_threshold : int; mutable dpi_threshold : float;
mutable resample_factor : int; mutable resample_factor : float;
mutable resample_interpolate : bool; mutable resample_interpolate : bool;
mutable jbig2_lossy_threshold : float; mutable jbig2_lossy_threshold : float;
mutable extract_stream_decompress : bool} mutable extract_stream_decompress : bool}
@ -665,14 +665,14 @@ let args =
toc_bookmark = true; toc_bookmark = true;
idir_only_pdfs = false; idir_only_pdfs = false;
no_warn_rotate = false; no_warn_rotate = false;
jpegquality = 100; jpegquality = 100.;
jpegqualitylossless = 101; jpegqualitylossless = 101.;
onebppmethod = ""; onebppmethod = "";
pixel_threshold = 25; pixel_threshold = 25;
length_threshold = 100; length_threshold = 100;
percentage_threshold = 99; percentage_threshold = 99.;
dpi_threshold = 0; dpi_threshold = 0.;
resample_factor = 101; resample_factor = 101.;
resample_interpolate = false; resample_interpolate = false;
jbig2_lossy_threshold = 0.85; jbig2_lossy_threshold = 0.85;
extract_stream_decompress = false} extract_stream_decompress = false}
@ -786,14 +786,14 @@ let reset_arguments () =
args.toc_title <- "Table of Contents"; args.toc_title <- "Table of Contents";
args.toc_bookmark <- true; args.toc_bookmark <- true;
args.idir_only_pdfs <- false; args.idir_only_pdfs <- false;
args.jpegquality <- 100; args.jpegquality <- 100.;
args.jpegqualitylossless <- 101; args.jpegqualitylossless <- 101.;
args.onebppmethod <- ""; args.onebppmethod <- "";
args.pixel_threshold <- 25; args.pixel_threshold <- 25;
args.length_threshold <- 100; args.length_threshold <- 100;
args.percentage_threshold <- 99; args.percentage_threshold <- 99.;
args.dpi_threshold <- 0; args.dpi_threshold <- 0.;
args.resample_factor <- 101; args.resample_factor <- 101.;
args.resample_interpolate <- false; args.resample_interpolate <- false;
args.jbig2_lossy_threshold <- 0.85; args.jbig2_lossy_threshold <- 0.85;
args.extract_stream_decompress <- false; args.extract_stream_decompress <- false;
@ -1840,7 +1840,7 @@ let setlosslessresample i =
args.resample_factor <- i args.resample_factor <- i
let setlosslessresampledpi i = let setlosslessresampledpi i =
args.resample_factor <- -i args.resample_factor <- -.i
let setresampleinterpolate () = let setresampleinterpolate () =
args.resample_interpolate <- true args.resample_interpolate <- true
@ -2651,10 +2651,10 @@ and specs =
Arg.String setjbig2encpath, Arg.String setjbig2encpath,
" Path to jbig2enc executable"); " Path to jbig2enc executable");
("-jpeg-to-jpeg", ("-jpeg-to-jpeg",
Arg.Int setjpegquality, Arg.Float setjpegquality,
" Set JPEG quality for existing JPEGs"); " Set JPEG quality for existing JPEGs");
("-lossless-to-jpeg", ("-lossless-to-jpeg",
Arg.Int setjpegqualitylossless, Arg.Float setjpegqualitylossless,
" Set JPEG quality for existing lossless images"); " Set JPEG quality for existing lossless images");
("-1bpp-method", ("-1bpp-method",
Arg.String set1bppmethod, Arg.String set1bppmethod,
@ -2669,16 +2669,16 @@ and specs =
Arg.Int setlengththreshold, Arg.Int setlengththreshold,
" Only process images with data longer than this"); " Only process images with data longer than this");
("-percentage-threshold", ("-percentage-threshold",
Arg.Int setpercentagethreshold, Arg.Float setpercentagethreshold,
" Only substitute lossy image when smaller than this"); " Only substitute lossy image when smaller than this");
("-dpi-threshold", ("-dpi-threshold",
Arg.Int setdpithreshold, Arg.Float setdpithreshold,
" Only process image when always higher than this dpi"); " Only process image when always higher than this dpi");
("-lossless-resample", ("-lossless-resample",
Arg.Int setlosslessresample, Arg.Float setlosslessresample,
" Resample lossless images to given part of original"); " Resample lossless images to given part of original");
("-lossless-resample-dpi", ("-lossless-resample-dpi",
Arg.Int setlosslessresampledpi, Arg.Float setlosslessresampledpi,
" Resample lossless images to given DPI"); " Resample lossless images to given DPI");
("-resample-interpolate", ("-resample-interpolate",
Arg.Unit setresampleinterpolate, Arg.Unit setresampleinterpolate,

View File

@ -505,7 +505,7 @@ let jpeg_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~q
close_out fh; close_out fh;
let retcode = let retcode =
let command = let command =
(Filename.quote_command path_to_convert [out; "-quality"; string_of_int q ^ "%"; out2]) (Filename.quote_command path_to_convert [out; "-quality"; string_of_float q ^ "%"; out2]) (*FIXME check percentage as float here *)
in in
(*Printf.printf "%S\n" command;*) Sys.command command (*Printf.printf "%S\n" command;*) Sys.command command
in in
@ -513,7 +513,7 @@ let jpeg_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~q
begin begin
let result = open_in_bin out2 in let result = open_in_bin out2 in
let newsize = in_channel_length result in let newsize = in_channel_length result in
let perc_ok = float newsize /. float size < float_of_int percentage_threshold /. 100. in let perc_ok = float newsize /. float size < percentage_threshold /. 100. in
if newsize < size && perc_ok then if newsize < size && perc_ok then
begin begin
if !debug_image_processing then Printf.printf "JPEG to JPEG %i -> %i (%i%%)\n%!" size newsize (int_of_float (float newsize /. float size *. 100.)); if !debug_image_processing then Printf.printf "JPEG to JPEG %i -> %i (%i%%)\n%!" size newsize (int_of_float (float newsize /. float size *. 100.));
@ -589,7 +589,7 @@ let lossless_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshol
let command = let command =
(Filename.quote_command path_to_convert (Filename.quote_command path_to_convert
((if components = 4 then ["-depth"; "8"; "-size"; string_of_int w ^ "x" ^ string_of_int h] else []) @ ((if components = 4 then ["-depth"; "8"; "-size"; string_of_int w ^ "x" ^ string_of_int h] else []) @
[out; "-quality"; string_of_int qlossless ^ "%"] @ [out; "-quality"; string_of_float qlossless ^ "%"] @
(if components = 1 then ["-colorspace"; "Gray"] else if components = 4 then ["-colorspace"; "CMYK"] else []) @ (if components = 1 then ["-colorspace"; "Gray"] else if components = 4 then ["-colorspace"; "CMYK"] else []) @
[out2])) [out2]))
in in
@ -599,7 +599,7 @@ let lossless_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshol
begin begin
let result = open_in_bin out2 in let result = open_in_bin out2 in
let newsize = in_channel_length result in let newsize = in_channel_length result in
let perc_ok = float newsize /. float size < float_of_int percentage_threshold /. 100. in let perc_ok = float newsize /. float size < percentage_threshold /. 100. in
if newsize < size && perc_ok then if newsize < size && perc_ok then
begin begin
if !debug_image_processing then Printf.printf "lossless to JPEG %i -> %i (%i%%)\n%!" size newsize (int_of_float (float newsize /. float size *. 100.)); if !debug_image_processing then Printf.printf "lossless to JPEG %i -> %i (%i%%)\n%!" size newsize (int_of_float (float newsize /. float size *. 100.));
@ -629,7 +629,7 @@ let lossless_resample pdf ~pixel_threshold ~length_threshold ~factor ~interpolat
Filename.quote_command path_to_convert Filename.quote_command path_to_convert
((if components = 4 then ["-depth"; "8"; "-size"; string_of_int w ^ "x" ^ string_of_int h] else []) @ ((if components = 4 then ["-depth"; "8"; "-size"; string_of_int w ^ "x" ^ string_of_int h] else []) @
(if components = 1 then ["-define"; "png:color-type=0"; "-colorspace"; "Gray"] else if components = 3 then ["-define"; "-png:color-type=2"; "-colorspace"; "RGB"] else if components = 4 then ["-colorspace"; "CMYK"] else []) @ (if components = 1 then ["-define"; "png:color-type=0"; "-colorspace"; "Gray"] else if components = 3 then ["-define"; "-png:color-type=2"; "-colorspace"; "RGB"] else if components = 4 then ["-colorspace"; "CMYK"] else []) @
[if interpolate && components > -2 then "-resize" else "-sample"; string_of_int factor ^ "%"; out; out2]) [if interpolate && components > -2 then "-resize" else "-sample"; string_of_float factor ^ "%"; out; out2])
in in
(*Printf.printf "%S\n" command;*) (*Printf.printf "%S\n" command;*)
Sys.command command Sys.command command
@ -662,11 +662,9 @@ let lossless_resample pdf ~pixel_threshold ~length_threshold ~factor ~interpolat
let lossless_resample_target_dpi objnum pdf ~pixel_threshold ~length_threshold ~factor ~target_dpi_info ~interpolate ~path_to_convert s dict reference = let lossless_resample_target_dpi objnum pdf ~pixel_threshold ~length_threshold ~factor ~target_dpi_info ~interpolate ~path_to_convert s dict reference =
Printf.printf "lossless_resample_target_dpi\n"; Printf.printf "lossless_resample_target_dpi\n";
let real_factor = let real_factor = factor /. Hashtbl.find target_dpi_info objnum *. 100. in
int_of_float (float_of_int factor /. Hashtbl.find target_dpi_info objnum *. 100.) Printf.printf "real_factor = %f\n" real_factor;
in if real_factor < 100. then
Printf.printf "real_factor = %i\n" real_factor;
if real_factor < 100 then
lossless_resample pdf ~pixel_threshold ~length_threshold ~factor:real_factor ~interpolate ~path_to_convert s dict reference lossless_resample pdf ~pixel_threshold ~length_threshold ~factor:real_factor ~interpolate ~path_to_convert s dict reference
let recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_jbig2enc pdf s dict reference = let recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_jbig2enc pdf s dict reference =
@ -727,7 +725,7 @@ let preprocess_jbig2_lossy ~path_to_jbig2enc ~jbig2_lossy_threshold ~length_thre
| Pdf.Stream ({contents = dict, _} as reference) -> | Pdf.Stream ({contents = dict, _} as reference) ->
let old = !reference in let old = !reference in
let restore () = reference := old in let restore () = reference := old in
if Hashtbl.mem inrange objnum && (dpi_threshold = 0 || Hashtbl.mem highdpi objnum) then begin match if Hashtbl.mem inrange objnum && (dpi_threshold = 0. || Hashtbl.mem highdpi objnum) then begin match
Pdf.lookup_direct pdf "/Subtype" dict, Pdf.lookup_direct pdf "/Subtype" dict,
Pdf.lookup_direct pdf "/BitsPerComponent" dict, Pdf.lookup_direct pdf "/BitsPerComponent" dict,
Pdf.lookup_direct pdf "/ImageMask" dict Pdf.lookup_direct pdf "/ImageMask" dict
@ -808,7 +806,7 @@ let preprocess_jbig2_lossy ~path_to_jbig2enc ~jbig2_lossy_threshold ~length_thre
remove (jbig2out ^ ".sym") remove (jbig2out ^ ".sym")
let process let process
?q ?qlossless ?onebppmethod ~jbig2_lossy_threshold ~length_threshold ~percentage_threshold ~pixel_threshold ~dpi_threshold ~q ~qlossless ~onebppmethod ~jbig2_lossy_threshold ~length_threshold ~percentage_threshold ~pixel_threshold ~dpi_threshold
~factor ~interpolate ~path_to_jbig2enc ~path_to_convert range pdf ~factor ~interpolate ~path_to_jbig2enc ~path_to_convert range pdf
= =
let inrange = let inrange =
@ -818,14 +816,14 @@ let process
in in
let highdpi, target_dpi_info = let highdpi, target_dpi_info =
let objnums, dpi = let objnums, dpi =
if dpi_threshold = 0 && factor > 0 then ([], []) else if dpi_threshold = 0. && factor > 0. then ([], []) else
let results = image_resolution pdf range max_float in let results = image_resolution pdf range max_float in
(*iter (fun (_, _, _, _, wdpi, hdpi, objnum) -> Printf.printf "From image_resolution %f %f %i\n" wdpi hdpi objnum) results;*) (*iter (fun (_, _, _, _, wdpi, hdpi, objnum) -> Printf.printf "From image_resolution %f %f %i\n" wdpi hdpi objnum) results;*)
let cmp (_, _, _, _, _, _, a) (_, _, _, _, _, _, b) = compare a b in let cmp (_, _, _, _, _, _, a) (_, _, _, _, _, _, b) = compare a b in
let sets = collate cmp (sort cmp results) in let sets = collate cmp (sort cmp results) in
let heads = map hd (map (sort (fun (_, _, _, _, a, b, _) (_, _, _, _, c, d, _) -> compare (fmin a b) (fmin c d))) sets) in let heads = map hd (map (sort (fun (_, _, _, _, a, b, _) (_, _, _, _, c, d, _) -> compare (fmin a b) (fmin c d))) sets) in
(*iter (fun (_, _, _, _, wdpi, hdpi, objnum) -> Printf.printf "Lowest resolution exemplar %f %f %i\n" wdpi hdpi objnum) heads;*) (*iter (fun (_, _, _, _, wdpi, hdpi, objnum) -> Printf.printf "Lowest resolution exemplar %f %f %i\n" wdpi hdpi objnum) heads;*)
let needed = keep (fun (_, _, _, _, wdpi, hdpi, objnum) -> fmin wdpi hdpi > float_of_int dpi_threshold) heads in let needed = keep (fun (_, _, _, _, wdpi, hdpi, objnum) -> fmin wdpi hdpi > dpi_threshold) heads in
(*iter (fun (_, _, _, _, wdpi, hdpi, objnum) -> Printf.printf "keep %f %f %i\n" wdpi hdpi objnum) needed;*) (*iter (fun (_, _, _, _, wdpi, hdpi, objnum) -> Printf.printf "keep %f %f %i\n" wdpi hdpi objnum) needed;*)
map (fun (_, _, _, _, _, _, objnum) -> objnum) needed, map (fun (_, _, _, _, _, _, objnum) -> objnum) needed,
let r = let r =
@ -835,33 +833,29 @@ let process
in in
hashset_of_list objnums, hashtable_of_dictionary dpi hashset_of_list objnums, hashtable_of_dictionary dpi
in in
begin match onebppmethod with Some "JBIG2Lossy" -> preprocess_jbig2_lossy ~path_to_jbig2enc ~jbig2_lossy_threshold ~dpi_threshold ~length_threshold ~pixel_threshold inrange highdpi pdf | _ -> () end; begin match onebppmethod with "JBIG2Lossy" -> preprocess_jbig2_lossy ~path_to_jbig2enc ~jbig2_lossy_threshold ~dpi_threshold ~length_threshold ~pixel_threshold inrange highdpi pdf | _ -> () end;
let nobjects = Pdf.objcard pdf in let nobjects = Pdf.objcard pdf in
let ndone = ref 0 in let ndone = ref 0 in
let process_obj objnum s = let process_obj objnum s =
match s with match s with
| Pdf.Stream ({contents = dict, _} as reference) -> | Pdf.Stream ({contents = dict, _} as reference) ->
ndone += 1; ndone += 1;
if Hashtbl.mem inrange objnum && (dpi_threshold = 0 || Hashtbl.mem highdpi objnum) then begin match if Hashtbl.mem inrange objnum && (dpi_threshold = 0. || Hashtbl.mem highdpi objnum) then begin match
Pdf.lookup_direct pdf "/Subtype" dict, Pdf.lookup_direct pdf "/Subtype" dict,
Pdf.lookup_direct pdf "/Filter" dict, Pdf.lookup_direct pdf "/Filter" dict,
Pdf.lookup_direct pdf "/BitsPerComponent" dict, Pdf.lookup_direct pdf "/BitsPerComponent" dict,
Pdf.lookup_direct pdf "/ImageMask" dict Pdf.lookup_direct pdf "/ImageMask" dict
with with
| Some (Pdf.Name "/Image"), Some (Pdf.Name "/DCTDecode" | Pdf.Array [Pdf.Name "/DCTDecode"]), _, _ -> | Some (Pdf.Name "/Image"), Some (Pdf.Name "/DCTDecode" | Pdf.Array [Pdf.Name "/DCTDecode"]), _, _ ->
begin match q with if q < 100. then
| Some q ->
if q < 100 then
begin begin
if !debug_image_processing then Printf.printf "(%i/%i) Object %i (JPEG)... %!" !ndone nobjects objnum; if !debug_image_processing then Printf.printf "(%i/%i) Object %i (JPEG)... %!" !ndone nobjects objnum;
jpeg_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~q ~path_to_convert s dict reference jpeg_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~q ~path_to_convert s dict reference
end end
| None -> ()
end
| Some (Pdf.Name "/Image"), _, Some (Pdf.Integer 1), _ | Some (Pdf.Name "/Image"), _, Some (Pdf.Integer 1), _
| Some (Pdf.Name "/Image"), _, _, Some (Pdf.Boolean true) -> | Some (Pdf.Name "/Image"), _, _, Some (Pdf.Boolean true) ->
begin match onebppmethod with begin match onebppmethod with
| Some "JBIG2" -> | "JBIG2" ->
begin begin
if !debug_image_processing then Printf.printf "(%i/%i) object %i (1bpp)... %!" !ndone nobjects objnum; if !debug_image_processing then Printf.printf "(%i/%i) object %i (1bpp)... %!" !ndone nobjects objnum;
recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_jbig2enc pdf s dict reference recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_jbig2enc pdf s dict reference
@ -869,27 +863,23 @@ let process
| _ -> () | _ -> ()
end end
| Some (Pdf.Name "/Image"), _, _, _ -> | Some (Pdf.Name "/Image"), _, _, _ ->
Printf.printf "Lossless resample: factor = %i\n" factor; Printf.printf "Lossless resample: factor = %f\n" factor;
begin match qlossless with if qlossless < 101. then
| Some qlossless ->
if qlossless < 101 then
begin begin
if !debug_image_processing then Printf.printf "(%i/%i) object %i (lossless)... %!" !ndone nobjects objnum; if !debug_image_processing then Printf.printf "(%i/%i) object %i (lossless)... %!" !ndone nobjects objnum;
lossless_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~qlossless ~path_to_convert s dict reference lossless_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~qlossless ~path_to_convert s dict reference
end end
else else
begin begin
if factor < 101 then if factor < 101. then
begin begin
if !debug_image_processing then Printf.printf "(%i/%i) object %i (lossless)... %!" !ndone nobjects objnum; if !debug_image_processing then Printf.printf "(%i/%i) object %i (lossless)... %!" !ndone nobjects objnum;
if factor < 0 then if factor < 0. then
lossless_resample_target_dpi objnum pdf ~pixel_threshold ~length_threshold ~factor:~-factor ~target_dpi_info ~interpolate ~path_to_convert s dict reference lossless_resample_target_dpi objnum pdf ~pixel_threshold ~length_threshold ~factor:~-.factor ~target_dpi_info ~interpolate ~path_to_convert s dict reference
else else
lossless_resample pdf ~pixel_threshold ~length_threshold ~factor ~interpolate ~path_to_convert s dict reference lossless_resample pdf ~pixel_threshold ~length_threshold ~factor ~interpolate ~path_to_convert s dict reference
end end
end end
| None -> ()
end
| _ -> () (* not an image *) | _ -> () (* not an image *)
end end
| _ -> ndone += 1 (* not a stream *) | _ -> ndone += 1 (* not a stream *)

View File

@ -16,9 +16,9 @@ val images : Pdf.t -> int list -> Cpdfyojson.Safe.t
(** Reprocess images *) (** Reprocess images *)
val process : val process :
?q:int -> ?qlossless:int -> ?onebppmethod:string -> jbig2_lossy_threshold:float -> q:float -> qlossless:float -> onebppmethod:string -> jbig2_lossy_threshold:float ->
length_threshold:int -> percentage_threshold:int -> pixel_threshold:int -> length_threshold:int -> percentage_threshold:float -> pixel_threshold:int ->
dpi_threshold:int -> factor:int -> interpolate:bool -> dpi_threshold:float -> factor:float -> interpolate:bool ->
path_to_jbig2enc:string -> path_to_convert:string -> int list -> Pdf.t -> unit path_to_jbig2enc:string -> path_to_convert:string -> int list -> Pdf.t -> unit
(**/**) (**/**)