length_threshold working

This commit is contained in:
John Whitington 2023-12-28 11:32:43 +00:00
parent 94cb707dd9
commit 6f3cf112f9
2 changed files with 14 additions and 12 deletions

View File

@ -653,7 +653,7 @@ let args =
idir_only_pdfs = false; idir_only_pdfs = false;
no_warn_rotate = false; no_warn_rotate = false;
jpegquality = 100; jpegquality = 100;
jpegqualitylossless = 100; jpegqualitylossless = 101;
onebppmethod = ""; onebppmethod = "";
pixel_threshold = 25; pixel_threshold = 25;
length_threshold = 100; length_threshold = 100;
@ -763,7 +763,7 @@ let reset_arguments () =
args.toc_bookmark <- true; args.toc_bookmark <- true;
args.idir_only_pdfs <- false; args.idir_only_pdfs <- false;
args.jpegquality <- 100; args.jpegquality <- 100;
args.jpegqualitylossless <- 100; args.jpegqualitylossless <- 101;
args.onebppmethod <- ""; args.onebppmethod <- "";
args.pixel_threshold <- 25; args.pixel_threshold <- 25;
args.length_threshold <- 100; args.length_threshold <- 100;

View File

@ -490,17 +490,17 @@ let image_of_input fobj i =
let pdf, pageroot = Pdfpage.add_pagetree [page] pdf in let pdf, pageroot = Pdfpage.add_pagetree [page] pdf in
Pdfpage.add_root pageroot [] pdf Pdfpage.add_root pageroot [] pdf
let jpeg_to_jpeg pdf ~pixel_threshold ~q ~path_to_convert s dict reference = let jpeg_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~q ~path_to_convert s dict reference =
let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in
let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in
if w * h < pixel_threshold then () else if w * h < pixel_threshold then () else
Pdf.getstream s; Pdf.getstream s;
let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in
if size < length_threshold then () else
let out = Filename.temp_file "cpdf" "convertin" ^ ".jpg" in let out = Filename.temp_file "cpdf" "convertin" ^ ".jpg" in
let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jpg" in let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jpg" in
let fh = open_out_bin out in let fh = open_out_bin out in
let size = begin match s with Pdf.Stream {contents = _, Pdf.Got d} -> Pdfio.bytes_to_output_channel fh d | _ -> () end;
begin match s with Pdf.Stream {contents = _, Pdf.Got d} -> Pdfio.bytes_to_output_channel fh d; bytes_size d | _ -> 0 end
in
close_out fh; close_out fh;
let retcode = let retcode =
let command = let command =
@ -534,7 +534,7 @@ let suitable_num pdf dict =
| Some (Pdf.Array (Pdf.Name "/Separation"::_)) -> ~-1 | Some (Pdf.Array (Pdf.Name "/Separation"::_)) -> ~-1
| _ -> 0 | _ -> 0
let lossless_to_jpeg pdf ~pixel_threshold ~qlossless ~path_to_convert s dict reference = let lossless_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~qlossless ~path_to_convert s dict reference =
let bpc = Pdf.lookup_direct pdf "/BitsPerComponent" dict in let bpc = Pdf.lookup_direct pdf "/BitsPerComponent" dict in
let components = suitable_num pdf dict in let components = suitable_num pdf dict in
match components, bpc with match components, bpc with
@ -543,6 +543,7 @@ let lossless_to_jpeg pdf ~pixel_threshold ~qlossless ~path_to_convert s dict ref
let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in
if w * h < pixel_threshold then () else if w * h < pixel_threshold then () else
let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in
if size < length_threshold then () else
Pdfcodec.decode_pdfstream_until_unknown pdf s; Pdfcodec.decode_pdfstream_until_unknown pdf s;
begin match Pdf.lookup_direct pdf "/Filter" (fst !reference) with Some _ -> () | None -> begin match Pdf.lookup_direct pdf "/Filter" (fst !reference) with Some _ -> () | None ->
let out = Filename.temp_file "cpdf" "convertin" ^ (if suitable_num pdf dict < 4 then ".pnm" else ".cmyk") in let out = Filename.temp_file "cpdf" "convertin" ^ (if suitable_num pdf dict < 4 then ".pnm" else ".cmyk") in
@ -592,11 +593,12 @@ let lossless_to_jpeg pdf ~pixel_threshold ~qlossless ~path_to_convert s dict ref
print_string (Printf.sprintf "%s (%s) [%s]\n" colspace bpc filter);*) print_string (Printf.sprintf "%s (%s) [%s]\n" colspace bpc filter);*)
() (* an image we cannot or do not handle *) () (* an image we cannot or do not handle *)
let recompress_1bpp_jbig2_lossless ~pixel_threshold ~path_to_jbig2enc pdf s dict reference = let recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_jbig2enc pdf s dict reference =
let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in
let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in
if w * h < pixel_threshold then () else (* jbig2enc fails on tiny images *) if w * h < pixel_threshold then () else (* pixel_threshold (but also, jbig2enc fails on tiny images) *)
let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in
if size < length_threshold then () else
Pdfcodec.decode_pdfstream_until_unknown pdf s; Pdfcodec.decode_pdfstream_until_unknown pdf s;
match Pdf.lookup_direct pdf "/Filter" (fst !reference) with Some _ -> () | None -> match Pdf.lookup_direct pdf "/Filter" (fst !reference) with Some _ -> () | None ->
let out = Filename.temp_file "cpdf" "convertin" ^ ".pnm" in let out = Filename.temp_file "cpdf" "convertin" ^ ".pnm" in
@ -648,20 +650,20 @@ let process
| Some (Pdf.Name "/Image"), Some (Pdf.Name "/DCTDecode" | Pdf.Array [Pdf.Name "/DCTDecode"]), _, _ -> | Some (Pdf.Name "/Image"), Some (Pdf.Name "/DCTDecode" | Pdf.Array [Pdf.Name "/DCTDecode"]), _, _ ->
begin match q with begin match q with
| Some q -> | Some q ->
if q < 100 then jpeg_to_jpeg pdf ~pixel_threshold ~q ~path_to_convert s dict reference if q < 100 then jpeg_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~q ~path_to_convert s dict reference
| None -> () | None -> ()
end end
| Some (Pdf.Name "/Image"), _, Some (Pdf.Integer 1), _ | Some (Pdf.Name "/Image"), _, Some (Pdf.Integer 1), _
| Some (Pdf.Name "/Image"), _, _, Some (Pdf.Boolean true) -> | Some (Pdf.Name "/Image"), _, _, Some (Pdf.Boolean true) ->
begin match onebppmethod with begin match onebppmethod with
| Some "JBIG2" -> | Some "JBIG2" ->
recompress_1bpp_jbig2_lossless ~pixel_threshold ~path_to_jbig2enc pdf s dict reference recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_jbig2enc pdf s dict reference
| _ -> () | _ -> ()
end end
| Some (Pdf.Name "/Image"), _, _, _ -> | Some (Pdf.Name "/Image"), _, _, _ ->
begin match qlossless with begin match qlossless with
| Some qlossless -> | Some qlossless ->
if qlossless < 100 then lossless_to_jpeg pdf ~pixel_threshold ~qlossless ~path_to_convert s dict reference if qlossless < 101 then lossless_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~qlossless ~path_to_convert s dict reference
| None -> () | None -> ()
end end
| _ -> () (* not an image *) | _ -> () (* not an image *)