towards better -process-images-info
This commit is contained in:
parent
c953d0f2a9
commit
70e9092e01
48
cpdfimage.ml
48
cpdfimage.ml
|
@ -494,10 +494,10 @@ let image_of_input fobj i =
|
||||||
let jpeg_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~q ~path_to_convert s dict reference =
|
let jpeg_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~q ~path_to_convert s dict reference =
|
||||||
let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in
|
let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in
|
||||||
let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in
|
let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in
|
||||||
if w * h < pixel_threshold then () else
|
if w * h < pixel_threshold then (if !debug_image_processing then Printf.printf "pixel threshold not met\n%!") else
|
||||||
Pdf.getstream s;
|
Pdf.getstream s;
|
||||||
let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in
|
let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in
|
||||||
if size < length_threshold then () else
|
if size < length_threshold then (if !debug_image_processing then Printf.printf "length threshold not met\n%!") else
|
||||||
let out = Filename.temp_file "cpdf" "convertin" ^ ".jpg" in
|
let out = Filename.temp_file "cpdf" "convertin" ^ ".jpg" in
|
||||||
let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jpg" in
|
let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jpg" in
|
||||||
let fh = open_out_bin out in
|
let fh = open_out_bin out in
|
||||||
|
@ -542,9 +542,9 @@ let lossless_out pdf ~pixel_threshold ~length_threshold extension s dict referen
|
||||||
| (1 | 3 | 4 | -1), Some (Pdf.Integer 8) ->
|
| (1 | 3 | 4 | -1), Some (Pdf.Integer 8) ->
|
||||||
let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in
|
let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in
|
||||||
let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in
|
let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in
|
||||||
if w * h < pixel_threshold then None else
|
if w * h < pixel_threshold then (if !debug_image_processing then Printf.printf "pixel threshold not met\n%!"; None) else
|
||||||
let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in
|
let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in
|
||||||
if size < length_threshold then None else
|
if size < length_threshold then (if !debug_image_processing then Printf.printf "length threshold not met\n%!"; None) else
|
||||||
begin
|
begin
|
||||||
Pdfcodec.decode_pdfstream_until_unknown pdf s;
|
Pdfcodec.decode_pdfstream_until_unknown pdf s;
|
||||||
match Pdf.lookup_direct pdf "/Filter" (fst !reference) with Some _ -> None | None ->
|
match Pdf.lookup_direct pdf "/Filter" (fst !reference) with Some _ -> None | None ->
|
||||||
|
@ -586,13 +586,17 @@ let lossless_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshol
|
||||||
let newsize = in_channel_length result in
|
let newsize = in_channel_length result in
|
||||||
if newsize < size then
|
if newsize < size then
|
||||||
begin
|
begin
|
||||||
if !debug_image_processing then Printf.printf "Lossless to JPEG %i -> %i (%i%%)\n%!" size newsize (int_of_float (float newsize /. float size *. 100.));
|
if !debug_image_processing then Printf.printf "lossless to JPEG %i -> %i (%i%%)\n%!" size newsize (int_of_float (float newsize /. float size *. 100.));
|
||||||
reference :=
|
reference :=
|
||||||
(Pdf.add_dict_entry
|
(Pdf.add_dict_entry
|
||||||
(Pdf.add_dict_entry dict "/Length" (Pdf.Integer newsize))
|
(Pdf.add_dict_entry dict "/Length" (Pdf.Integer newsize))
|
||||||
"/Filter"
|
"/Filter"
|
||||||
(Pdf.Name "/DCTDecode")),
|
(Pdf.Name "/DCTDecode")),
|
||||||
Pdf.Got (Pdfio.bytes_of_input_channel result)
|
Pdf.Got (Pdfio.bytes_of_input_channel result)
|
||||||
|
end
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
if !debug_image_processing then Printf.printf "no size reduction\n%!"
|
||||||
end;
|
end;
|
||||||
close_in result
|
close_in result
|
||||||
end;
|
end;
|
||||||
|
@ -617,13 +621,17 @@ let lossless_resample pdf ~pixel_threshold ~length_threshold ~percentage_thresho
|
||||||
let newsize = in_channel_length result in
|
let newsize = in_channel_length result in
|
||||||
if newsize < size then
|
if newsize < size then
|
||||||
begin
|
begin
|
||||||
if !debug_image_processing then Printf.printf "Lossless to JPEG %i -> %i (%i%%)\n%!" size newsize (int_of_float (float newsize /. float size *. 100.));
|
if !debug_image_processing then Printf.printf "lossless resample %i -> %i (%i%%)\n%!" size newsize (int_of_float (float newsize /. float size *. 100.));
|
||||||
reference :=
|
reference :=
|
||||||
(Pdf.add_dict_entry
|
(Pdf.add_dict_entry
|
||||||
(Pdf.add_dict_entry dict "/Length" (Pdf.Integer newsize))
|
(Pdf.add_dict_entry dict "/Length" (Pdf.Integer newsize))
|
||||||
"/Filter"
|
"/Filter"
|
||||||
(Pdf.Name "/DCTDecode")),
|
(Pdf.Name "/DCTDecode")),
|
||||||
Pdf.Got (Pdfio.bytes_of_input_channel result)
|
Pdf.Got (Pdfio.bytes_of_input_channel result)
|
||||||
|
end
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
if !debug_image_processing then Printf.printf "no size reduction\n%!"
|
||||||
end;
|
end;
|
||||||
close_in result
|
close_in result
|
||||||
end;
|
end;
|
||||||
|
@ -633,11 +641,11 @@ let lossless_resample pdf ~pixel_threshold ~length_threshold ~percentage_thresho
|
||||||
let recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_jbig2enc pdf s dict reference =
|
let recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_jbig2enc pdf s dict reference =
|
||||||
let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in
|
let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in
|
||||||
let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in
|
let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in
|
||||||
if w * h < pixel_threshold then () else (* pixel_threshold (but also, jbig2enc fails on tiny images) *)
|
if w * h < pixel_threshold then (if !debug_image_processing then Printf.printf "pixel threshold not met\n%!") else (* (but also, jbig2enc fails on tiny images) *)
|
||||||
let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in
|
let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in
|
||||||
if size < length_threshold then () else
|
if size < length_threshold then if !debug_image_processing then Printf.printf "length threshold not met\n%!" else
|
||||||
Pdfcodec.decode_pdfstream_until_unknown pdf s;
|
Pdfcodec.decode_pdfstream_until_unknown pdf s;
|
||||||
match Pdf.lookup_direct pdf "/Filter" (fst !reference) with Some _ -> () | None ->
|
match Pdf.lookup_direct pdf "/Filter" (fst !reference) with Some _ -> if !debug_image_processing then Printf.printf "could not decode - skipping\n%!" | None ->
|
||||||
let out = Filename.temp_file "cpdf" "convertin" ^ ".pnm" in
|
let out = Filename.temp_file "cpdf" "convertin" ^ ".pnm" in
|
||||||
let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jbig2" in
|
let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jbig2" in
|
||||||
let fh = open_out_bin out in
|
let fh = open_out_bin out in
|
||||||
|
@ -662,6 +670,10 @@ let recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_j
|
||||||
"/Filter"
|
"/Filter"
|
||||||
(Pdf.Name "/JBIG2Decode")) "/DecodeParms"),
|
(Pdf.Name "/JBIG2Decode")) "/DecodeParms"),
|
||||||
Pdf.Got (Pdfio.bytes_of_input_channel result)
|
Pdf.Got (Pdfio.bytes_of_input_channel result)
|
||||||
|
end
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
if !debug_image_processing then Printf.printf "no size reduction\n%!"
|
||||||
end;
|
end;
|
||||||
close_in result
|
close_in result
|
||||||
end;
|
end;
|
||||||
|
@ -688,25 +700,33 @@ let process
|
||||||
Pdf.lookup_direct pdf "/ImageMask" dict
|
Pdf.lookup_direct pdf "/ImageMask" dict
|
||||||
with
|
with
|
||||||
| Some (Pdf.Name "/Image"), Some (Pdf.Name "/DCTDecode" | Pdf.Array [Pdf.Name "/DCTDecode"]), _, _ ->
|
| Some (Pdf.Name "/Image"), Some (Pdf.Name "/DCTDecode" | Pdf.Array [Pdf.Name "/DCTDecode"]), _, _ ->
|
||||||
if !debug_image_processing then Printf.printf "(%i/%i) Object %i (JPEG)...\n%!" !ndone nobjects objnum;
|
|
||||||
begin match q with
|
begin match q with
|
||||||
| Some q ->
|
| Some q ->
|
||||||
if q < 100 then jpeg_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~q ~path_to_convert s dict reference
|
if q < 100 then
|
||||||
|
begin
|
||||||
|
if !debug_image_processing then Printf.printf "(%i/%i) Object %i (JPEG)... %!" !ndone nobjects objnum;
|
||||||
|
jpeg_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~q ~path_to_convert s dict reference
|
||||||
|
end
|
||||||
| None -> ()
|
| None -> ()
|
||||||
end
|
end
|
||||||
| Some (Pdf.Name "/Image"), _, Some (Pdf.Integer 1), _
|
| Some (Pdf.Name "/Image"), _, Some (Pdf.Integer 1), _
|
||||||
| Some (Pdf.Name "/Image"), _, _, Some (Pdf.Boolean true) ->
|
| Some (Pdf.Name "/Image"), _, _, Some (Pdf.Boolean true) ->
|
||||||
if !debug_image_processing then Printf.printf "(%i/%i) object %i (1bpp)...\n%!" !ndone nobjects objnum;
|
|
||||||
begin match onebppmethod with
|
begin match onebppmethod with
|
||||||
| Some "JBIG2" ->
|
| Some "JBIG2" ->
|
||||||
|
begin
|
||||||
|
if !debug_image_processing then Printf.printf "(%i/%i) object %i (1bpp)... %!" !ndone nobjects objnum;
|
||||||
recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_jbig2enc pdf s dict reference
|
recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_jbig2enc pdf s dict reference
|
||||||
|
end
|
||||||
| _ -> ()
|
| _ -> ()
|
||||||
end
|
end
|
||||||
| Some (Pdf.Name "/Image"), _, _, _ ->
|
| Some (Pdf.Name "/Image"), _, _, _ ->
|
||||||
if !debug_image_processing then Printf.printf "(%i/%i) object %i (lossless)...\n%!" !ndone nobjects objnum;
|
|
||||||
begin match qlossless with
|
begin match qlossless with
|
||||||
| Some qlossless ->
|
| Some qlossless ->
|
||||||
if qlossless < 101 then lossless_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~qlossless ~path_to_convert s dict reference
|
if qlossless < 101 then
|
||||||
|
begin
|
||||||
|
if !debug_image_processing then Printf.printf "(%i/%i) object %i (lossless)... %!" !ndone nobjects objnum;
|
||||||
|
lossless_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~qlossless ~path_to_convert s dict reference
|
||||||
|
end
|
||||||
| None -> ()
|
| None -> ()
|
||||||
end
|
end
|
||||||
| _ -> () (* not an image *)
|
| _ -> () (* not an image *)
|
||||||
|
|
Loading…
Reference in New Issue