Add debug output for image processing

This commit is contained in:
John Whitington 2023-12-28 16:18:25 +00:00
parent 707525d8da
commit 86cd0c3bb7
3 changed files with 23 additions and 9 deletions

View File

@ -1956,6 +1956,9 @@ let setlengththreshold i =
let setpercentagethreshold i =
args.percentage_threshold <- i
let setprocessimagesinfo () =
set Cpdfimage.debug_image_processing
(* Parse a control file, make an argv, and then make Arg parse it. *)
let rec make_control_argv_and_parse filename =
control_args := !control_args @ parse_control_file filename
@ -2730,6 +2733,9 @@ and specs =
("-process-images",
Arg.Unit (setop ProcessImages),
" Process images within PDF");
("-process-images-info",
Arg.Unit setprocessimagesinfo,
" Show info when processing images");
("-convert",
Arg.String setconvertpath,
" Path to convert executable");

View File

@ -2,6 +2,8 @@ open Pdfutil
open Pdfio
open Cpdferror
let debug_image_processing = ref false
let pnm_white ch = output_char ch ' '
let pnm_newline ch = output_char ch '\n'
let pnm_output_string = Stdlib.output_string
@ -528,7 +530,7 @@ let jpeg_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~q
let result = open_in_bin out2 in
let newsize = in_channel_length result in
if newsize < size then
(*Printf.printf "JPEG to JPEG %i -> %i\n" size newsize;*)
if !debug_image_processing then Printf.printf "JPEG to JPEG %i -> %i\n%!" size newsize;
reference := Pdf.add_dict_entry dict "/Length" (Pdf.Integer newsize), Pdf.Got (Pdfio.bytes_of_input_channel result)
end;
Sys.remove out;
@ -584,7 +586,7 @@ let lossless_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshol
let newsize = in_channel_length result in
if newsize < size then
begin
(*Printf.printf "Lossless to JPEG %i -> %i (components %i) \n" size newsize components;*)
if !debug_image_processing then Printf.printf "Lossless to JPEG %i -> %i\n%!" size newsize;
reference :=
(Pdf.add_dict_entry
(Pdf.add_dict_entry dict "/Length" (Pdf.Integer newsize))
@ -632,7 +634,7 @@ let recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_j
let newsize = in_channel_length result in
if newsize < size then
begin
(*Printf.printf "1bpp to JBIG2 %i -> %i \n" size newsize;*)
if !debug_image_processing then Printf.printf "1bpp to JBIG2 %i -> %i\n%!" size newsize;
reference :=
(Pdf.remove_dict_entry
(Pdf.add_dict_entry
@ -653,7 +655,7 @@ let process
?q ?qlossless ?onebppmethod ~length_threshold ~percentage_threshold ~pixel_threshold
~path_to_jbig2enc ~path_to_convert pdf
=
let process_obj _ s =
let process_obj objnum s =
match s with
| Pdf.Stream ({contents = dict, _} as reference) ->
begin match
@ -663,6 +665,7 @@ let process
Pdf.lookup_direct pdf "/ImageMask" dict
with
| Some (Pdf.Name "/Image"), Some (Pdf.Name "/DCTDecode" | Pdf.Array [Pdf.Name "/DCTDecode"]), _, _ ->
if !debug_image_processing then Printf.printf "Object %i (JPEG)...\n%!" objnum;
begin match q with
| Some q ->
if q < 100 then jpeg_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~q ~path_to_convert s dict reference
@ -670,12 +673,14 @@ let process
end
| Some (Pdf.Name "/Image"), _, Some (Pdf.Integer 1), _
| Some (Pdf.Name "/Image"), _, _, Some (Pdf.Boolean true) ->
begin match onebppmethod with
| Some "JBIG2" ->
recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_jbig2enc pdf s dict reference
| _ -> ()
end
if !debug_image_processing then Printf.printf "Object %i (1bpp)...\n%!" objnum;
begin match onebppmethod with
| Some "JBIG2" ->
recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_jbig2enc pdf s dict reference
| _ -> ()
end
| Some (Pdf.Name "/Image"), _, _, _ ->
if !debug_image_processing then Printf.printf "Object %i (lossless)...\n%!" objnum;
begin match qlossless with
| Some qlossless ->
if qlossless < 101 then lossless_to_jpeg pdf ~pixel_threshold ~length_threshold ~percentage_threshold ~qlossless ~path_to_convert s dict reference

View File

@ -1,5 +1,8 @@
(** Images *)
(** Print info when processing image *)
val debug_image_processing : bool ref
(** Extract images. *)
val extract_images :
?raw:bool -> ?path_to_p2p:string -> ?path_to_im:string ->