Split up process_images
This commit is contained in:
parent
5631cfd724
commit
f1b39a0c84
47
cpdfimage.ml
47
cpdfimage.ml
|
@ -492,14 +492,8 @@ let image_of_input fobj i =
|
||||||
(* FIXME What about predictors? Audit to see if files get smaller. *)
|
(* FIXME What about predictors? Audit to see if files get smaller. *)
|
||||||
(* FIXME if lossy only 5% smaller, ignore? Set this parameter... *)
|
(* FIXME if lossy only 5% smaller, ignore? Set this parameter... *)
|
||||||
(* FIXME error handling for Sys.remove, others *)
|
(* FIXME error handling for Sys.remove, others *)
|
||||||
(* FIXME DeviceCMYK - use the convert CMYK samples format *)
|
(* FIXME Use raw format for all, and make it fast *)
|
||||||
(* FIXME Test JPEG to JPEG on CMYK - is colourspace retained, or do we need to add -colorspace CMYK? *)
|
let jpeg_to_jpeg pdf ~q ~path_to_convert s dict reference =
|
||||||
let process pdf ~q ~qlossless ~path_to_convert =
|
|
||||||
let process_obj _ s =
|
|
||||||
match s with
|
|
||||||
| Pdf.Stream ({contents = dict, _} as reference) ->
|
|
||||||
begin match Pdf.lookup_direct pdf "/Subtype" dict, Pdf.lookup_direct pdf "/Filter" dict with
|
|
||||||
| Some (Pdf.Name "/Image"), Some (Pdf.Name "/DCTDecode" | Pdf.Array [Pdf.Name "/DCTDecode"]) ->
|
|
||||||
Pdf.getstream s;
|
Pdf.getstream s;
|
||||||
let out = Filename.temp_file "cpdf" "convertin" ^ ".jpg" in
|
let out = Filename.temp_file "cpdf" "convertin" ^ ".jpg" in
|
||||||
let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jpg" in
|
let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jpg" in
|
||||||
|
@ -526,10 +520,8 @@ let process pdf ~q ~qlossless ~path_to_convert =
|
||||||
end;
|
end;
|
||||||
Sys.remove out;
|
Sys.remove out;
|
||||||
Sys.remove out2
|
Sys.remove out2
|
||||||
| Some (Pdf.Name "/Image"), _ ->
|
|
||||||
(* 0. Test if this is one we can do - for now just 1, 3 or 4 components (Gray, RGB, CMYK) at BPC=8 *)
|
let suitable_num pdf dict =
|
||||||
let bpc = Pdf.lookup_direct pdf "/BitsPerComponent" dict in
|
|
||||||
let suitable_num =
|
|
||||||
match Pdf.lookup_direct pdf "/ColorSpace" dict with
|
match Pdf.lookup_direct pdf "/ColorSpace" dict with
|
||||||
| Some (Pdf.Name "/DeviceRGB") -> 3
|
| Some (Pdf.Name "/DeviceRGB") -> 3
|
||||||
| Some (Pdf.Name "/DeviceGray") -> 1
|
| Some (Pdf.Name "/DeviceGray") -> 1
|
||||||
|
@ -542,27 +534,31 @@ let process pdf ~q ~qlossless ~path_to_convert =
|
||||||
| _ -> 0
|
| _ -> 0
|
||||||
end
|
end
|
||||||
| _ -> 0
|
| _ -> 0
|
||||||
in
|
|
||||||
begin match suitable_num, bpc with
|
let lossless_to_jpeg pdf ~qlossless ~path_to_convert s dict reference =
|
||||||
|
(* 0. Test if this is one we can do - for now just 1, 3 or 4 components (Gray, RGB, CMYK) at BPC=8 *)
|
||||||
|
let bpc = Pdf.lookup_direct pdf "/BitsPerComponent" dict in
|
||||||
|
let components = suitable_num pdf dict in
|
||||||
|
match components, bpc with
|
||||||
| (1 | 3 | 4), Some (Pdf.Integer 8) ->
|
| (1 | 3 | 4), Some (Pdf.Integer 8) ->
|
||||||
let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in
|
let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in
|
||||||
Pdfcodec.decode_pdfstream_until_unknown pdf s;
|
Pdfcodec.decode_pdfstream_until_unknown pdf s;
|
||||||
begin match Pdf.lookup_direct pdf "/Filter" (fst !reference) with Some _ -> () | None ->
|
begin match Pdf.lookup_direct pdf "/Filter" (fst !reference) with Some _ -> () | None ->
|
||||||
let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in
|
let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in
|
||||||
let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in
|
let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in
|
||||||
let out = Filename.temp_file "cpdf" "convertin" ^ (if suitable_num < 4 then ".pnm" else ".cmyk") in
|
let out = Filename.temp_file "cpdf" "convertin" ^ (if suitable_num pdf dict < 4 then ".pnm" else ".cmyk") in
|
||||||
let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jpg" in
|
let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jpg" in
|
||||||
let fh = open_out_bin out in
|
let fh = open_out_bin out in
|
||||||
let data = match s with Pdf.Stream {contents = _, Pdf.Got d} -> d | _ -> assert false in
|
let data = match s with Pdf.Stream {contents = _, Pdf.Got d} -> d | _ -> assert false in
|
||||||
(if suitable_num = 3 then pnm_to_channel_24 else
|
(if components = 3 then pnm_to_channel_24 else
|
||||||
if suitable_num = 4 then cmyk_to_channel_32 else pnm_to_channel_8) fh w h data;
|
if components = 4 then cmyk_to_channel_32 else pnm_to_channel_8) fh w h data;
|
||||||
close_out fh;
|
close_out fh;
|
||||||
let retcode =
|
let retcode =
|
||||||
let command =
|
let command =
|
||||||
(Filename.quote_command path_to_convert
|
(Filename.quote_command path_to_convert
|
||||||
((if suitable_num = 4 then ["-depth"; "8"; "-size"; string_of_int w ^ "x" ^ string_of_int h] else []) @
|
((if components = 4 then ["-depth"; "8"; "-size"; string_of_int w ^ "x" ^ string_of_int h] else []) @
|
||||||
[out; "-quality"; string_of_int qlossless ^ "%"] @
|
[out; "-quality"; string_of_int qlossless ^ "%"] @
|
||||||
(if suitable_num = 1 then ["-colorspace"; "Gray"] else if suitable_num = 4 then ["-colorspace"; "CMYK"] else []) @
|
(if components = 1 then ["-colorspace"; "Gray"] else if components = 4 then ["-colorspace"; "CMYK"] else []) @
|
||||||
[out2]))
|
[out2]))
|
||||||
in
|
in
|
||||||
(*Printf.printf "%S\n" command;*)
|
(*Printf.printf "%S\n" command;*)
|
||||||
|
@ -574,7 +570,7 @@ let process pdf ~q ~qlossless ~path_to_convert =
|
||||||
let newsize = in_channel_length result in
|
let newsize = in_channel_length result in
|
||||||
if newsize < size then
|
if newsize < size then
|
||||||
begin
|
begin
|
||||||
Printf.printf "Lossless to JPEG %i -> %i (components %i) \n" size newsize suitable_num;
|
Printf.printf "Lossless to JPEG %i -> %i (components %i) \n" size newsize components;
|
||||||
reference :=
|
reference :=
|
||||||
(Pdf.add_dict_entry
|
(Pdf.add_dict_entry
|
||||||
(Pdf.add_dict_entry dict "/Length" (Pdf.Integer newsize))
|
(Pdf.add_dict_entry dict "/Length" (Pdf.Integer newsize))
|
||||||
|
@ -596,7 +592,16 @@ let process pdf ~q ~qlossless ~path_to_convert =
|
||||||
in
|
in
|
||||||
print_string (Printf.sprintf "%s (%s) [%s]\n" colspace bpc filter);
|
print_string (Printf.sprintf "%s (%s) [%s]\n" colspace bpc filter);
|
||||||
() (* an image we cannot or do not handle *)
|
() (* an image we cannot or do not handle *)
|
||||||
end
|
|
||||||
|
let process pdf ~q ~qlossless ~path_to_convert =
|
||||||
|
let process_obj _ s =
|
||||||
|
match s with
|
||||||
|
| Pdf.Stream ({contents = dict, _} as reference) ->
|
||||||
|
begin match Pdf.lookup_direct pdf "/Subtype" dict, Pdf.lookup_direct pdf "/Filter" dict with
|
||||||
|
| Some (Pdf.Name "/Image"), Some (Pdf.Name "/DCTDecode" | Pdf.Array [Pdf.Name "/DCTDecode"]) ->
|
||||||
|
jpeg_to_jpeg pdf ~q ~path_to_convert s dict reference
|
||||||
|
| Some (Pdf.Name "/Image"), _ ->
|
||||||
|
lossless_to_jpeg pdf ~qlossless ~path_to_convert s dict reference
|
||||||
| _ -> () (* not an image *)
|
| _ -> () (* not an image *)
|
||||||
end
|
end
|
||||||
| _ -> () (* not a stream *)
|
| _ -> () (* not a stream *)
|
||||||
|
|
Loading…
Reference in New Issue