mirror of
https://github.com/johnwhitington/cpdf-source.git
synced 2025-06-05 22:09:39 +02:00
Work on lossless->JPEG
This commit is contained in:
79
cpdfimage.ml
79
cpdfimage.ml
@ -457,41 +457,54 @@ let image_of_input fobj i =
|
|||||||
let pdf, pageroot = Pdfpage.add_pagetree [page] pdf in
|
let pdf, pageroot = Pdfpage.add_pagetree [page] pdf in
|
||||||
Pdfpage.add_root pageroot [] pdf
|
Pdfpage.add_root pageroot [] pdf
|
||||||
|
|
||||||
(* For each image xobject, process it through convert to reduce JPEG quality if we can. *)
|
(* FIXME: Make sure this process is OK for image masks too - do we encounter them here, and is it allowed for them? *)
|
||||||
|
(* FIXME: Only do this if quality < 100. *)
|
||||||
|
(* For each image xobject, process it through convert to reduce size. *)
|
||||||
let process pdf ~q ~qlossless ~path_to_convert =
|
let process pdf ~q ~qlossless ~path_to_convert =
|
||||||
let process_obj _ s =
|
let process_obj _ s =
|
||||||
match s with
|
match s with
|
||||||
| Pdf.Stream ({contents = dict, _} as reference) ->
|
| Pdf.Stream ({contents = dict, _} as reference) ->
|
||||||
begin match Pdf.lookup_direct pdf "/Subtype" dict, Pdf.lookup_direct pdf "/Filter" dict with
|
begin match Pdf.lookup_direct pdf "/Subtype" dict, Pdf.lookup_direct pdf "/Filter" dict with
|
||||||
| Some (Pdf.Name "/Image"), Some (Pdf.Name "/DCTDecode" | Pdf.Array [Pdf.Name "/DCTDecode"]) ->
|
| Some (Pdf.Name "/Image"), Some (Pdf.Name "/DCTDecode" | Pdf.Array [Pdf.Name "/DCTDecode"]) ->
|
||||||
Pdf.getstream s;
|
Pdf.getstream s;
|
||||||
let out = Filename.temp_file "cpdf" "convertin" ^ ".jpg" in
|
let out = Filename.temp_file "cpdf" "convertin" ^ ".jpg" in
|
||||||
let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jpg" in
|
let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jpg" in
|
||||||
let fh = open_out_bin out in
|
let fh = open_out_bin out in
|
||||||
let size =
|
let size =
|
||||||
begin match s with Pdf.Stream {contents = _, Pdf.Got d} -> Pdfio.bytes_to_output_channel fh d; bytes_size d | _ -> 0 end
|
begin match s with Pdf.Stream {contents = _, Pdf.Got d} -> Pdfio.bytes_to_output_channel fh d; bytes_size d | _ -> 0 end
|
||||||
in
|
|
||||||
close_out fh;
|
|
||||||
let retcode =
|
|
||||||
let command =
|
|
||||||
(Filename.quote_command path_to_convert
|
|
||||||
[out; "-quality"; string_of_int q ^ "%"; out2])
|
|
||||||
in
|
in
|
||||||
(*Printf.printf "%S\n" command;*)
|
close_out fh;
|
||||||
Sys.command command
|
let retcode =
|
||||||
in
|
let command =
|
||||||
if retcode = 0 then
|
(Filename.quote_command path_to_convert
|
||||||
begin
|
[out; "-quality"; string_of_int q ^ "%"; out2])
|
||||||
let result = open_in_bin out2 in
|
in
|
||||||
let newsize = in_channel_length result in
|
(*Printf.printf "%S\n" command;*)
|
||||||
if newsize < size then
|
Sys.command command
|
||||||
(*Printf.printf "%i -> %i\n" size newsize;*)
|
in
|
||||||
reference := Pdf.add_dict_entry dict "/Length" (Pdf.Integer newsize), Pdf.Got (Pdfio.bytes_of_input_channel result)
|
if retcode = 0 then
|
||||||
end;
|
begin
|
||||||
Sys.remove out;
|
let result = open_in_bin out2 in
|
||||||
Sys.remove out2
|
let newsize = in_channel_length result in
|
||||||
| _ -> ()
|
if newsize < size then
|
||||||
end
|
(*Printf.printf "%i -> %i\n" size newsize;*)
|
||||||
| _ -> ()
|
reference := Pdf.add_dict_entry dict "/Length" (Pdf.Integer newsize), Pdf.Got (Pdfio.bytes_of_input_channel result)
|
||||||
|
end;
|
||||||
|
Sys.remove out;
|
||||||
|
Sys.remove out2
|
||||||
|
| Some (Pdf.Name "/Image"), _ ->
|
||||||
|
(* 0. Test if this is an image we can handle - for now, just /ColorSpace = /DeviceRGB with /BitsPerComponent = 8 *)
|
||||||
|
begin match Pdf.lookup_direct pdf "/ColorSpace" dict, Pdf.lookup_direct pdf "/BitsPerComponent" dict with
|
||||||
|
| Some (Pdf.Name "/DeviceRGB"), Some (Pdf.Integer 8) ->
|
||||||
|
Printf.printf "Found a lossless(rgb, 8) image to JPEGify\n"
|
||||||
|
(* 1. Decompress it - check we succeeded, bail if not *)
|
||||||
|
(* 2. Output to pnm *)
|
||||||
|
(* 3. Convert to JPEG with convert *)
|
||||||
|
(* 4. Check it is smaller, read the file, and build the new dictionary - removing ColorSpace and BitsPerComponent, replacing Filter *)
|
||||||
|
| _ -> Printf.printf "I"
|
||||||
|
end
|
||||||
|
| _ -> () (* not an image *)
|
||||||
|
end
|
||||||
|
| _ -> () (* not a stream *)
|
||||||
in
|
in
|
||||||
Pdf.objiter process_obj pdf
|
Pdf.objiter process_obj pdf
|
||||||
|
Reference in New Issue
Block a user