First implementation of -lossless-to-jpeg
This commit is contained in:
parent
ec91b49bfe
commit
16278b03f6
57
cpdfimage.ml
57
cpdfimage.ml
|
@ -1,5 +1,6 @@
|
||||||
open Pdfutil
|
open Pdfutil
|
||||||
open Pdfio
|
open Pdfio
|
||||||
|
open Cpdferror
|
||||||
|
|
||||||
(* Extract Images. *)
|
(* Extract Images. *)
|
||||||
let pnm_to_channel_24 channel w h s =
|
let pnm_to_channel_24 channel w h s =
|
||||||
|
@ -22,7 +23,6 @@ let pnm_to_channel_24 channel w h s =
|
||||||
done
|
done
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
||||||
let jbig2_serial = ref 0
|
let jbig2_serial = ref 0
|
||||||
|
|
||||||
let jbig2_globals = null_hash ()
|
let jbig2_globals = null_hash ()
|
||||||
|
@ -464,6 +464,8 @@ let image_of_input fobj i =
|
||||||
(* FIXME Need the "is it smaller" check from Pdfcodec.encode here too? *)
|
(* FIXME Need the "is it smaller" check from Pdfcodec.encode here too? *)
|
||||||
(* FIXME (this appears to make the file larger than ./cpdf ~/repos/pdfs/PDFTests/main128fail.pdf -recrypt -o out.pdf. Why? Seems to not create new object streams. Make it do so, since this is a compression mechanism? An empty Pdf.objiter should not blow up a file like this!) *)
|
(* FIXME (this appears to make the file larger than ./cpdf ~/repos/pdfs/PDFTests/main128fail.pdf -recrypt -o out.pdf. Why? Seems to not create new object streams. Make it do so, since this is a compression mechanism? An empty Pdf.objiter should not blow up a file like this!) *)
|
||||||
(* For each image xobject, process it through convert to reduce size. *)
|
(* For each image xobject, process it through convert to reduce size. *)
|
||||||
|
(* FIXME What about predictors? Audit to see if files get smaller. *)
|
||||||
|
(* FIXME if lossy only 5% smaller, ignore? Set this parameter... *)
|
||||||
let process pdf ~q ~qlossless ~path_to_convert =
|
let process pdf ~q ~qlossless ~path_to_convert =
|
||||||
let process_obj _ s =
|
let process_obj _ s =
|
||||||
match s with
|
match s with
|
||||||
|
@ -500,18 +502,57 @@ let process pdf ~q ~qlossless ~path_to_convert =
|
||||||
(* 0. Test if this is one we can do - for now just Colourspace=RGB, BPC=8 *)
|
(* 0. Test if this is one we can do - for now just Colourspace=RGB, BPC=8 *)
|
||||||
begin match Pdf.lookup_direct pdf "/ColorSpace" dict, Pdf.lookup_direct pdf "/BitsPerComponent" dict with
|
begin match Pdf.lookup_direct pdf "/ColorSpace" dict, Pdf.lookup_direct pdf "/BitsPerComponent" dict with
|
||||||
| Some (Pdf.Name "/DeviceRGB"), Some (Pdf.Integer 8) ->
|
| Some (Pdf.Name "/DeviceRGB"), Some (Pdf.Integer 8) ->
|
||||||
Printf.printf "Found a lossless(rgb, 8) image to JPEGify\n"
|
let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in
|
||||||
(* 1. Decompress it - check we succeeded, bail if not *)
|
(* 1. Decompress it - check we succeeded, bail if not *)
|
||||||
(* 1. Output to pnm *)
|
Pdfcodec.decode_pdfstream_until_unknown pdf s;
|
||||||
(* 2. Convert to JPEG with convert *)
|
begin match Pdf.lookup_direct pdf "/Filter" (fst !reference) with Some _ -> () | None ->
|
||||||
(* 3. Check smaller, Read file, and build new dictionary - removing ColorSpace, BitsPerComponent replacing Filter *)
|
(* 1. Output to pnm *)
|
||||||
|
let out = Filename.temp_file "cpdf" "convertin" ^ ".pnm" in
|
||||||
|
let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jpg" in
|
||||||
|
let fh = open_out_bin out in
|
||||||
|
let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in
|
||||||
|
let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in
|
||||||
|
let data = match s with Pdf.Stream {contents = _, Pdf.Got d} -> d | _ -> assert false in
|
||||||
|
pnm_to_channel_24 fh w h data;
|
||||||
|
close_out fh;
|
||||||
|
(* 2. Convert to JPEG with convert *)
|
||||||
|
let retcode =
|
||||||
|
let command =
|
||||||
|
(Filename.quote_command path_to_convert
|
||||||
|
[out; "-quality"; string_of_int qlossless ^ "%"; out2])
|
||||||
|
in
|
||||||
|
(*Printf.printf "%S\n" command;*)
|
||||||
|
Sys.command command
|
||||||
|
in
|
||||||
|
(* 3. Check smaller, Read file, and build new dictionary - removing ColorSpace, BitsPerComponent replacing Filter *)
|
||||||
|
if retcode = 0 then
|
||||||
|
begin
|
||||||
|
let result = open_in_bin out2 in
|
||||||
|
let newsize = in_channel_length result in
|
||||||
|
Printf.printf "Lossless to JPEG %i -> %i\n" size newsize;
|
||||||
|
if newsize < size then
|
||||||
|
reference :=
|
||||||
|
(Pdf.remove_dict_entry
|
||||||
|
(Pdf.remove_dict_entry
|
||||||
|
(Pdf.add_dict_entry
|
||||||
|
(Pdf.add_dict_entry dict "/Length" (Pdf.Integer newsize))
|
||||||
|
"/Filter"
|
||||||
|
(Pdf.Name "/DCTDecode"))
|
||||||
|
"/ColorSpace")
|
||||||
|
"/BitsPerComponent"),
|
||||||
|
Pdf.Got (Pdfio.bytes_of_input_channel result)
|
||||||
|
end;
|
||||||
|
(* 4. Clean up. *)
|
||||||
|
Sys.remove out;
|
||||||
|
Sys.remove out2
|
||||||
|
end
|
||||||
| colspace, bpc ->
|
| colspace, bpc ->
|
||||||
let colspace, bpc, filter =
|
(*let colspace, bpc, filter =
|
||||||
(match colspace with None -> "none" | Some x -> Pdfwrite.string_of_pdf x),
|
(match colspace with None -> "none" | Some x -> Pdfwrite.string_of_pdf x),
|
||||||
(match bpc with None -> "none" | Some x -> Pdfwrite.string_of_pdf x),
|
(match bpc with None -> "none" | Some x -> Pdfwrite.string_of_pdf x),
|
||||||
(match Pdf.lookup_direct pdf "/Filter" dict with None -> "none" | Some x -> Pdfwrite.string_of_pdf x)
|
(match Pdf.lookup_direct pdf "/Filter" dict with None -> "none" | Some x -> Pdfwrite.string_of_pdf x)
|
||||||
in
|
in
|
||||||
print_string (Printf.sprintf "%s (%s) [%s]\n" colspace bpc filter);
|
print_string (Printf.sprintf "%s (%s) [%s]\n" colspace bpc filter);*)
|
||||||
() (* an image we cannot or do not handle *)
|
() (* an image we cannot or do not handle *)
|
||||||
end
|
end
|
||||||
| _ -> () (* not an image *)
|
| _ -> () (* not an image *)
|
||||||
|
|
Loading…
Reference in New Issue