From d96b6aabbb26861e449a722970aeaa6eceb17839 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Mon, 18 Dec 2023 20:50:52 +0000 Subject: [PATCH] Detection for ICCBased colour spaces --- cpdfimage.ml | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/cpdfimage.ml b/cpdfimage.ml index 1f3576c..cd12ed3 100644 --- a/cpdfimage.ml +++ b/cpdfimage.ml @@ -500,13 +500,22 @@ let process pdf ~q ~qlossless ~path_to_convert = Sys.remove out2 | Some (Pdf.Name "/Image"), _ -> (* 0. Test if this is one we can do - for now just Colourspace=RGB, BPC=8 *) - begin match Pdf.lookup_direct pdf "/ColorSpace" dict, Pdf.lookup_direct pdf "/BitsPerComponent" dict with - | Some (Pdf.Name "/DeviceRGB"), Some (Pdf.Integer 8) -> + let bpc = Pdf.lookup_direct pdf "/BitsPerComponent" dict in + let is_rgb = + match Pdf.lookup_direct pdf "/ColorSpace" dict with + | Some (Pdf.Name "/DeviceRGB") -> true + | Some (Pdf.Array [Pdf.Name "/ICCBased"; stream]) -> + begin match Pdf.lookup_direct pdf "/N" stream with + | Some (Pdf.Integer 3) -> true + | _ -> false + end + | _ -> false + in + begin match is_rgb, bpc with + | true, Some (Pdf.Integer 8) -> let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in - (* 1. Decompress it - check we succeeded, bail if not *) Pdfcodec.decode_pdfstream_until_unknown pdf s; begin match Pdf.lookup_direct pdf "/Filter" (fst !reference) with Some _ -> () | None -> - (* 1. Output to pnm *) let out = Filename.temp_file "cpdf" "convertin" ^ ".pnm" in let out2 = Filename.temp_file "cpdf" "convertout" ^ ".jpg" in let fh = open_out_bin out in @@ -515,7 +524,6 @@ let process pdf ~q ~qlossless ~path_to_convert = let data = match s with Pdf.Stream {contents = _, Pdf.Got d} -> d | _ -> assert false in pnm_to_channel_24 fh w h data; close_out fh; - (* 2. Convert to JPEG with convert *) let retcode = let command = (Filename.quote_command path_to_convert @@ -524,35 +532,30 @@ let process pdf ~q ~qlossless ~path_to_convert = (*Printf.printf "%S\n" command;*) Sys.command command in - (* 3. Check smaller, Read file, and build new dictionary - removing ColorSpace, BitsPerComponent replacing Filter *) if retcode = 0 then begin let result = open_in_bin out2 in let newsize = in_channel_length result in - Printf.printf "Lossless to JPEG %i -> %i\n" size newsize; if newsize < size then + Printf.printf "Lossless to JPEG %i -> %i\n" size newsize; reference := - (Pdf.remove_dict_entry - (Pdf.remove_dict_entry - (Pdf.add_dict_entry - (Pdf.add_dict_entry dict "/Length" (Pdf.Integer newsize)) - "/Filter" - (Pdf.Name "/DCTDecode")) - "/ColorSpace") - "/BitsPerComponent"), + (Pdf.add_dict_entry + (Pdf.add_dict_entry dict "/Length" (Pdf.Integer newsize)) + "/Filter" + (Pdf.Name "/DCTDecode")), Pdf.Got (Pdfio.bytes_of_input_channel result) end; - (* 4. Clean up. *) Sys.remove out; Sys.remove out2 end | colspace, bpc -> - (*let colspace, bpc, filter = + let colspace = Pdf.lookup_direct pdf "/ColorSpace" dict in + let colspace, bpc, filter = (match colspace with None -> "none" | Some x -> Pdfwrite.string_of_pdf x), (match bpc with None -> "none" | Some x -> Pdfwrite.string_of_pdf x), (match Pdf.lookup_direct pdf "/Filter" dict with None -> "none" | Some x -> Pdfwrite.string_of_pdf x) in - print_string (Printf.sprintf "%s (%s) [%s]\n" colspace bpc filter);*) + print_string (Printf.sprintf "%s (%s) [%s]\n" colspace bpc filter); () (* an image we cannot or do not handle *) end | _ -> () (* not an image *)