From b8aaf29420a409063c9548166a445d98b75eb082 Mon Sep 17 00:00:00 2001
From: John Whitington
Date: Fri, 22 Dec 2023 16:45:53 +0000
Subject: [PATCH] Scaffolding for 1bpp reprocessing

---
Reviewer notes (this region is ignored when the patch is applied):

* cpdfcommand.ml: adds a mutable string field `onebppmethod` to `args`,
  initialised to "" and reset to "" by `reset_arguments`. A new
  `-1bpp-method` option (Arg.String) stores its argument in that field via
  `set1bppmethod`, and the ProcessImages case forwards it to
  `Cpdfimage.process` as `~onebppmethod`.
* cpdfimage.ml / cpdfimage.mli: the optional argument `?jbig2:int` of
  `process` is replaced by `?onebppmethod:string`. The one call site shown
  in this patch did not pass `~jbig2`; callers elsewhere, if any, are not
  visible here -- TODO confirm.
* The match arm that used to print "1bpp\n" on standard output now
  dispatches on `onebppmethod`: `Some "CCITT"` calls `recompress_1bpp_ccitt`,
  `Some "JBIG2Lossless"` calls `recompress_1bpp_jbig2_lossless`, and every
  other value (including the default "") does nothing.
* Both `recompress_1bpp_*` functions are stubs that return `()`, so apart
  from removing the debug print this patch does not yet change how images
  are processed.
* NOTE(review): an unrecognised `-1bpp-method` value is accepted without
  any diagnostic and falls through to the do-nothing arm.

 cpdfcommand.ml | 15 ++++++++++++---
 cpdfimage.ml   | 14 ++++++++++++--
 cpdfimage.mli  |  2 +-
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/cpdfcommand.ml b/cpdfcommand.ml
index ce7efc3..cd5e0bd 100644
--- a/cpdfcommand.ml
+++ b/cpdfcommand.ml
@@ -523,7 +523,8 @@ type args =
    mutable idir_only_pdfs : bool;
    mutable no_warn_rotate : bool;
    mutable jpegquality : int;
-   mutable jpegqualitylossless : int}
+   mutable jpegqualitylossless : int;
+   mutable onebppmethod : string}
 
 let args =
   {op = None;
@@ -647,7 +648,8 @@ let args =
    idir_only_pdfs = false;
    no_warn_rotate = false;
    jpegquality = 100;
-   jpegqualitylossless = 100}
+   jpegqualitylossless = 100;
+   onebppmethod = ""}
 
 let reset_arguments () =
   args.op <- None;
@@ -754,6 +756,7 @@ let reset_arguments () =
   args.idir_only_pdfs <- false;
   args.jpegquality <- 100;
   args.jpegqualitylossless <- 100;
+  args.onebppmethod <- "";
   (* Do not reset original_filename or cpdflin or was_encrypted or
      was_decrypted_with_owner or recrypt or producer or creator or path_to_* or
      gs_malformed or gs_quiet or no-warn-rotate, since we want these to work
@@ -1927,6 +1930,9 @@ let setjpegquality q =
 let setjpegqualitylossless q =
   args.jpegqualitylossless <- q
 
+let set1bppmethod m =
+  args.onebppmethod <- m
+
 (* Parse a control file, make an argv, and then make Arg parse it. *)
 let rec make_control_argv_and_parse filename =
   control_args := !control_args @ parse_control_file filename
@@ -2710,6 +2716,9 @@ and specs =
    ("-lossless-to-jpeg",
      Arg.Int setjpegqualitylossless,
      " Set JPEG quality for existing lossless images");
+   ("-1bpp-method",
+     Arg.String set1bppmethod,
+     " Set 1bpp compression method for existing images");
    ("-squeeze",
      Arg.Unit setsqueeze,
      " Squeeze");
@@ -4447,7 +4456,7 @@ let go () =
       write_pdf false (Cpdfchop.chop ~x ~y ~columns:args.impose_columns ~btt:args.impose_btt ~rtl:args.impose_rtl pdf range)
   | Some ProcessImages ->
       let pdf = get_single_pdf args.op false in
-        Cpdfimage.process pdf ~q:args.jpegquality ~qlossless:args.jpegqualitylossless ~path_to_convert:args.path_to_convert;
+        Cpdfimage.process pdf ~q:args.jpegquality ~qlossless:args.jpegqualitylossless ~onebppmethod:args.onebppmethod ~path_to_convert:args.path_to_convert;
         write_pdf false pdf
 
 (* Advise the user if a combination of command line flags makes little sense,
diff --git a/cpdfimage.ml b/cpdfimage.ml
index fc42c66..be110fe 100644
--- a/cpdfimage.ml
+++ b/cpdfimage.ml
@@ -590,10 +590,16 @@ let lossless_to_jpeg pdf ~qlossless ~path_to_convert s dict reference =
     print_string (Printf.sprintf "%s (%s) [%s]\n" colspace bpc filter);
     () (* an image we cannot or do not handle *)
 
+let recompress_1bpp_ccitt pdf s dict reference =
+  ()
+
+let recompress_1bpp_jbig2_lossless pdf s dict reference =
+  ()
+
 (* JPEG to JPEG: RGB and CMYK JPEGS *)
 (* Lossless to JPEG: 8bpp Grey, 8bpp RGB, 8bpp CMYK including separation add ICCBased colourspaces *)
 (* 1 bit: anything to CCITT; anything to JBIG2 lossless (no globals yet) *)
-let process ?q ?qlossless ?jbig2 pdf ~path_to_convert =
+let process ?q ?qlossless ?onebppmethod pdf ~path_to_convert =
   let process_obj _ s =
     match s with
     | Pdf.Stream ({contents = dict, _} as reference) ->
@@ -610,7 +616,11 @@ let process ?q ?qlossless ?jbig2 pdf ~path_to_convert =
             end
         | Some (Pdf.Name "/Image"), _, Some (Pdf.Integer 1), _
         | Some (Pdf.Name "/Image"), _, _, Some (Pdf.Boolean true) ->
-            Printf.printf "1bpp\n"
+            begin match onebppmethod with
+            | Some "CCITT" -> recompress_1bpp_ccitt pdf s dict reference
+            | Some "JBIG2Lossless" -> recompress_1bpp_jbig2_lossless pdf s dict reference
+            | _ -> ()
+            end
         | Some (Pdf.Name "/Image"), _, _, _ ->
             begin match qlossless with
             | Some qlossless -> lossless_to_jpeg pdf ~qlossless ~path_to_convert s dict reference
diff --git a/cpdfimage.mli b/cpdfimage.mli
index f13707b..5542d32 100644
--- a/cpdfimage.mli
+++ b/cpdfimage.mli
@@ -11,7 +11,7 @@ val image_resolution : Pdf.t -> int list -> float -> (int * string * int * int *
 (** List images in JSON format *)
 val images : Pdf.t -> int list -> Cpdfyojson.Safe.t
 
-val process : ?q:int -> ?qlossless:int -> ?jbig2:int -> Pdf.t -> path_to_convert:string -> unit
+val process : ?q:int -> ?qlossless:int -> ?onebppmethod:string -> Pdf.t -> path_to_convert:string -> unit
 
 (**/**)
 val image_of_input : (unit -> Pdfio.bytes -> Pdf.pdfobject * (int * Pdf.pdfobject) list) -> Pdfio.input -> Pdf.t