From cc718fdb2cb40d3be34f693142e6e65e6c83cfbf Mon Sep 17 00:00:00 2001 From: John Whitington Date: Wed, 6 Dec 2023 12:20:27 +0000 Subject: [PATCH] Scaffolding for image round tripping --- cpdfcommand.ml | 41 ++++++++++++++++++++++++++++++++++++++--- cpdfimage.ml | 2 ++ cpdfimage.mli | 2 ++ cpdfmanual.tex | 2 ++ 4 files changed, 44 insertions(+), 3 deletions(-) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 3ba78d2..46e14bc 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -230,6 +230,7 @@ type op = | Draw | Composition of bool | Chop of int * int + | ProcessImages let string_of_op = function | PrintFontEncoding _ -> "PrintFontEncoding" @@ -367,6 +368,7 @@ let string_of_op = function | Draw -> "Draw" | Composition _ -> "Composition" | Chop _ -> "Chop" + | ProcessImages -> "ProcessImages" (* Inputs: filename, pagespec. *) type input_kind = @@ -466,6 +468,7 @@ type args = mutable path_to_ghostscript : string; mutable path_to_im : string; mutable path_to_p2p : string; + mutable path_to_convert : string; mutable frombox : string option; mutable tobox : string option; mutable mediabox_if_missing : bool; @@ -518,7 +521,9 @@ type args = mutable toc_title : string; mutable toc_bookmark : bool; mutable idir_only_pdfs : bool; - mutable no_warn_rotate : bool} + mutable no_warn_rotate : bool; + mutable jpegquality : int; + mutable jpegqualitylossless : int} let args = {op = None; @@ -586,6 +591,7 @@ let args = path_to_ghostscript = ""; path_to_im = ""; path_to_p2p = ""; + path_to_convert = ""; frombox = None; tobox = None; mediabox_if_missing = false; @@ -639,7 +645,9 @@ let args = toc_title = "Table of Contents"; toc_bookmark = true; idir_only_pdfs = false; - no_warn_rotate = false} + no_warn_rotate = false; + jpegquality = 100; + jpegqualitylossless = 100} let reset_arguments () = args.op <- None; @@ -744,6 +752,8 @@ let reset_arguments () = args.toc_title <- "Table of Contents"; args.toc_bookmark <- true; args.idir_only_pdfs <- false; + args.jpegquality <- 100; + args.jpegqualitylossless <- 100; (* Do not reset original_filename or cpdflin or was_encrypted or was_decrypted_with_owner or recrypt or producer or creator or path_to_* or gs_malformed or gs_quiet or no-warn-rotate, since we want these to work @@ -863,7 +873,7 @@ let banned banlist = function CopyBox|MediaBox|HardBox _|SetTrapped|SetUntrapped|Presentation| BlackText|BlackLines|BlackFills|CopyFont _|StampOn _|StampUnder _|StampAsXObject _| AddText _|ScaleContents _|AttachFile _| ThinLines _ | RemoveClipping | RemoveAllText - | Prepend _ | Postpend _ | Draw -> + | Prepend _ | Postpend _ | Draw | ProcessImages -> mem Pdfcrypt.NoEdit banlist let operation_allowed pdf banlist op = @@ -1512,6 +1522,9 @@ let setgspath p = let setimpath p = args.path_to_im <- p +let setconvertpath p = + args.path_to_convert <- p + let setp2ppath p = args.path_to_p2p <- p @@ -1908,6 +1921,12 @@ let set_jbig2_global f = let clear_jbig2_global () = jbig2_global := None +let setjpegquality q = + args.jpegquality <- q + +let setjpegqualitylossless q = + args.jpegqualitylossless <- q + (* Parse a control file, make an argv, and then make Arg parse it. *) let rec make_control_argv_and_parse filename = control_args := !control_args @ parse_control_file filename @@ -2679,6 +2698,18 @@ and specs = ("-dedup-perpage", Arg.Unit set_dedup_per_page, " Deduplicate extracted images per page only"); + ("-process-images", + Arg.Unit (setop ProcessImages), + " Process images within PDF"); + ("-convert", + Arg.String setconvertpath, + " Path to convert executable"); + ("-jpeg-to-jpeg", + Arg.Int setjpegquality, + " Set JPEG quality for existing JPEGs"); + ("-lossless-to-jpeg", + Arg.Int setjpegqualitylossless, + " Set JPEG quality for existing lossless images"); ("-squeeze", Arg.Unit setsqueeze, " Squeeze"); @@ -4414,6 +4445,10 @@ let go () = let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in write_pdf false (Cpdfchop.chop ~x ~y ~columns:args.impose_columns ~btt:args.impose_btt ~rtl:args.impose_rtl pdf range) + | Some ProcessImages -> + let pdf = get_single_pdf args.op false in + Cpdfimage.process pdf; + write_pdf false pdf (* Advise the user if a combination of command line flags makes little sense, or error out if it make no sense at all. *) diff --git a/cpdfimage.ml b/cpdfimage.ml index 17affbb..f6f464a 100644 --- a/cpdfimage.ml +++ b/cpdfimage.ml @@ -456,3 +456,5 @@ let image_of_input fobj i = in let pdf, pageroot = Pdfpage.add_pagetree [page] pdf in Pdfpage.add_root pageroot [] pdf + +let process pdf = () diff --git a/cpdfimage.mli b/cpdfimage.mli index cc1ac7e..b5f79c9 100644 --- a/cpdfimage.mli +++ b/cpdfimage.mli @@ -11,6 +11,8 @@ val image_resolution : Pdf.t -> int list -> float -> (int * string * int * int * (** List images in JSON format *) val images : Pdf.t -> int list -> Cpdfyojson.Safe.t +val process : Pdf.t -> unit + (**/**) val image_of_input : (unit -> Pdfio.bytes -> Pdf.pdfobject * (int * Pdf.pdfobject) list) -> Pdfio.input -> Pdf.t val obj_of_jpeg_data : Pdfio.bytes -> Pdf.pdfobject * (int * Pdf.pdfobject) list diff --git a/cpdfmanual.tex b/cpdfmanual.tex index 1fa238c..3037407 100644 --- a/cpdfmanual.tex +++ b/cpdfmanual.tex @@ -8,6 +8,8 @@ %Document -chop %Replace -impose-rtl etc with -rtl etc. %Document -extract-images JBIG2 for how to use jbig2dec +%Document -jbig2 / -jbig2globals +%Document -process-images \documentclass{book} % Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf, % dotnetcpdflibmanual.pdf, jcpdflibmanual.pdf jscpdflibmanual.pdf etc.