From 33c690343cf3edcd6bf76b18b5d28181a649ac2b Mon Sep 17 00:00:00 2001 From: John Whitington Date: Fri, 10 Nov 2023 13:46:52 +0000 Subject: [PATCH] Added -raw option for extract_images --- cpdfcommand.ml | 2 +- cpdfimage.ml | 25 +++++++++++++------------ cpdfimage.mli | 4 ++-- cpdfmanual.tex | 1 + 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index d696d32..f4be454 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -4160,7 +4160,7 @@ let go () = in let pdf = get_single_pdf args.op true in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in - Cpdfimage.extract_images args.path_to_p2p args.path_to_im args.encoding args.dedup args.dedup_per_page pdf range output_spec + Cpdfimage.extract_images ~raw:(args.encoding = Cpdfmetadata.Raw) ?path_to_p2p:(match args.path_to_p2p with "" -> None | x -> Some x) ?path_to_im:(match args.path_to_im with "" -> None | x -> Some x) args.encoding args.dedup args.dedup_per_page pdf range output_spec | Some (ImageResolution f) -> let pdf = get_single_pdf args.op true in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in diff --git a/cpdfimage.ml b/cpdfimage.ml index 2b329d8..3841e63 100644 --- a/cpdfimage.ml +++ b/cpdfimage.ml @@ -27,7 +27,7 @@ let write_stream name stream = Pdfio.bytes_to_output_channel fh stream; close_out fh -let write_image path_to_p2p path_to_im pdf resources name image = +let write_image ~raw ?path_to_p2p ?path_to_im pdf resources name image = match Pdfimage.get_image_24bpp pdf resources image with | Pdfimage.JPEG (stream, _) -> write_stream (name ^ ".jpg") stream | Pdfimage.JPEG2000 (stream, _) -> write_stream (name ^ ".jpx") stream @@ -39,10 +39,11 @@ let write_image path_to_p2p path_to_im pdf resources name image = pnm_to_channel_24 fh w h stream; close_out fh; begin match path_to_p2p with - | "" -> + | None -> begin match path_to_im with - "" -> Pdfe.log "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n" - | _ -> + None -> + if not raw then Pdfe.log "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n" + | Some path_to_im -> begin match Sys.command (Filename.quote_command path_to_im [pnm; png]) with @@ -52,7 +53,7 @@ let write_image path_to_p2p path_to_im pdf resources name image = Sys.remove pnm end end - | _ -> + | Some path_to_p2p -> begin match Sys.command (Filename.quote_command path_to_p2p ~stdout:png ["-gamma"; "0.45"; "-quiet"; pnm]) with @@ -67,16 +68,16 @@ let write_image path_to_p2p path_to_im pdf resources name image = let written = ref [] -let extract_images_inner path_to_p2p path_to_im encoding serial pdf resources stem pnum images = +let extract_images_inner ~raw ?path_to_p2p ?path_to_im encoding serial pdf resources stem pnum images = let names = map (fun _ -> Cpdfbookmarks.name_of_spec encoding [] pdf 0 (stem ^ "-p" ^ string_of_int pnum) (let r = !serial in serial := !serial + 1; r) "" 0 0) (indx images) in - iter2 (write_image path_to_p2p path_to_im pdf resources) names images + iter2 (write_image ~raw ?path_to_p2p ?path_to_im pdf resources) names images -let rec extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum form = +let rec extract_images_form_xobject ~raw ?path_to_p2p ?path_to_im encoding dedup dedup_per_page pdf serial stem pnum form = let resources = match Pdf.lookup_direct pdf "/Resources" form with Some (Pdf.Dictionary d) -> Pdf.Dictionary d @@ -95,9 +96,9 @@ let rec extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_ written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written; images in - extract_images_inner path_to_p2p path_to_im encoding serial pdf resources stem pnum images + extract_images_inner ~raw ?path_to_p2p ?path_to_im encoding serial pdf resources stem pnum images -let extract_images path_to_p2p path_to_im encoding dedup dedup_per_page pdf range stem = +let extract_images ?(raw=false) ?path_to_p2p ?path_to_im encoding dedup dedup_per_page pdf range stem = if dedup || dedup_per_page then written := []; let pdf_pages = Pdfpage.pages_of_pagetree pdf in let pages = @@ -119,8 +120,8 @@ let extract_images path_to_p2p path_to_im encoding dedup dedup_per_page pdf rang if dedup || dedup_per_page then written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written; let forms = keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Form")) xobjects in - extract_images_inner path_to_p2p path_to_im encoding serial pdf page.Pdfpage.resources stem pnum images; - iter (extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum) forms) + extract_images_inner ~raw ?path_to_p2p ?path_to_im encoding serial pdf page.Pdfpage.resources stem pnum images; + iter (extract_images_form_xobject ~raw ?path_to_p2p ?path_to_im encoding dedup dedup_per_page pdf serial stem pnum) forms) pages (indx pages) diff --git a/cpdfimage.mli b/cpdfimage.mli index 3a70306..5e1ffcd 100644 --- a/cpdfimage.mli +++ b/cpdfimage.mli @@ -1,8 +1,8 @@ (** Images *) (** Extract images. *) -val extract_images : string -> - string -> +val extract_images : ?raw:bool -> ?path_to_p2p:string -> + ?path_to_im:string -> Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit (** Report image resolutions. *) diff --git a/cpdfmanual.tex b/cpdfmanual.tex index 38b54ab..cbb76e4 100644 --- a/cpdfmanual.tex +++ b/cpdfmanual.tex @@ -3,6 +3,7 @@ %Document -info-json, -page-info-json, -page-labels-json, -list-fonts-json %Document subformat information %Document -list-images[-json], -list-image-uses[-json], -image-resolution-json +%Document -raw for -extract-images \documentclass{book} % Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf, % dotnetcpdflibmanual.pdf, jcpdflibmanual.pdf jscpdflibmanual.pdf etc.