Added -raw option for extract_images
This commit is contained in:
parent
1fe0c33924
commit
33c690343c
|
@ -4160,7 +4160,7 @@ let go () =
|
|||
in
|
||||
let pdf = get_single_pdf args.op true in
|
||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||
Cpdfimage.extract_images args.path_to_p2p args.path_to_im args.encoding args.dedup args.dedup_per_page pdf range output_spec
|
||||
Cpdfimage.extract_images ~raw:(args.encoding = Cpdfmetadata.Raw) ?path_to_p2p:(match args.path_to_p2p with "" -> None | x -> Some x) ?path_to_im:(match args.path_to_im with "" -> None | x -> Some x) args.encoding args.dedup args.dedup_per_page pdf range output_spec
|
||||
| Some (ImageResolution f) ->
|
||||
let pdf = get_single_pdf args.op true in
|
||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||
|
|
25
cpdfimage.ml
25
cpdfimage.ml
|
@ -27,7 +27,7 @@ let write_stream name stream =
|
|||
Pdfio.bytes_to_output_channel fh stream;
|
||||
close_out fh
|
||||
|
||||
let write_image path_to_p2p path_to_im pdf resources name image =
|
||||
let write_image ~raw ?path_to_p2p ?path_to_im pdf resources name image =
|
||||
match Pdfimage.get_image_24bpp pdf resources image with
|
||||
| Pdfimage.JPEG (stream, _) -> write_stream (name ^ ".jpg") stream
|
||||
| Pdfimage.JPEG2000 (stream, _) -> write_stream (name ^ ".jpx") stream
|
||||
|
@ -39,10 +39,11 @@ let write_image path_to_p2p path_to_im pdf resources name image =
|
|||
pnm_to_channel_24 fh w h stream;
|
||||
close_out fh;
|
||||
begin match path_to_p2p with
|
||||
| "" ->
|
||||
| None ->
|
||||
begin match path_to_im with
|
||||
"" -> Pdfe.log "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n"
|
||||
| _ ->
|
||||
None ->
|
||||
if not raw then Pdfe.log "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n"
|
||||
| Some path_to_im ->
|
||||
begin match
|
||||
Sys.command (Filename.quote_command path_to_im [pnm; png])
|
||||
with
|
||||
|
@ -52,7 +53,7 @@ let write_image path_to_p2p path_to_im pdf resources name image =
|
|||
Sys.remove pnm
|
||||
end
|
||||
end
|
||||
| _ ->
|
||||
| Some path_to_p2p ->
|
||||
begin match
|
||||
Sys.command (Filename.quote_command path_to_p2p ~stdout:png ["-gamma"; "0.45"; "-quiet"; pnm])
|
||||
with
|
||||
|
@ -67,16 +68,16 @@ let write_image path_to_p2p path_to_im pdf resources name image =
|
|||
|
||||
let written = ref []
|
||||
|
||||
let extract_images_inner path_to_p2p path_to_im encoding serial pdf resources stem pnum images =
|
||||
let extract_images_inner ~raw ?path_to_p2p ?path_to_im encoding serial pdf resources stem pnum images =
|
||||
let names = map
|
||||
(fun _ ->
|
||||
Cpdfbookmarks.name_of_spec
|
||||
encoding [] pdf 0 (stem ^ "-p" ^ string_of_int pnum)
|
||||
(let r = !serial in serial := !serial + 1; r) "" 0 0) (indx images)
|
||||
in
|
||||
iter2 (write_image path_to_p2p path_to_im pdf resources) names images
|
||||
iter2 (write_image ~raw ?path_to_p2p ?path_to_im pdf resources) names images
|
||||
|
||||
let rec extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum form =
|
||||
let rec extract_images_form_xobject ~raw ?path_to_p2p ?path_to_im encoding dedup dedup_per_page pdf serial stem pnum form =
|
||||
let resources =
|
||||
match Pdf.lookup_direct pdf "/Resources" form with
|
||||
Some (Pdf.Dictionary d) -> Pdf.Dictionary d
|
||||
|
@ -95,9 +96,9 @@ let rec extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_
|
|||
written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written;
|
||||
images
|
||||
in
|
||||
extract_images_inner path_to_p2p path_to_im encoding serial pdf resources stem pnum images
|
||||
extract_images_inner ~raw ?path_to_p2p ?path_to_im encoding serial pdf resources stem pnum images
|
||||
|
||||
let extract_images path_to_p2p path_to_im encoding dedup dedup_per_page pdf range stem =
|
||||
let extract_images ?(raw=false) ?path_to_p2p ?path_to_im encoding dedup dedup_per_page pdf range stem =
|
||||
if dedup || dedup_per_page then written := [];
|
||||
let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
||||
let pages =
|
||||
|
@ -119,8 +120,8 @@ let extract_images path_to_p2p path_to_im encoding dedup dedup_per_page pdf rang
|
|||
if dedup || dedup_per_page then
|
||||
written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written;
|
||||
let forms = keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Form")) xobjects in
|
||||
extract_images_inner path_to_p2p path_to_im encoding serial pdf page.Pdfpage.resources stem pnum images;
|
||||
iter (extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum) forms)
|
||||
extract_images_inner ~raw ?path_to_p2p ?path_to_im encoding serial pdf page.Pdfpage.resources stem pnum images;
|
||||
iter (extract_images_form_xobject ~raw ?path_to_p2p ?path_to_im encoding dedup dedup_per_page pdf serial stem pnum) forms)
|
||||
pages
|
||||
(indx pages)
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
(** Images *)
|
||||
|
||||
(** Extract images. *)
|
||||
val extract_images : string ->
|
||||
string ->
|
||||
val extract_images : ?raw:bool -> ?path_to_p2p:string ->
|
||||
?path_to_im:string ->
|
||||
Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit
|
||||
|
||||
(** Report image resolutions. *)
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
%Document -info-json, -page-info-json, -page-labels-json, -list-fonts-json
|
||||
%Document subformat information
|
||||
%Document -list-images[-json], -list-image-uses[-json], -image-resolution-json
|
||||
%Document -raw for -extract-images
|
||||
\documentclass{book}
|
||||
% Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf,
|
||||
% dotnetcpdflibmanual.pdf, jcpdflibmanual.pdf jscpdflibmanual.pdf etc.
|
||||
|
|
Loading…
Reference in New Issue