Added -raw option for extract_images
This commit is contained in:
parent
1fe0c33924
commit
33c690343c
|
@ -4160,7 +4160,7 @@ let go () =
|
||||||
in
|
in
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
Cpdfimage.extract_images args.path_to_p2p args.path_to_im args.encoding args.dedup args.dedup_per_page pdf range output_spec
|
Cpdfimage.extract_images ~raw:(args.encoding = Cpdfmetadata.Raw) ?path_to_p2p:(match args.path_to_p2p with "" -> None | x -> Some x) ?path_to_im:(match args.path_to_im with "" -> None | x -> Some x) args.encoding args.dedup args.dedup_per_page pdf range output_spec
|
||||||
| Some (ImageResolution f) ->
|
| Some (ImageResolution f) ->
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
|
|
25
cpdfimage.ml
25
cpdfimage.ml
|
@ -27,7 +27,7 @@ let write_stream name stream =
|
||||||
Pdfio.bytes_to_output_channel fh stream;
|
Pdfio.bytes_to_output_channel fh stream;
|
||||||
close_out fh
|
close_out fh
|
||||||
|
|
||||||
let write_image path_to_p2p path_to_im pdf resources name image =
|
let write_image ~raw ?path_to_p2p ?path_to_im pdf resources name image =
|
||||||
match Pdfimage.get_image_24bpp pdf resources image with
|
match Pdfimage.get_image_24bpp pdf resources image with
|
||||||
| Pdfimage.JPEG (stream, _) -> write_stream (name ^ ".jpg") stream
|
| Pdfimage.JPEG (stream, _) -> write_stream (name ^ ".jpg") stream
|
||||||
| Pdfimage.JPEG2000 (stream, _) -> write_stream (name ^ ".jpx") stream
|
| Pdfimage.JPEG2000 (stream, _) -> write_stream (name ^ ".jpx") stream
|
||||||
|
@ -39,10 +39,11 @@ let write_image path_to_p2p path_to_im pdf resources name image =
|
||||||
pnm_to_channel_24 fh w h stream;
|
pnm_to_channel_24 fh w h stream;
|
||||||
close_out fh;
|
close_out fh;
|
||||||
begin match path_to_p2p with
|
begin match path_to_p2p with
|
||||||
| "" ->
|
| None ->
|
||||||
begin match path_to_im with
|
begin match path_to_im with
|
||||||
"" -> Pdfe.log "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n"
|
None ->
|
||||||
| _ ->
|
if not raw then Pdfe.log "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n"
|
||||||
|
| Some path_to_im ->
|
||||||
begin match
|
begin match
|
||||||
Sys.command (Filename.quote_command path_to_im [pnm; png])
|
Sys.command (Filename.quote_command path_to_im [pnm; png])
|
||||||
with
|
with
|
||||||
|
@ -52,7 +53,7 @@ let write_image path_to_p2p path_to_im pdf resources name image =
|
||||||
Sys.remove pnm
|
Sys.remove pnm
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
| _ ->
|
| Some path_to_p2p ->
|
||||||
begin match
|
begin match
|
||||||
Sys.command (Filename.quote_command path_to_p2p ~stdout:png ["-gamma"; "0.45"; "-quiet"; pnm])
|
Sys.command (Filename.quote_command path_to_p2p ~stdout:png ["-gamma"; "0.45"; "-quiet"; pnm])
|
||||||
with
|
with
|
||||||
|
@ -67,16 +68,16 @@ let write_image path_to_p2p path_to_im pdf resources name image =
|
||||||
|
|
||||||
let written = ref []
|
let written = ref []
|
||||||
|
|
||||||
let extract_images_inner path_to_p2p path_to_im encoding serial pdf resources stem pnum images =
|
let extract_images_inner ~raw ?path_to_p2p ?path_to_im encoding serial pdf resources stem pnum images =
|
||||||
let names = map
|
let names = map
|
||||||
(fun _ ->
|
(fun _ ->
|
||||||
Cpdfbookmarks.name_of_spec
|
Cpdfbookmarks.name_of_spec
|
||||||
encoding [] pdf 0 (stem ^ "-p" ^ string_of_int pnum)
|
encoding [] pdf 0 (stem ^ "-p" ^ string_of_int pnum)
|
||||||
(let r = !serial in serial := !serial + 1; r) "" 0 0) (indx images)
|
(let r = !serial in serial := !serial + 1; r) "" 0 0) (indx images)
|
||||||
in
|
in
|
||||||
iter2 (write_image path_to_p2p path_to_im pdf resources) names images
|
iter2 (write_image ~raw ?path_to_p2p ?path_to_im pdf resources) names images
|
||||||
|
|
||||||
let rec extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum form =
|
let rec extract_images_form_xobject ~raw ?path_to_p2p ?path_to_im encoding dedup dedup_per_page pdf serial stem pnum form =
|
||||||
let resources =
|
let resources =
|
||||||
match Pdf.lookup_direct pdf "/Resources" form with
|
match Pdf.lookup_direct pdf "/Resources" form with
|
||||||
Some (Pdf.Dictionary d) -> Pdf.Dictionary d
|
Some (Pdf.Dictionary d) -> Pdf.Dictionary d
|
||||||
|
@ -95,9 +96,9 @@ let rec extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_
|
||||||
written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written;
|
written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written;
|
||||||
images
|
images
|
||||||
in
|
in
|
||||||
extract_images_inner path_to_p2p path_to_im encoding serial pdf resources stem pnum images
|
extract_images_inner ~raw ?path_to_p2p ?path_to_im encoding serial pdf resources stem pnum images
|
||||||
|
|
||||||
let extract_images path_to_p2p path_to_im encoding dedup dedup_per_page pdf range stem =
|
let extract_images ?(raw=false) ?path_to_p2p ?path_to_im encoding dedup dedup_per_page pdf range stem =
|
||||||
if dedup || dedup_per_page then written := [];
|
if dedup || dedup_per_page then written := [];
|
||||||
let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
let pages =
|
let pages =
|
||||||
|
@ -119,8 +120,8 @@ let extract_images path_to_p2p path_to_im encoding dedup dedup_per_page pdf rang
|
||||||
if dedup || dedup_per_page then
|
if dedup || dedup_per_page then
|
||||||
written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written;
|
written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written;
|
||||||
let forms = keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Form")) xobjects in
|
let forms = keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Form")) xobjects in
|
||||||
extract_images_inner path_to_p2p path_to_im encoding serial pdf page.Pdfpage.resources stem pnum images;
|
extract_images_inner ~raw ?path_to_p2p ?path_to_im encoding serial pdf page.Pdfpage.resources stem pnum images;
|
||||||
iter (extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum) forms)
|
iter (extract_images_form_xobject ~raw ?path_to_p2p ?path_to_im encoding dedup dedup_per_page pdf serial stem pnum) forms)
|
||||||
pages
|
pages
|
||||||
(indx pages)
|
(indx pages)
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
(** Images *)
|
(** Images *)
|
||||||
|
|
||||||
(** Extract images. *)
|
(** Extract images. *)
|
||||||
val extract_images : string ->
|
val extract_images : ?raw:bool -> ?path_to_p2p:string ->
|
||||||
string ->
|
?path_to_im:string ->
|
||||||
Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit
|
Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit
|
||||||
|
|
||||||
(** Report image resolutions. *)
|
(** Report image resolutions. *)
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
%Document -info-json, -page-info-json, -page-labels-json, -list-fonts-json
|
%Document -info-json, -page-info-json, -page-labels-json, -list-fonts-json
|
||||||
%Document subformat information
|
%Document subformat information
|
||||||
%Document -list-images[-json], -list-image-uses[-json], -image-resolution-json
|
%Document -list-images[-json], -list-image-uses[-json], -image-resolution-json
|
||||||
|
%Document -raw for -extract-images
|
||||||
\documentclass{book}
|
\documentclass{book}
|
||||||
% Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf,
|
% Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf,
|
||||||
% dotnetcpdflibmanual.pdf, jcpdflibmanual.pdf jscpdflibmanual.pdf etc.
|
% dotnetcpdflibmanual.pdf, jcpdflibmanual.pdf jscpdflibmanual.pdf etc.
|
||||||
|
|
Loading…
Reference in New Issue