more
This commit is contained in:
parent
5d421f5147
commit
42e600e049
1
Changes
1
Changes
|
@ -1,5 +1,6 @@
|
||||||
Version 2.4 (to come)
|
Version 2.4 (to come)
|
||||||
|
|
||||||
|
o New operation -extract-images
|
||||||
o Optional Content Groups now preserved when merging / stamping pages
|
o Optional Content Groups now preserved when merging / stamping pages
|
||||||
o Listing, coalescing and modifying Optional Content Groups.
|
o Listing, coalescing and modifying Optional Content Groups.
|
||||||
o Vendored in tiny_json from Yoshihiro Imai via Jan Furuse
|
o Vendored in tiny_json from Yoshihiro Imai via Jan Furuse
|
||||||
|
|
|
@ -415,6 +415,8 @@ type args =
|
||||||
mutable outline : bool;
|
mutable outline : bool;
|
||||||
mutable linewidth : float;
|
mutable linewidth : float;
|
||||||
mutable path_to_ghostscript : string;
|
mutable path_to_ghostscript : string;
|
||||||
|
mutable path_to_im : string;
|
||||||
|
mutable path_to_p2p : string;
|
||||||
mutable frombox : string option;
|
mutable frombox : string option;
|
||||||
mutable tobox : string option;
|
mutable tobox : string option;
|
||||||
mutable mediabox_if_missing : bool;
|
mutable mediabox_if_missing : bool;
|
||||||
|
@ -515,6 +517,8 @@ let args =
|
||||||
outline = false;
|
outline = false;
|
||||||
linewidth = 1.0;
|
linewidth = 1.0;
|
||||||
path_to_ghostscript = "";
|
path_to_ghostscript = "";
|
||||||
|
path_to_im = "";
|
||||||
|
path_to_p2p = "";
|
||||||
frombox = None;
|
frombox = None;
|
||||||
tobox = None;
|
tobox = None;
|
||||||
mediabox_if_missing = false;
|
mediabox_if_missing = false;
|
||||||
|
@ -638,9 +642,10 @@ let reset_arguments () =
|
||||||
args.ocgrenamefrom <- "";
|
args.ocgrenamefrom <- "";
|
||||||
args.ocgrenameto <- ""
|
args.ocgrenameto <- ""
|
||||||
(* Do not reset original_filename or cpdflin or was_encrypted or
|
(* Do not reset original_filename or cpdflin or was_encrypted or
|
||||||
* was_decrypted_with_owner or recrypt or producer or creator or
|
* was_decrypted_with_owner or recrypt or producer or creator or path_to_* or
|
||||||
* path_to_ghostscript or gs_malformed or gs_quiet, since we want these to work across
|
* gs_malformed or gs_quiet, since we want these to work across ANDs. Or
|
||||||
* ANDs. Or squeeze options: a little odd, but we want it to happen on eventual output. *)
|
* squeeze options: a little odd, but we want it to happen on eventual
|
||||||
|
* output. *)
|
||||||
|
|
||||||
let get_pagespec () =
|
let get_pagespec () =
|
||||||
match args.inputs with
|
match args.inputs with
|
||||||
|
@ -1316,6 +1321,12 @@ let setimageresolution f =
|
||||||
let setgspath p =
|
let setgspath p =
|
||||||
args.path_to_ghostscript <- p
|
args.path_to_ghostscript <- p
|
||||||
|
|
||||||
|
let setimpath p =
|
||||||
|
args.path_to_im <- p
|
||||||
|
|
||||||
|
let setp2ppath p =
|
||||||
|
args.path_to_p2p <- p
|
||||||
|
|
||||||
let settextvertical () =
|
let settextvertical () =
|
||||||
args.orientation <- Cpdf.Vertical
|
args.orientation <- Cpdf.Vertical
|
||||||
|
|
||||||
|
@ -2113,6 +2124,8 @@ and specs =
|
||||||
("-gs", Arg.String setgspath, " Path to gs executable");
|
("-gs", Arg.String setgspath, " Path to gs executable");
|
||||||
("-gs-malformed", Arg.Unit setgsmalformed, " Also try to reconstruct malformed files with gs");
|
("-gs-malformed", Arg.Unit setgsmalformed, " Also try to reconstruct malformed files with gs");
|
||||||
("-gs-quiet", Arg.Unit setgsquiet, " Make gs go into quiet mode");
|
("-gs-quiet", Arg.Unit setgsquiet, " Make gs go into quiet mode");
|
||||||
|
("-im", Arg.String setimpath, " Path to imagemagick executable");
|
||||||
|
("-p2p", Arg.String setp2ppath, " Path to pnmtopng executable");
|
||||||
("-squeeze", Arg.Unit setsqueeze, " Squeeze");
|
("-squeeze", Arg.Unit setsqueeze, " Squeeze");
|
||||||
("-squeeze-log-to", Arg.String setsqueezelogto, " Squeeze log location");
|
("-squeeze-log-to", Arg.String setsqueezelogto, " Squeeze log location");
|
||||||
("-squeeze-no-pagedata", Arg.Unit setsqueezepagedata, " Don't recompress pages");
|
("-squeeze-no-pagedata", Arg.Unit setsqueezepagedata, " Don't recompress pages");
|
||||||
|
@ -2770,12 +2783,6 @@ let pnm_to_channel_24 channel w h s =
|
||||||
done
|
done
|
||||||
done
|
done
|
||||||
|
|
||||||
let null_device =
|
|
||||||
match Sys.os_type with
|
|
||||||
| "Win32" -> "nul"
|
|
||||||
| _ -> "/dev/null"
|
|
||||||
|
|
||||||
(* cpdf -extract-images in.pdf 2-5 -o img%%% (FIXME: Add output spec. Document png stuff.) *)
|
|
||||||
let write_stream name stream =
|
let write_stream name stream =
|
||||||
let fh = open_out_bin name in
|
let fh = open_out_bin name in
|
||||||
for x = 0 to bytes_size stream - 1 do
|
for x = 0 to bytes_size stream - 1 do
|
||||||
|
@ -2783,6 +2790,9 @@ let write_stream name stream =
|
||||||
done;
|
done;
|
||||||
close_out fh
|
close_out fh
|
||||||
|
|
||||||
|
(* FIXME: File and command quoting check on unix, windows inc command.exe *)
|
||||||
|
(* FIXME: Doesn't cope with images within form xobjects *)
|
||||||
|
(* FIXME: Document it *)
|
||||||
let write_image pdf resources name image =
|
let write_image pdf resources name image =
|
||||||
match Pdfimage.get_image_24bpp pdf resources image with
|
match Pdfimage.get_image_24bpp pdf resources image with
|
||||||
| Pdfimage.JPEG (stream, _) -> write_stream (name ^ ".jpg") stream
|
| Pdfimage.JPEG (stream, _) -> write_stream (name ^ ".jpg") stream
|
||||||
|
@ -2792,16 +2802,44 @@ let write_image pdf resources name image =
|
||||||
let fh = open_out_bin (name ^ ".pnm") in
|
let fh = open_out_bin (name ^ ".pnm") in
|
||||||
pnm_to_channel_24 fh w h stream;
|
pnm_to_channel_24 fh w h stream;
|
||||||
close_out fh;
|
close_out fh;
|
||||||
(* If pnmtopng is present, convert the pnm to a PNG. *)
|
begin match args.path_to_p2p with
|
||||||
|
| "" ->
|
||||||
|
begin match args.path_to_im with
|
||||||
|
"" -> Printf.eprintf "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n"
|
||||||
|
| _ ->
|
||||||
begin match
|
begin match
|
||||||
Sys.command ("pnmtopng -gamma 0.45 -quiet " ^ "\"" ^ name ^ ".pnm\"" ^ "> \"" ^ name ^ ".png\" 2>" ^ null_device)
|
Sys.command (args.path_to_im ^ " " ^ name ^ ".pnm" ^ " " ^ name ^ ".png")
|
||||||
|
with
|
||||||
|
0 -> Sys.remove (name ^ ".pnm");
|
||||||
|
| _ ->
|
||||||
|
Printf.eprintf "Call to imagemagick failed: did you specify -p2p correctly?\n";
|
||||||
|
Sys.remove (name ^ ".pnm")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
| _ ->
|
||||||
|
begin match
|
||||||
|
Sys.command (args.path_to_p2p ^ " -gamma 0.45 -quiet " ^ "\"" ^ name ^ ".pnm\"" ^ "> \"" ^ name ^ ".png\"")
|
||||||
with
|
with
|
||||||
| 0 -> Sys.remove (name ^ ".pnm")
|
| 0 -> Sys.remove (name ^ ".pnm")
|
||||||
| _ -> ()
|
| _ ->
|
||||||
|
Printf.eprintf "Call to pnmtopng failed: did you specify -p2p correctly?\n";
|
||||||
|
Sys.remove (name ^ ".pnm")
|
||||||
end
|
end
|
||||||
| _ -> ()
|
end
|
||||||
|
| _ ->
|
||||||
|
Printf.eprintf "Unsupported image type when extracting image %s " name
|
||||||
|
|
||||||
|
let extract_images_inner serial pdf resources stem pnum images =
|
||||||
|
let names = map
|
||||||
|
(fun _ ->
|
||||||
|
name_of_spec
|
||||||
|
[] pdf 0 (stem ^ "-p" ^ string_of_int pnum)
|
||||||
|
(let r = !serial in serial := !serial + 1; r) "" 0 0) (indx images)
|
||||||
|
in
|
||||||
|
iter2 (write_image pdf resources) names images
|
||||||
|
|
||||||
|
let rec extract_images_form_xobject pdf serial form = ()
|
||||||
|
|
||||||
(* FIXME: Doesn't cope with images within form xobjects *)
|
|
||||||
let extract_images pdf range stem =
|
let extract_images pdf range stem =
|
||||||
let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
let pages =
|
let pages =
|
||||||
|
@ -2809,6 +2847,7 @@ let extract_images pdf range stem =
|
||||||
(function (i, pdf_pages) -> if mem i range then Some pdf_pages else None)
|
(function (i, pdf_pages) -> if mem i range then Some pdf_pages else None)
|
||||||
(combine (indx pdf_pages) pdf_pages)
|
(combine (indx pdf_pages) pdf_pages)
|
||||||
in
|
in
|
||||||
|
let serial = ref 0 in
|
||||||
iter2
|
iter2
|
||||||
(fun page pnum ->
|
(fun page pnum ->
|
||||||
let xobjects =
|
let xobjects =
|
||||||
|
@ -2816,17 +2855,11 @@ let extract_images pdf range stem =
|
||||||
| Some (Pdf.Dictionary elts) -> map snd elts
|
| Some (Pdf.Dictionary elts) -> map snd elts
|
||||||
| _ -> []
|
| _ -> []
|
||||||
in
|
in
|
||||||
let images =
|
let images = keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Image")) xobjects in
|
||||||
keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Image")) xobjects
|
let forms = keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Form")) xobjects in
|
||||||
in
|
Printf.printf "Found %i form xobjects on page %i\n" (length forms) pnum;
|
||||||
if images <> [] then
|
extract_images_inner serial pdf page.Pdfpage.resources stem pnum images;
|
||||||
(let names =
|
iter (extract_images_form_xobject pdf serial) forms)
|
||||||
map
|
|
||||||
(function n ->
|
|
||||||
let r = name_of_spec [] pdf 0 ("p" ^ string_of_int pnum ^ "_" ^ stem) n "" 0 0 in r)
|
|
||||||
(indx images)
|
|
||||||
in
|
|
||||||
iter2 (write_image pdf page.Pdfpage.resources) names images))
|
|
||||||
pages
|
pages
|
||||||
(indx pages)
|
(indx pages)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue