mirror of
https://github.com/johnwhitington/cpdf-source.git
synced 2025-06-05 22:09:39 +02:00
more
This commit is contained in:
2
Makefile
2
Makefile
@@ -1,7 +1,7 @@
|
|||||||
# Build the cpdf command line tools and top level
|
# Build the cpdf command line tools and top level
|
||||||
MODS = cpdfyojson cpdfxmlm \
|
MODS = cpdfyojson cpdfxmlm \
|
||||||
cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord cpdfattach \
|
cpdfunicodedata cpdferror cpdfdebug cpdfjson cpdfstrftime cpdfcoord cpdfattach \
|
||||||
cpdfpagespec cpdfposition cpdfpresent cpdfmetadata cpdfbookmarks cpdfpage cpdfaddtext cpdf cpdffont cpdftype \
|
cpdfpagespec cpdfposition cpdfpresent cpdfmetadata cpdfbookmarks cpdfpage cpdfaddtext cpdf cpdfimage cpdffont cpdftype \
|
||||||
cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfspot cpdfpagelabels cpdfcreate cpdfannot cpdfcommand
|
cpdftexttopdf cpdftoc cpdfpad cpdfocg cpdfsqueeze cpdfspot cpdfpagelabels cpdfcreate cpdfannot cpdfcommand
|
||||||
|
|
||||||
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
SOURCES = $(foreach x,$(MODS),$(x).ml $(x).mli) cpdfcommandrun.ml
|
||||||
|
190
cpdf.ml
190
cpdf.ml
@@ -2007,194 +2007,4 @@ let create_pdf pages pagesize =
|
|||||||
let pdf, pageroot = Pdfpage.add_pagetree (many page pages) (Pdf.empty ()) in
|
let pdf, pageroot = Pdfpage.add_pagetree (many page pages) (Pdf.empty ()) in
|
||||||
Pdfpage.add_root pageroot [] pdf
|
Pdfpage.add_root pageroot [] pdf
|
||||||
|
|
||||||
let get_bookmark_name encoding pdf marks splitlevel n _ =
|
|
||||||
let refnums = Pdf.page_reference_numbers pdf in
|
|
||||||
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
|
||||||
match keep (function m -> n = Pdfpage.pagenumber_of_target ~fastrefnums pdf m.Pdfmarks.target && m.Pdfmarks.level <= splitlevel) marks with
|
|
||||||
| {Pdfmarks.text = title}::_ -> Cpdfattach.remove_unsafe_characters encoding title
|
|
||||||
| _ -> ""
|
|
||||||
|
|
||||||
(* @F means filename without extension *)
|
|
||||||
(* @N means sequence number with no padding *)
|
|
||||||
(* @S means start page of this section *)
|
|
||||||
(* @E means end page of this section *)
|
|
||||||
(* @B means bookmark name at start page *)
|
|
||||||
let process_others encoding marks pdf splitlevel filename sequence startpage endpage s =
|
|
||||||
let rec find_ats p = function
|
|
||||||
'@'::r -> find_ats (p + 1) r
|
|
||||||
| r -> (p, r)
|
|
||||||
in
|
|
||||||
let string_of_int_width w i =
|
|
||||||
if w < 0 then raise (Pdf.PDFError "width of field too narrow")
|
|
||||||
else if w > 8 then raise (Pdf.PDFError "width of field too broad") else
|
|
||||||
let formats =
|
|
||||||
[|format_of_string "%i";
|
|
||||||
format_of_string "%i";
|
|
||||||
format_of_string "%02i";
|
|
||||||
format_of_string "%03i";
|
|
||||||
format_of_string "%04i";
|
|
||||||
format_of_string "%05i";
|
|
||||||
format_of_string "%06i";
|
|
||||||
format_of_string "%07i";
|
|
||||||
format_of_string "%08i"|]
|
|
||||||
in
|
|
||||||
Printf.sprintf formats.(w) i
|
|
||||||
in
|
|
||||||
let rec procss prev = function
|
|
||||||
| [] -> rev prev
|
|
||||||
| '@'::'F'::t -> procss (rev (explode filename) @ prev) t
|
|
||||||
| '@'::'N'::t ->
|
|
||||||
let width, rest = find_ats 0 t in
|
|
||||||
procss (rev (explode (string_of_int_width width sequence)) @ prev) rest
|
|
||||||
| '@'::'S'::t ->
|
|
||||||
let width, rest = find_ats 0 t in
|
|
||||||
procss (rev (explode (string_of_int_width width startpage)) @ prev) rest
|
|
||||||
| '@'::'E'::t ->
|
|
||||||
let width, rest = find_ats 0 t in
|
|
||||||
procss (rev (explode (string_of_int_width width endpage)) @ prev) rest
|
|
||||||
| '@'::'B'::t -> procss (rev (explode (get_bookmark_name encoding pdf marks splitlevel startpage pdf)) @ prev) t
|
|
||||||
| h::t -> procss (h::prev) t
|
|
||||||
in
|
|
||||||
implode (procss [] (explode s))
|
|
||||||
|
|
||||||
let name_of_spec encoding marks (pdf : Pdf.t) splitlevel spec n filename startpage endpage =
|
|
||||||
let fill l n =
|
|
||||||
let chars = explode (string_of_int n) in
|
|
||||||
if length chars > l
|
|
||||||
then implode (drop chars (length chars - l))
|
|
||||||
else implode ((many '0' (l - length chars)) @ chars)
|
|
||||||
in
|
|
||||||
let chars = explode spec in
|
|
||||||
let before, including = cleavewhile (neq '%') chars in
|
|
||||||
let percents, after = cleavewhile (eq '%') including in
|
|
||||||
if percents = []
|
|
||||||
then
|
|
||||||
process_others encoding marks pdf splitlevel filename n startpage endpage spec
|
|
||||||
else
|
|
||||||
process_others encoding marks pdf splitlevel filename n startpage endpage
|
|
||||||
(implode before ^ fill (length percents) n ^ implode after)
|
|
||||||
|
|
||||||
(* Extract Images. *)
|
|
||||||
let pnm_to_channel_24 channel w h s =
|
|
||||||
let white () = output_char channel ' '
|
|
||||||
and newline () = output_char channel '\n'
|
|
||||||
and output_string = Pervasives.output_string channel in
|
|
||||||
output_string "P6";
|
|
||||||
white ();
|
|
||||||
output_string (string_of_int w);
|
|
||||||
white ();
|
|
||||||
output_string (string_of_int h);
|
|
||||||
white ();
|
|
||||||
output_string "255";
|
|
||||||
newline ();
|
|
||||||
let pos = ref 0 in
|
|
||||||
for y = 1 to h do
|
|
||||||
for x = 1 to w * 3 do
|
|
||||||
output_byte channel (bget s !pos);
|
|
||||||
incr pos
|
|
||||||
done
|
|
||||||
done
|
|
||||||
|
|
||||||
let write_stream name stream =
|
|
||||||
let fh = open_out_bin name in
|
|
||||||
for x = 0 to bytes_size stream - 1 do
|
|
||||||
output_byte fh (bget stream x)
|
|
||||||
done;
|
|
||||||
close_out fh
|
|
||||||
|
|
||||||
let write_image path_to_p2p path_to_im pdf resources name image =
|
|
||||||
match Pdfimage.get_image_24bpp pdf resources image with
|
|
||||||
| Pdfimage.JPEG (stream, _) -> write_stream (name ^ ".jpg") stream
|
|
||||||
| Pdfimage.JPEG2000 (stream, _) -> write_stream (name ^ ".jpx") stream
|
|
||||||
| Pdfimage.JBIG2 (stream, _) -> write_stream (name ^ ".jbig2") stream
|
|
||||||
| Pdfimage.Raw (w, h, Pdfimage.BPP24, stream) ->
|
|
||||||
let pnm = name ^ ".pnm" in
|
|
||||||
let png = name ^ ".png" in
|
|
||||||
let fh = open_out_bin pnm in
|
|
||||||
pnm_to_channel_24 fh w h stream;
|
|
||||||
close_out fh;
|
|
||||||
begin match path_to_p2p with
|
|
||||||
| "" ->
|
|
||||||
begin match path_to_im with
|
|
||||||
"" -> Printf.eprintf "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n%!"
|
|
||||||
| _ ->
|
|
||||||
begin match
|
|
||||||
Sys.command (Filename.quote_command path_to_im [pnm; png])
|
|
||||||
with
|
|
||||||
0 -> Sys.remove pnm
|
|
||||||
| _ ->
|
|
||||||
Printf.eprintf "Call to imagemagick failed: did you specify -p2p correctly?\n%!";
|
|
||||||
Sys.remove pnm
|
|
||||||
end
|
|
||||||
end
|
|
||||||
| _ ->
|
|
||||||
begin match
|
|
||||||
Sys.command (Filename.quote_command path_to_p2p ~stdout:png ["-gamma"; "0.45"; "-quiet"; pnm])
|
|
||||||
with
|
|
||||||
| 0 -> Sys.remove pnm
|
|
||||||
| _ ->
|
|
||||||
Printf.eprintf "Call to pnmtopng failed: did you specify -p2p correctly?\n%!";
|
|
||||||
Sys.remove pnm
|
|
||||||
end
|
|
||||||
end
|
|
||||||
| _ ->
|
|
||||||
Printf.eprintf "Unsupported image type when extracting image %s %!" name
|
|
||||||
|
|
||||||
let written = ref []
|
|
||||||
|
|
||||||
let extract_images_inner path_to_p2p path_to_im encoding serial pdf resources stem pnum images =
|
|
||||||
let names = map
|
|
||||||
(fun _ ->
|
|
||||||
name_of_spec
|
|
||||||
encoding [] pdf 0 (stem ^ "-p" ^ string_of_int pnum)
|
|
||||||
(let r = !serial in serial := !serial + 1; r) "" 0 0) (indx images)
|
|
||||||
in
|
|
||||||
iter2 (write_image path_to_p2p path_to_im pdf resources) names images
|
|
||||||
|
|
||||||
let rec extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum form =
|
|
||||||
let resources =
|
|
||||||
match Pdf.lookup_direct pdf "/Resources" form with
|
|
||||||
Some (Pdf.Dictionary d) -> Pdf.Dictionary d
|
|
||||||
| _ -> Pdf.Dictionary []
|
|
||||||
in
|
|
||||||
let images =
|
|
||||||
let xobjects =
|
|
||||||
match Pdf.lookup_direct pdf "/XObject" resources with
|
|
||||||
| Some (Pdf.Dictionary elts) -> map snd elts
|
|
||||||
| _ -> []
|
|
||||||
in
|
|
||||||
(* Remove any already in !written. Add any remaining to !written, if !args.dedup or !args.dedup_page *)
|
|
||||||
let images = keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Image")) xobjects in
|
|
||||||
let already_written, images = List.partition (function Pdf.Indirect n -> mem n !written | _ -> false) images in
|
|
||||||
if dedup || dedup_per_page then
|
|
||||||
written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written;
|
|
||||||
images
|
|
||||||
in
|
|
||||||
extract_images_inner path_to_p2p path_to_im encoding serial pdf resources stem pnum images
|
|
||||||
|
|
||||||
let extract_images path_to_p2p path_to_im encoding dedup dedup_per_page pdf range stem =
|
|
||||||
if dedup || dedup_per_page then written := [];
|
|
||||||
let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
|
||||||
let pages =
|
|
||||||
option_map
|
|
||||||
(function (i, pdf_pages) -> if mem i range then Some pdf_pages else None)
|
|
||||||
(combine (indx pdf_pages) pdf_pages)
|
|
||||||
in
|
|
||||||
let serial = ref 0 in
|
|
||||||
iter2
|
|
||||||
(fun page pnum ->
|
|
||||||
if dedup_per_page then written := [];
|
|
||||||
let xobjects =
|
|
||||||
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
|
|
||||||
| Some (Pdf.Dictionary elts) -> map snd elts
|
|
||||||
| _ -> []
|
|
||||||
in
|
|
||||||
let images = keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Image")) xobjects in
|
|
||||||
let already_written, images = List.partition (function Pdf.Indirect n -> mem n !written | _ -> false) images in
|
|
||||||
if dedup || dedup_per_page then
|
|
||||||
written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written;
|
|
||||||
let forms = keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Form")) xobjects in
|
|
||||||
extract_images_inner path_to_p2p path_to_im encoding serial pdf page.Pdfpage.resources stem pnum images;
|
|
||||||
iter (extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum) forms)
|
|
||||||
pages
|
|
||||||
(indx pages)
|
|
||||||
|
8
cpdf.mli
8
cpdf.mli
@@ -145,11 +145,3 @@ val remove_unused_resources : Pdf.t -> Pdf.t
|
|||||||
val bookmarks_open_to_level : int -> Pdf.t -> Pdf.t
|
val bookmarks_open_to_level : int -> Pdf.t -> Pdf.t
|
||||||
|
|
||||||
val create_pdf : int -> Pdfpaper.t -> Pdf.t
|
val create_pdf : int -> Pdfpaper.t -> Pdf.t
|
||||||
|
|
||||||
val name_of_spec : Cpdfmetadata.encoding ->
|
|
||||||
Pdfmarks.t list ->
|
|
||||||
Pdf.t -> int -> string -> int -> string -> int -> int -> string
|
|
||||||
|
|
||||||
val extract_images : string ->
|
|
||||||
string ->
|
|
||||||
Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit
|
|
||||||
|
@@ -292,3 +292,70 @@ let split_on_bookmarks pdf level =
|
|||||||
in let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
in let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
let ranges = splitat points (indx pdf_pages) in
|
let ranges = splitat points (indx pdf_pages) in
|
||||||
map (fun rs -> Pdfpage.pdf_of_pages pdf rs) ranges
|
map (fun rs -> Pdfpage.pdf_of_pages pdf rs) ranges
|
||||||
|
|
||||||
|
let get_bookmark_name encoding pdf marks splitlevel n _ =
|
||||||
|
let refnums = Pdf.page_reference_numbers pdf in
|
||||||
|
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
|
||||||
|
match keep (function m -> n = Pdfpage.pagenumber_of_target ~fastrefnums pdf m.Pdfmarks.target && m.Pdfmarks.level <= splitlevel) marks with
|
||||||
|
| {Pdfmarks.text = title}::_ -> Cpdfattach.remove_unsafe_characters encoding title
|
||||||
|
| _ -> ""
|
||||||
|
|
||||||
|
(* @F means filename without extension *)
|
||||||
|
(* @N means sequence number with no padding *)
|
||||||
|
(* @S means start page of this section *)
|
||||||
|
(* @E means end page of this section *)
|
||||||
|
(* @B means bookmark name at start page *)
|
||||||
|
let process_others encoding marks pdf splitlevel filename sequence startpage endpage s =
|
||||||
|
let rec find_ats p = function
|
||||||
|
'@'::r -> find_ats (p + 1) r
|
||||||
|
| r -> (p, r)
|
||||||
|
in
|
||||||
|
let string_of_int_width w i =
|
||||||
|
if w < 0 then raise (Pdf.PDFError "width of field too narrow")
|
||||||
|
else if w > 8 then raise (Pdf.PDFError "width of field too broad") else
|
||||||
|
let formats =
|
||||||
|
[|format_of_string "%i";
|
||||||
|
format_of_string "%i";
|
||||||
|
format_of_string "%02i";
|
||||||
|
format_of_string "%03i";
|
||||||
|
format_of_string "%04i";
|
||||||
|
format_of_string "%05i";
|
||||||
|
format_of_string "%06i";
|
||||||
|
format_of_string "%07i";
|
||||||
|
format_of_string "%08i"|]
|
||||||
|
in
|
||||||
|
Printf.sprintf formats.(w) i
|
||||||
|
in
|
||||||
|
let rec procss prev = function
|
||||||
|
| [] -> rev prev
|
||||||
|
| '@'::'F'::t -> procss (rev (explode filename) @ prev) t
|
||||||
|
| '@'::'N'::t ->
|
||||||
|
let width, rest = find_ats 0 t in
|
||||||
|
procss (rev (explode (string_of_int_width width sequence)) @ prev) rest
|
||||||
|
| '@'::'S'::t ->
|
||||||
|
let width, rest = find_ats 0 t in
|
||||||
|
procss (rev (explode (string_of_int_width width startpage)) @ prev) rest
|
||||||
|
| '@'::'E'::t ->
|
||||||
|
let width, rest = find_ats 0 t in
|
||||||
|
procss (rev (explode (string_of_int_width width endpage)) @ prev) rest
|
||||||
|
| '@'::'B'::t -> procss (rev (explode (get_bookmark_name encoding pdf marks splitlevel startpage pdf)) @ prev) t
|
||||||
|
| h::t -> procss (h::prev) t
|
||||||
|
in
|
||||||
|
implode (procss [] (explode s))
|
||||||
|
|
||||||
|
let name_of_spec encoding marks (pdf : Pdf.t) splitlevel spec n filename startpage endpage =
|
||||||
|
let fill l n =
|
||||||
|
let chars = explode (string_of_int n) in
|
||||||
|
if length chars > l
|
||||||
|
then implode (drop chars (length chars - l))
|
||||||
|
else implode ((many '0' (l - length chars)) @ chars)
|
||||||
|
in
|
||||||
|
let chars = explode spec in
|
||||||
|
let before, including = cleavewhile (neq '%') chars in
|
||||||
|
let percents, after = cleavewhile (eq '%') including in
|
||||||
|
if percents = []
|
||||||
|
then
|
||||||
|
process_others encoding marks pdf splitlevel filename n startpage endpage spec
|
||||||
|
else
|
||||||
|
process_others encoding marks pdf splitlevel filename n startpage endpage
|
||||||
|
(implode before ^ fill (length percents) n ^ implode after)
|
||||||
|
@@ -12,3 +12,7 @@ val add_bookmarks : json:bool -> bool -> Pdfio.input -> Pdf.t -> Pdf.t
|
|||||||
(** [list_bookmarks encoding range pdf output] lists the bookmarks to the given
|
(** [list_bookmarks encoding range pdf output] lists the bookmarks to the given
|
||||||
output in the format specified in cpdfmanual.pdf *)
|
output in the format specified in cpdfmanual.pdf *)
|
||||||
val list_bookmarks : json:bool -> Cpdfmetadata.encoding -> int list -> Pdf.t -> Pdfio.output -> unit
|
val list_bookmarks : json:bool -> Cpdfmetadata.encoding -> int list -> Pdf.t -> Pdfio.output -> unit
|
||||||
|
|
||||||
|
val name_of_spec : Cpdfmetadata.encoding ->
|
||||||
|
Pdfmarks.t list ->
|
||||||
|
Pdf.t -> int -> string -> int -> string -> int -> int -> string
|
||||||
|
@@ -2883,7 +2883,7 @@ let fast_write_split_pdfs
|
|||||||
let pdf = Pdfpage.pdf_of_pages main_pdf pagenums in
|
let pdf = Pdfpage.pdf_of_pages main_pdf pagenums in
|
||||||
let startpage, endpage = extremes pagenums in
|
let startpage, endpage = extremes pagenums in
|
||||||
let name =
|
let name =
|
||||||
Cpdf.name_of_spec
|
Cpdfbookmarks.name_of_spec
|
||||||
args.encoding marks main_pdf splitlevel spec number
|
args.encoding marks main_pdf splitlevel spec number
|
||||||
(stem original_filename) startpage endpage
|
(stem original_filename) startpage endpage
|
||||||
in
|
in
|
||||||
@@ -3774,7 +3774,7 @@ let go () =
|
|||||||
in
|
in
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
Cpdf.extract_images args.path_to_p2p args.path_to_im args.encoding args.dedup args.dedup_per_page pdf range output_spec
|
Cpdfimage.extract_images args.path_to_p2p args.path_to_im args.encoding args.dedup args.dedup_per_page pdf range output_spec
|
||||||
| Some (ImageResolution f) ->
|
| Some (ImageResolution f) ->
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
|
127
cpdfimage.ml
Normal file
127
cpdfimage.ml
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
open Pdfutil
|
||||||
|
open Pdfio
|
||||||
|
|
||||||
|
(* Extract Images. *)
|
||||||
|
let pnm_to_channel_24 channel w h s =
|
||||||
|
let white () = output_char channel ' '
|
||||||
|
and newline () = output_char channel '\n'
|
||||||
|
and output_string = Pervasives.output_string channel in
|
||||||
|
output_string "P6";
|
||||||
|
white ();
|
||||||
|
output_string (string_of_int w);
|
||||||
|
white ();
|
||||||
|
output_string (string_of_int h);
|
||||||
|
white ();
|
||||||
|
output_string "255";
|
||||||
|
newline ();
|
||||||
|
let pos = ref 0 in
|
||||||
|
for y = 1 to h do
|
||||||
|
for x = 1 to w * 3 do
|
||||||
|
output_byte channel (bget s !pos);
|
||||||
|
incr pos
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
let write_stream name stream =
|
||||||
|
let fh = open_out_bin name in
|
||||||
|
for x = 0 to bytes_size stream - 1 do
|
||||||
|
output_byte fh (bget stream x)
|
||||||
|
done;
|
||||||
|
close_out fh
|
||||||
|
|
||||||
|
let write_image path_to_p2p path_to_im pdf resources name image =
|
||||||
|
match Pdfimage.get_image_24bpp pdf resources image with
|
||||||
|
| Pdfimage.JPEG (stream, _) -> write_stream (name ^ ".jpg") stream
|
||||||
|
| Pdfimage.JPEG2000 (stream, _) -> write_stream (name ^ ".jpx") stream
|
||||||
|
| Pdfimage.JBIG2 (stream, _) -> write_stream (name ^ ".jbig2") stream
|
||||||
|
| Pdfimage.Raw (w, h, Pdfimage.BPP24, stream) ->
|
||||||
|
let pnm = name ^ ".pnm" in
|
||||||
|
let png = name ^ ".png" in
|
||||||
|
let fh = open_out_bin pnm in
|
||||||
|
pnm_to_channel_24 fh w h stream;
|
||||||
|
close_out fh;
|
||||||
|
begin match path_to_p2p with
|
||||||
|
| "" ->
|
||||||
|
begin match path_to_im with
|
||||||
|
"" -> Printf.eprintf "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n%!"
|
||||||
|
| _ ->
|
||||||
|
begin match
|
||||||
|
Sys.command (Filename.quote_command path_to_im [pnm; png])
|
||||||
|
with
|
||||||
|
0 -> Sys.remove pnm
|
||||||
|
| _ ->
|
||||||
|
Printf.eprintf "Call to imagemagick failed: did you specify -p2p correctly?\n%!";
|
||||||
|
Sys.remove pnm
|
||||||
|
end
|
||||||
|
end
|
||||||
|
| _ ->
|
||||||
|
begin match
|
||||||
|
Sys.command (Filename.quote_command path_to_p2p ~stdout:png ["-gamma"; "0.45"; "-quiet"; pnm])
|
||||||
|
with
|
||||||
|
| 0 -> Sys.remove pnm
|
||||||
|
| _ ->
|
||||||
|
Printf.eprintf "Call to pnmtopng failed: did you specify -p2p correctly?\n%!";
|
||||||
|
Sys.remove pnm
|
||||||
|
end
|
||||||
|
end
|
||||||
|
| _ ->
|
||||||
|
Printf.eprintf "Unsupported image type when extracting image %s %!" name
|
||||||
|
|
||||||
|
let written = ref []
|
||||||
|
|
||||||
|
let extract_images_inner path_to_p2p path_to_im encoding serial pdf resources stem pnum images =
|
||||||
|
let names = map
|
||||||
|
(fun _ ->
|
||||||
|
Cpdfbookmarks.name_of_spec
|
||||||
|
encoding [] pdf 0 (stem ^ "-p" ^ string_of_int pnum)
|
||||||
|
(let r = !serial in serial := !serial + 1; r) "" 0 0) (indx images)
|
||||||
|
in
|
||||||
|
iter2 (write_image path_to_p2p path_to_im pdf resources) names images
|
||||||
|
|
||||||
|
let rec extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum form =
|
||||||
|
let resources =
|
||||||
|
match Pdf.lookup_direct pdf "/Resources" form with
|
||||||
|
Some (Pdf.Dictionary d) -> Pdf.Dictionary d
|
||||||
|
| _ -> Pdf.Dictionary []
|
||||||
|
in
|
||||||
|
let images =
|
||||||
|
let xobjects =
|
||||||
|
match Pdf.lookup_direct pdf "/XObject" resources with
|
||||||
|
| Some (Pdf.Dictionary elts) -> map snd elts
|
||||||
|
| _ -> []
|
||||||
|
in
|
||||||
|
(* Remove any already in !written. Add any remaining to !written, if !args.dedup or !args.dedup_page *)
|
||||||
|
let images = keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Image")) xobjects in
|
||||||
|
let already_written, images = List.partition (function Pdf.Indirect n -> mem n !written | _ -> false) images in
|
||||||
|
if dedup || dedup_per_page then
|
||||||
|
written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written;
|
||||||
|
images
|
||||||
|
in
|
||||||
|
extract_images_inner path_to_p2p path_to_im encoding serial pdf resources stem pnum images
|
||||||
|
|
||||||
|
let extract_images path_to_p2p path_to_im encoding dedup dedup_per_page pdf range stem =
|
||||||
|
if dedup || dedup_per_page then written := [];
|
||||||
|
let pdf_pages = Pdfpage.pages_of_pagetree pdf in
|
||||||
|
let pages =
|
||||||
|
option_map
|
||||||
|
(function (i, pdf_pages) -> if mem i range then Some pdf_pages else None)
|
||||||
|
(combine (indx pdf_pages) pdf_pages)
|
||||||
|
in
|
||||||
|
let serial = ref 0 in
|
||||||
|
iter2
|
||||||
|
(fun page pnum ->
|
||||||
|
if dedup_per_page then written := [];
|
||||||
|
let xobjects =
|
||||||
|
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
|
||||||
|
| Some (Pdf.Dictionary elts) -> map snd elts
|
||||||
|
| _ -> []
|
||||||
|
in
|
||||||
|
let images = keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Image")) xobjects in
|
||||||
|
let already_written, images = List.partition (function Pdf.Indirect n -> mem n !written | _ -> false) images in
|
||||||
|
if dedup || dedup_per_page then
|
||||||
|
written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written;
|
||||||
|
let forms = keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Form")) xobjects in
|
||||||
|
extract_images_inner path_to_p2p path_to_im encoding serial pdf page.Pdfpage.resources stem pnum images;
|
||||||
|
iter (extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum) forms)
|
||||||
|
pages
|
||||||
|
(indx pages)
|
3
cpdfimage.mli
Normal file
3
cpdfimage.mli
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
val extract_images : string ->
|
||||||
|
string ->
|
||||||
|
Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit
|
Reference in New Issue
Block a user