mirror of
				https://github.com/johnwhitington/cpdf-source.git
				synced 2025-06-05 22:09:39 +02:00 
			
		
		
		
	Added -raw option for extract_images
This commit is contained in:
		| @@ -4160,7 +4160,7 @@ let go () = | |||||||
|       in |       in | ||||||
|         let pdf = get_single_pdf args.op true in |         let pdf = get_single_pdf args.op true in | ||||||
|           let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in |           let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in | ||||||
|             Cpdfimage.extract_images args.path_to_p2p args.path_to_im args.encoding args.dedup args.dedup_per_page pdf range output_spec |             Cpdfimage.extract_images ~raw:(args.encoding = Cpdfmetadata.Raw) ?path_to_p2p:(match args.path_to_p2p with "" -> None | x -> Some x) ?path_to_im:(match args.path_to_im with "" -> None | x -> Some x) args.encoding args.dedup args.dedup_per_page pdf range output_spec | ||||||
|   | Some (ImageResolution f) -> |   | Some (ImageResolution f) -> | ||||||
|       let pdf = get_single_pdf args.op true in |       let pdf = get_single_pdf args.op true in | ||||||
|         let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in |         let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in | ||||||
|   | |||||||
							
								
								
									
										25
									
								
								cpdfimage.ml
									
									
									
									
									
								
							
							
						
						
									
										25
									
								
								cpdfimage.ml
									
									
									
									
									
								
							| @@ -27,7 +27,7 @@ let write_stream name stream = | |||||||
|     Pdfio.bytes_to_output_channel fh stream; |     Pdfio.bytes_to_output_channel fh stream; | ||||||
|     close_out fh |     close_out fh | ||||||
|  |  | ||||||
| let write_image path_to_p2p path_to_im pdf resources name image = | let write_image ~raw ?path_to_p2p ?path_to_im pdf resources name image = | ||||||
|   match Pdfimage.get_image_24bpp pdf resources image with |   match Pdfimage.get_image_24bpp pdf resources image with | ||||||
|   | Pdfimage.JPEG (stream, _) -> write_stream (name ^ ".jpg") stream |   | Pdfimage.JPEG (stream, _) -> write_stream (name ^ ".jpg") stream | ||||||
|   | Pdfimage.JPEG2000 (stream, _) -> write_stream (name ^ ".jpx") stream |   | Pdfimage.JPEG2000 (stream, _) -> write_stream (name ^ ".jpx") stream | ||||||
| @@ -39,10 +39,11 @@ let write_image path_to_p2p path_to_im pdf resources name image = | |||||||
|         pnm_to_channel_24 fh w h stream; |         pnm_to_channel_24 fh w h stream; | ||||||
|         close_out fh; |         close_out fh; | ||||||
|         begin match path_to_p2p with |         begin match path_to_p2p with | ||||||
|         | "" -> |         | None -> | ||||||
|           begin match path_to_im with |           begin match path_to_im with | ||||||
|             "" -> Pdfe.log "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n" |             None -> | ||||||
|           | _ -> |               if not raw then Pdfe.log "Neither pnm2png nor imagemagick found. Specify with -p2p or -im\n" | ||||||
|  |           | Some path_to_im -> | ||||||
|             begin match |             begin match | ||||||
|               Sys.command (Filename.quote_command path_to_im [pnm; png]) |               Sys.command (Filename.quote_command path_to_im [pnm; png]) | ||||||
|             with |             with | ||||||
| @@ -52,7 +53,7 @@ let write_image path_to_p2p path_to_im pdf resources name image = | |||||||
|               Sys.remove pnm |               Sys.remove pnm | ||||||
|             end |             end | ||||||
|           end |           end | ||||||
|         | _ -> |         | Some path_to_p2p -> | ||||||
|           begin match |           begin match | ||||||
|             Sys.command (Filename.quote_command path_to_p2p ~stdout:png ["-gamma"; "0.45"; "-quiet"; pnm]) |             Sys.command (Filename.quote_command path_to_p2p ~stdout:png ["-gamma"; "0.45"; "-quiet"; pnm]) | ||||||
|           with |           with | ||||||
| @@ -67,16 +68,16 @@ let write_image path_to_p2p path_to_im pdf resources name image = | |||||||
|  |  | ||||||
| let written = ref [] | let written = ref [] | ||||||
|  |  | ||||||
| let extract_images_inner path_to_p2p path_to_im encoding serial pdf resources stem pnum images = | let extract_images_inner ~raw ?path_to_p2p ?path_to_im encoding serial pdf resources stem pnum images = | ||||||
|   let names = map |   let names = map | ||||||
|     (fun _ -> |     (fun _ -> | ||||||
|        Cpdfbookmarks.name_of_spec |        Cpdfbookmarks.name_of_spec | ||||||
|          encoding [] pdf 0 (stem ^ "-p" ^ string_of_int pnum) |          encoding [] pdf 0 (stem ^ "-p" ^ string_of_int pnum) | ||||||
|          (let r = !serial in serial := !serial + 1; r) "" 0 0) (indx images) |          (let r = !serial in serial := !serial + 1; r) "" 0 0) (indx images) | ||||||
|   in |   in | ||||||
|     iter2 (write_image path_to_p2p path_to_im pdf resources) names images |     iter2 (write_image ~raw ?path_to_p2p ?path_to_im pdf resources) names images | ||||||
|  |  | ||||||
| let rec extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum form = | let rec extract_images_form_xobject ~raw ?path_to_p2p ?path_to_im encoding dedup dedup_per_page pdf serial stem pnum form = | ||||||
|   let resources = |   let resources = | ||||||
|     match Pdf.lookup_direct pdf "/Resources" form with |     match Pdf.lookup_direct pdf "/Resources" form with | ||||||
|       Some (Pdf.Dictionary d) -> Pdf.Dictionary d |       Some (Pdf.Dictionary d) -> Pdf.Dictionary d | ||||||
| @@ -95,9 +96,9 @@ let rec extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_ | |||||||
|             written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written; |             written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written; | ||||||
|           images |           images | ||||||
|     in |     in | ||||||
|       extract_images_inner path_to_p2p path_to_im encoding serial pdf resources stem pnum images |       extract_images_inner ~raw ?path_to_p2p ?path_to_im encoding serial pdf resources stem pnum images | ||||||
|  |  | ||||||
| let extract_images path_to_p2p path_to_im encoding dedup dedup_per_page pdf range stem = | let extract_images ?(raw=false) ?path_to_p2p ?path_to_im encoding dedup dedup_per_page pdf range stem = | ||||||
|   if dedup || dedup_per_page then written := []; |   if dedup || dedup_per_page then written := []; | ||||||
|   let pdf_pages = Pdfpage.pages_of_pagetree pdf in |   let pdf_pages = Pdfpage.pages_of_pagetree pdf in | ||||||
|     let pages = |     let pages = | ||||||
| @@ -119,8 +120,8 @@ let extract_images path_to_p2p path_to_im encoding dedup dedup_per_page pdf rang | |||||||
|                if dedup || dedup_per_page then |                if dedup || dedup_per_page then | ||||||
|                  written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written; |                  written := (option_map (function Pdf.Indirect n -> Some n | _ -> None) images) @ !written; | ||||||
|                let forms = keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Form")) xobjects in |                let forms = keep (fun o -> Pdf.lookup_direct pdf "/Subtype" o = Some (Pdf.Name "/Form")) xobjects in | ||||||
|                  extract_images_inner path_to_p2p path_to_im encoding serial pdf page.Pdfpage.resources stem pnum images; |                  extract_images_inner ~raw ?path_to_p2p ?path_to_im encoding serial pdf page.Pdfpage.resources stem pnum images; | ||||||
|                  iter (extract_images_form_xobject path_to_p2p path_to_im encoding dedup dedup_per_page pdf serial stem pnum) forms) |                  iter (extract_images_form_xobject ~raw ?path_to_p2p ?path_to_im encoding dedup dedup_per_page pdf serial stem pnum) forms) | ||||||
|           pages |           pages | ||||||
|           (indx pages) |           (indx pages) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,8 +1,8 @@ | |||||||
| (** Images *) | (** Images *) | ||||||
|  |  | ||||||
| (** Extract images. *) | (** Extract images. *) | ||||||
| val extract_images : string -> | val extract_images : ?raw:bool -> ?path_to_p2p:string -> | ||||||
|            string -> |            ?path_to_im:string -> | ||||||
|            Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit |            Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit | ||||||
|  |  | ||||||
| (** Report image resolutions. *)         | (** Report image resolutions. *)         | ||||||
|   | |||||||
| @@ -3,6 +3,7 @@ | |||||||
| %Document -info-json, -page-info-json, -page-labels-json, -list-fonts-json | %Document -info-json, -page-info-json, -page-labels-json, -list-fonts-json | ||||||
| %Document subformat information | %Document subformat information | ||||||
| %Document -list-images[-json], -list-image-uses[-json], -image-resolution-json | %Document -list-images[-json], -list-image-uses[-json], -image-resolution-json | ||||||
|  | %Document -raw for -extract-images | ||||||
| \documentclass{book} | \documentclass{book} | ||||||
| % Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf, | % Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf, | ||||||
| % dotnetcpdflibmanual.pdf, jcpdflibmanual.pdf jscpdflibmanual.pdf etc. | % dotnetcpdflibmanual.pdf, jcpdflibmanual.pdf jscpdflibmanual.pdf etc. | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user