mirror of
				https://github.com/johnwhitington/cpdf-source.git
				synced 2025-06-05 22:09:39 +02:00 
			
		
		
		
	More jbig2lossy work
This commit is contained in:
		
							
								
								
									
										52
									
								
								cpdfimage.ml
									
									
									
									
									
								
							
							
						
						
									
										52
									
								
								cpdfimage.ml
									
									
									
									
									
								
							| @@ -710,11 +710,53 @@ let recompress_1bpp_jbig2_lossless ~pixel_threshold ~length_threshold ~path_to_j | |||||||
|           remove out2 |           remove out2 | ||||||
|   end |   end | ||||||
|  |  | ||||||
| let preprocess_jbig2_lossy ~path_to_jbig2enc inrange highdpi pdf = () | (* Recompress 1bpp images, compressed any way (none, flate, ccitt, jbig2 lossless) to lossy jbig2 *) | ||||||
|  | (* For now, this handles only the example 6406.pdf, which is CCITT-compressed, so simple decompression will do. To go from lossless | ||||||
|  | to lossy JBIG2, we will need to write the image out and convert it to PNM with jbig2dec. *) | ||||||
|  | (* FIXME: Need interface for jbig2 lossy parameters *) | ||||||
|  | let preprocess_jbig2_lossy ~path_to_jbig2enc ~length_threshold ~pixel_threshold ~dpi_threshold inrange highdpi pdf = | ||||||
|  |  let objnum_name_pairs = ref [] in | ||||||
|  |  let process_obj objnum s = | ||||||
|  (* Write out each stream as a *.pnm, if we choose to process it, restoring if not. *) |  (* Write out each stream as a *.pnm, if we choose to process it, restoring if not. *) | ||||||
|  (* Call jbig2 to generate one *.jbig2 for each, and a *.jbig2globals *) |     match s with | ||||||
|  (* Build the JBIG2Globals stream for the file *) |     | Pdf.Stream ({contents = dict, _} as reference) -> | ||||||
|  (* For each file, read in the new JBIG2 data, and build each new image stream to replace the old one *) |         let old = !reference in | ||||||
|  |         let restore () = reference := old in | ||||||
|  |         if Hashtbl.mem inrange objnum && (dpi_threshold = 0 || Hashtbl.mem highdpi objnum) then begin match | ||||||
|  |           Pdf.lookup_direct pdf "/Subtype" dict, | ||||||
|  |           Pdf.lookup_direct pdf "/BitsPerComponent" dict, | ||||||
|  |           Pdf.lookup_direct pdf "/ImageMask" dict | ||||||
|  |         with | ||||||
|  |         | Some (Pdf.Name "/Image"), Some (Pdf.Integer 1), _ | ||||||
|  |         | Some (Pdf.Name "/Image"), _, Some (Pdf.Boolean true) -> | ||||||
|  |             let w = match Pdf.lookup_direct pdf "/Width" dict with Some (Pdf.Integer i) -> i | _ -> error "bad width" in | ||||||
|  |             let h = match Pdf.lookup_direct pdf "/Height" dict with Some (Pdf.Integer i) -> i | _ -> error "bad height" in | ||||||
|  |             if w * h < pixel_threshold then (if !debug_image_processing then Printf.printf "pixel threshold not met\n%!") else (* (but also, jbig2enc fails on tiny images) *) | ||||||
|  |             let size = match Pdf.lookup_direct pdf "/Length" dict with Some (Pdf.Integer i) -> i | _ -> 0 in | ||||||
|  |             if size < length_threshold then (if !debug_image_processing then Printf.printf "length threshold not met\n%!") else | ||||||
|  |               begin | ||||||
|  |                 Pdfcodec.decode_pdfstream_until_unknown pdf s; | ||||||
|  |                 match Pdf.lookup_direct pdf "/Filter" (fst !reference) with | ||||||
|  |                 | Some x -> | ||||||
|  |                     if !debug_image_processing then Printf.printf "could not decode - skipping %s length %i\n%!" (Pdfwrite.string_of_pdf x) size; | ||||||
|  |                     restore () | ||||||
|  |                 | None -> | ||||||
|  |                     let out = Filename.temp_file "cpdf" "convertin" ^ ".pnm" in | ||||||
|  |                     let fh = open_out_bin out in | ||||||
|  |                     let data = match s with Pdf.Stream {contents = _, Pdf.Got d} -> d | _ -> assert false in | ||||||
|  |                       pnm_to_channel_1_inverted fh w h data; | ||||||
|  |                       close_out fh; | ||||||
|  |                       Printf.printf "obj %i = %s\n%!" objnum out; | ||||||
|  |                       objnum_name_pairs := (objnum, out)::!objnum_name_pairs | ||||||
|  |               end | ||||||
|  |         | _ -> () (* not a 1bpp image *) | ||||||
|  |         end | ||||||
|  |     | _ -> () (* not a stream *) | ||||||
|  |  in | ||||||
|  |    Pdf.objiter process_obj pdf | ||||||
|  |    (* Call jbig2 to generate one *.jbig2 for each, and a *.jbig2globals *) | ||||||
|  |    (* Build the JBIG2Globals stream for the file *) | ||||||
|  |    (* For each file, read in the new JBIG2 data, and build each new image stream to replace the old one *) | ||||||
|  |  | ||||||
| let process | let process | ||||||
|   ?q ?qlossless ?onebppmethod ~length_threshold ~percentage_threshold ~pixel_threshold ~dpi_threshold |   ?q ?qlossless ?onebppmethod ~length_threshold ~percentage_threshold ~pixel_threshold ~dpi_threshold | ||||||
| @@ -740,7 +782,7 @@ let process | |||||||
|     in |     in | ||||||
|       hashset_of_list objnums |       hashset_of_list objnums | ||||||
|   in |   in | ||||||
|   begin match onebppmethod with Some "JBIG2Lossy" -> preprocess_jbig2_lossy ~path_to_jbig2enc inrange highdpi pdf | _ -> () end; |   begin match onebppmethod with Some "JBIG2Lossy" -> preprocess_jbig2_lossy ~path_to_jbig2enc ~dpi_threshold ~length_threshold ~pixel_threshold inrange highdpi pdf | _ -> () end; | ||||||
|   let nobjects = Pdf.objcard pdf in |   let nobjects = Pdf.objcard pdf in | ||||||
|   let ndone = ref 0 in |   let ndone = ref 0 in | ||||||
|   let process_obj objnum s = |   let process_obj objnum s = | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user