From 847f498add6e87d97301897ffcda988de822b256 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Thu, 4 Jan 2024 19:31:21 +0000 Subject: [PATCH] First stab at dpi_threshold --- cpdfimage.ml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/cpdfimage.ml b/cpdfimage.ml index 1b6dced..cdecab4 100644 --- a/cpdfimage.ml +++ b/cpdfimage.ml @@ -723,13 +723,28 @@ let process | `List l -> hashset_of_list (map (function `Assoc (("Object", `Int i)::_) -> i | _ -> assert false) l) | _ -> assert false in + let highdpi = + let objnums = + if dpi_threshold = 0 then [] else + let results = image_resolution pdf range max_float in + let cmp (_, _, _, _, _, _, a) (_, _, _, _, _, _, b) = compare a b in + (* Get each obj number associated with its *lowest* resolution *) + let sets = collate cmp (sort cmp results) in + let heads = map hd (map (sort (fun (_, _, _, _, a, b, _) (_, _, _, _, c, d, _) -> compare (fmin a b) (fmin c d))) sets) in + (* Choose only those where this figure is more than the dpi_threshold *) + (*iter (fun (_, _, _, _, wdpi, hdpi, objnum) -> Printf.printf "%f %f %i\n" wdpi hdpi objnum) heads;*) + let needed = keep (fun (_, _, _, _, wdpi, hdpi, objnum) -> fmin wdpi hdpi > float_of_int dpi_threshold) heads in + map (fun (_, _, _, _, _, _, objnum) -> objnum) needed + in + hashset_of_list objnums + in let nobjects = Pdf.objcard pdf in let ndone = ref 0 in let process_obj objnum s = match s with | Pdf.Stream ({contents = dict, _} as reference) -> ndone += 1; - if Hashtbl.mem inrange objnum then begin match + if Hashtbl.mem inrange objnum && (dpi_threshold = 0 || Hashtbl.mem highdpi objnum) then begin match Pdf.lookup_direct pdf "/Subtype" dict, Pdf.lookup_direct pdf "/Filter" dict, Pdf.lookup_direct pdf "/BitsPerComponent" dict,