Fixes images-in-xobjects in -list-images-used etc.

This commit is contained in:
John Whitington
2024-11-22 16:30:22 +00:00
parent 8ad54e5dd6
commit 6434c099b7

View File

@ -195,7 +195,6 @@ type xobj =
let image_results = ref [] let image_results = ref []
let rec image_resolution_page pdf page pagenum images = let rec image_resolution_page pdf page pagenum images =
(*Printf.printf "image_resolution_page: page %i, %i images\n" pagenum (length images);*)
try try
let pageops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content let pageops = Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content
and transform = ref [ref Pdftransform.i_matrix] in and transform = ref [ref Pdftransform.i_matrix] in
@ -241,7 +240,7 @@ let rec image_resolution_page pdf page pagenum images =
Pdfpage.rest = Pdf.Dictionary []} Pdfpage.rest = Pdf.Dictionary []}
in in
let newpdf = Pdfpage.change_pages false pdf [page] in let newpdf = Pdfpage.change_pages false pdf [page] in
image_resolution newpdf [1] image_resolution newpdf [1] pagenum
| (pagenum, name, Image (w, h), objnum) -> | (pagenum, name, Image (w, h), objnum) ->
let lx = Pdfunits.inches (distance_between o x) Pdfunits.PdfPoint in let lx = Pdfunits.inches (distance_between o x) Pdfunits.PdfPoint in
let ly = Pdfunits.inches (distance_between o y) Pdfunits.PdfPoint in let ly = Pdfunits.inches (distance_between o y) Pdfunits.PdfPoint in
@ -268,16 +267,14 @@ let rec image_resolution_page pdf page pagenum images =
with with
e -> Printf.printf "Error %s\n" (Printexc.to_string e); flprint "\n" e -> Printf.printf "Error %s\n" (Printexc.to_string e); flprint "\n"
and image_resolution pdf range = and image_resolution pdf range real_pagenum =
(*Printf.printf "image_resolution top\n";*)
let images = ref [] in let images = ref [] in
Cpdfpage.iter_pages Cpdfpage.iter_pages
(fun pagenum page -> (fun pagenum page ->
(*Printf.printf "Image resolution, page %i\n" pagenum;*) let pagenum = if real_pagenum > 0 then real_pagenum else pagenum in
(* 1. Get all image names and their native resolutions from resources as string * int * int *) (* 1. Get all image names and their native resolutions from resources as string * int * int *)
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
| Some (Pdf.Dictionary xobjects) -> | Some (Pdf.Dictionary xobjects) ->
(*Printf.printf "Found %i Xobjects in page resources\n" (length xobjects);*)
iter iter
(function (name, xobject) -> (function (name, xobject) ->
let objnum = match xobject with Pdf.Indirect i -> i | _ -> 0 in let objnum = match xobject with Pdf.Indirect i -> i | _ -> 0 in
@ -313,7 +310,7 @@ and image_resolution pdf range =
xobjects xobjects
| _ -> ()) | _ -> ())
pdf pdf
range; (if real_pagenum = 0 then range else [1]);
(* Now, split into differing pages, and call [image_resolution_page] on each one *) (* Now, split into differing pages, and call [image_resolution_page] on each one *)
let pagesplits = let pagesplits =
map map
@ -324,6 +321,7 @@ and image_resolution pdf range =
in in
iter iter
(function (pagenum, images) -> (function (pagenum, images) ->
let pagenum = if real_pagenum > 0 then 1 else pagenum in
let page = select pagenum pages in let page = select pagenum pages in
image_resolution_page pdf page pagenum images) image_resolution_page pdf page pagenum images)
pagesplits pagesplits
@ -333,8 +331,8 @@ let is_below_dpi dpi (_, _, _, _, wdpi, hdpi, _) =
let image_resolution pdf range dpi = let image_resolution pdf range dpi =
image_results := []; image_results := [];
image_resolution pdf range; image_resolution pdf range 0;
rev (keep (is_below_dpi dpi) !image_results) sort compare (rev (keep (is_below_dpi dpi) !image_results))
let image_resolution_json pdf range dpi = let image_resolution_json pdf range dpi =
let images = image_resolution pdf range dpi in let images = image_resolution pdf range dpi in