First working -list-images-json

This commit is contained in:
John Whitington 2023-11-14 16:45:49 +00:00
parent 29fdfdef95
commit ab405a7330
1 changed file with 30 additions and 13 deletions

View File

@ -272,6 +272,8 @@ let image_resolution pdf range dpi =
image_resolution pdf range dpi; image_resolution pdf range dpi;
rev !image_results rev !image_results
(* FIXME Add colourspaces and anything else relevant *)
(* All the images in the file that are referenced at least once from the given range of pages. *)
let images pdf range = let images pdf range =
let images = null_hash () in let images = null_hash () in
Cpdfpage.iter_pages Cpdfpage.iter_pages
@ -284,7 +286,10 @@ let images pdf range =
| Some (Pdf.Name "/Image") -> | Some (Pdf.Name "/Image") ->
begin match xobject with begin match xobject with
| Pdf.Indirect i -> | Pdf.Indirect i ->
(* FIXME: Only if we have not seen i before *) begin match Hashtbl.find images i with
| (pagenums, n, w, h) ->
Hashtbl.replace images i (pagenum::pagenums, n, w, h)
| exception Not_found ->
let width = let width =
match Pdf.lookup_direct pdf "/Width" xobject with match Pdf.lookup_direct pdf "/Width" xobject with
| Some x -> Pdf.getnum pdf x | Some x -> Pdf.getnum pdf x
@ -294,8 +299,8 @@ let images pdf range =
| Some x -> Pdf.getnum pdf x | Some x -> Pdf.getnum pdf x
| None -> 1. | None -> 1.
in in
(* FIXME: Store which pages it is referenced from. *) Hashtbl.replace images i ([pagenum], name, int_of_float width, int_of_float height)
Hashtbl.replace images i (pagenum, name, int_of_float width, int_of_float height) end
| _ -> () | _ -> ()
end end
(* FIXME Look into form xobjects recursively *) (* FIXME Look into form xobjects recursively *)
@ -304,7 +309,19 @@ let images pdf range =
| _ -> ()) | _ -> ())
pdf pdf
range; range;
`Null (* Sort page numbers, then sort by first page number appearing, and build JSON structure *)
let images = list_of_hashtbl images in
let images = map (fun (i, (pnums, n, w, h)) -> (i, (setify (sort compare pnums), n, w, h))) images in
let images = sort (fun (_, (pnums, _, _, _)) (_, (pnums', _, _, _)) -> compare (hd pnums) (hd pnums')) images in
`List
(map
(fun (i, (pnums, n, w, h)) ->
`Assoc [("Object", `Int i);
("Pages", `List (map (fun x -> `Int x) pnums));
("Path", `String n);
("Width", `Int w);
("Height", `Int h)])
images)
let obj_of_jpeg_data data = let obj_of_jpeg_data data =
let w, h = Cpdfjpeg.jpeg_dimensions data in let w, h = Cpdfjpeg.jpeg_dimensions data in