Extend -list-images

This commit is contained in:
John Whitington 2023-12-28 15:48:30 +00:00
parent 6f3cf112f9
commit 707525d8da
3 changed files with 27 additions and 12 deletions

View File

@ -4306,9 +4306,9 @@ let go () =
begin match json with begin match json with
| `List l -> | `List l ->
iter iter
(function (`Assoc [(_, `Int i); (_, `List pages); (_, `String name); (_, `Int w); (_, `Int h); (_, `String cs)]) -> (function (`Assoc [(_, `Int i); (_, `List pages); (_, `String name); (_, `Int w); (_, `Int h); (_, `Int size); (_, `Int bpc); (_, `String cs); (_, `String filter)]) ->
let pages = combine_with_spaces (map (function `Int i -> string_of_int i | _ -> "") pages) in let pages = combine_with_spaces (map (function `Int i -> string_of_int i | _ -> "") pages) in
flprint (Printf.sprintf "%i, %s, %s, %i, %i, %s\n" i pages name w h cs) flprint (Printf.sprintf "%i, %s, %s, %i, %i, %i, %i, %s, %s\n" i pages name w h size bpc cs filter)
| _ -> ()) | _ -> ())
l l
| _ -> () | _ -> ()

View File

@ -348,8 +348,8 @@ let images pdf range =
begin match xobject with begin match xobject with
| Pdf.Indirect i -> | Pdf.Indirect i ->
begin match Hashtbl.find images i with begin match Hashtbl.find images i with
| (pagenums, n, w, h, cs) -> | (pagenums, n, w, h, s, bpc, cs, f) ->
Hashtbl.replace images i (pagenum::pagenums, n, w, h, cs) Hashtbl.replace images i (pagenum::pagenums, n, w, h, s, bpc, cs, f)
| exception Not_found -> | exception Not_found ->
let width = let width =
match Pdf.lookup_direct pdf "/Width" xobject with match Pdf.lookup_direct pdf "/Width" xobject with
@ -359,12 +359,24 @@ let images pdf range =
match Pdf.lookup_direct pdf "/Height" xobject with match Pdf.lookup_direct pdf "/Height" xobject with
| Some x -> Pdf.getnum pdf x | Some x -> Pdf.getnum pdf x
| None -> 1. | None -> 1.
and size =
match Pdf.lookup_direct pdf "/Length" xobject with
| Some (Pdf.Integer x) -> x
| _ -> 0
and bpc =
match Pdf.lookup_direct pdf "/BitsPerComponent" xobject with
| Some (Pdf.Integer x) -> x
| _ -> 0
and colourspace = and colourspace =
match Pdf.lookup_direct pdf "/ColorSpace" xobject with match Pdf.lookup_direct pdf "/ColorSpace" xobject with
| Some x -> Some (Pdfspace.string_of_colourspace (Pdfspace.read_colourspace pdf resources x)) | Some x -> Some (Pdfspace.string_of_colourspace (Pdfspace.read_colourspace pdf resources x))
| None -> None | None -> None
and filter =
match Pdf.lookup_direct pdf "/Filter" xobject with
| Some (Pdf.Array [x]) | Some x -> Some (Pdfwrite.string_of_pdf x)
| None -> None
in in
Hashtbl.replace images i ([pagenum], name, int_of_float width, int_of_float height, colourspace) Hashtbl.replace images i ([pagenum], name, int_of_float width, int_of_float height, size, bpc, colourspace, filter)
end end
| _ -> () | _ -> ()
end end
@ -397,17 +409,20 @@ let images pdf range =
pdf pdf
range; range;
let images = list_of_hashtbl images in let images = list_of_hashtbl images in
let images = map (fun (i, (pnums, n, w, h, c)) -> (i, (setify (sort compare pnums), n, w, h, c))) images in let images = map (fun (i, (pnums, n, w, h, s, bpc, c, filter)) -> (i, (setify (sort compare pnums), n, w, h, s, bpc, c, filter))) images in
let images = sort (fun (_, (pnums, _, _, _, _)) (_, (pnums', _, _, _, _)) -> compare (hd pnums) (hd pnums')) images in let images = sort (fun (_, (pnums, _, _, _, _, _, _, _)) (_, (pnums', _, _, _, _, _, _, _)) -> compare (hd pnums) (hd pnums')) images in
`List `List
(map (map
(fun (i, (pnums, n, w, h, cs)) -> (fun (i, (pnums, n, w, h, size, bpc, cs, filter)) ->
`Assoc [("Object", `Int i); `Assoc [("Object", `Int i);
("Pages", `List (map (fun x -> `Int x) pnums)); ("Pages", `List (map (fun x -> `Int x) pnums));
("Name", `String n); ("Name", `String n);
("Width", `Int w); ("Width", `Int w);
("Height", `Int h); ("Height", `Int h);
("Colourspace", match cs with None -> `Null | Some s -> `String s)]) ("Bytes", `Int size);
("BitsPerComponent", `Int bpc);
("Colourspace", match cs with None -> `Null | Some s -> `String s);
("Filter", match filter with None -> `Null | Some s -> `String s)])
images) images)
let obj_of_jpeg_data data = let obj_of_jpeg_data data =

View File

@ -1,8 +1,8 @@
(** Images *) (** Images *)
(** Extract images. *) (** Extract images. *)
val extract_images : ?raw:bool -> ?path_to_p2p:string -> val extract_images :
?path_to_im:string -> ?raw:bool -> ?path_to_p2p:string -> ?path_to_im:string ->
Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit
(** Report image resolutions. *) (** Report image resolutions. *)