From 707525d8da3305c507630ed13ef370669792e8fc Mon Sep 17 00:00:00 2001 From: John Whitington Date: Thu, 28 Dec 2023 15:48:30 +0000 Subject: [PATCH] Extend -list-images --- cpdfcommand.ml | 4 ++-- cpdfimage.ml | 29 ++++++++++++++++++++++------- cpdfimage.mli | 6 +++--- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 5d994f4..ec2dd22 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -4306,9 +4306,9 @@ let go () = begin match json with | `List l -> iter - (function (`Assoc [(_, `Int i); (_, `List pages); (_, `String name); (_, `Int w); (_, `Int h); (_, `String cs)]) -> + (function (`Assoc [(_, `Int i); (_, `List pages); (_, `String name); (_, `Int w); (_, `Int h); (_, `Int size); (_, `Int bpc); (_, `String cs); (_, `String filter)]) -> let pages = combine_with_spaces (map (function `Int i -> string_of_int i | _ -> "") pages) in - flprint (Printf.sprintf "%i, %s, %s, %i, %i, %s\n" i pages name w h cs) + flprint (Printf.sprintf "%i, %s, %s, %i, %i, %i, %i, %s, %s\n" i pages name w h size bpc cs filter) | _ -> ()) l | _ -> () diff --git a/cpdfimage.ml b/cpdfimage.ml index a8829e3..0f6372c 100644 --- a/cpdfimage.ml +++ b/cpdfimage.ml @@ -348,8 +348,8 @@ let images pdf range = begin match xobject with | Pdf.Indirect i -> begin match Hashtbl.find images i with - | (pagenums, n, w, h, cs) -> - Hashtbl.replace images i (pagenum::pagenums, n, w, h, cs) + | (pagenums, n, w, h, s, bpc, cs, f) -> + Hashtbl.replace images i (pagenum::pagenums, n, w, h, s, bpc, cs, f) | exception Not_found -> let width = match Pdf.lookup_direct pdf "/Width" xobject with @@ -359,12 +359,24 @@ let images pdf range = match Pdf.lookup_direct pdf "/Height" xobject with | Some x -> Pdf.getnum pdf x | None -> 1. + and size = + match Pdf.lookup_direct pdf "/Length" xobject with + | Some (Pdf.Integer x) -> x + | _ -> 0 + and bpc = + match Pdf.lookup_direct pdf "/BitsPerComponent" xobject with + | Some (Pdf.Integer x) -> x + | _ -> 0 and colourspace = match Pdf.lookup_direct pdf "/ColorSpace" xobject with | Some x -> Some (Pdfspace.string_of_colourspace (Pdfspace.read_colourspace pdf resources x)) | None -> None + and filter = + match Pdf.lookup_direct pdf "/Filter" xobject with + | Some (Pdf.Array [x]) | Some x -> Some (Pdfwrite.string_of_pdf x) + | None -> None in - Hashtbl.replace images i ([pagenum], name, int_of_float width, int_of_float height, colourspace) + Hashtbl.replace images i ([pagenum], name, int_of_float width, int_of_float height, size, bpc, colourspace, filter) end | _ -> () end @@ -397,17 +409,20 @@ let images pdf range = pdf range; let images = list_of_hashtbl images in - let images = map (fun (i, (pnums, n, w, h, c)) -> (i, (setify (sort compare pnums), n, w, h, c))) images in - let images = sort (fun (_, (pnums, _, _, _, _)) (_, (pnums', _, _, _, _)) -> compare (hd pnums) (hd pnums')) images in + let images = map (fun (i, (pnums, n, w, h, s, bpc, c, filter)) -> (i, (setify (sort compare pnums), n, w, h, s, bpc, c, filter))) images in + let images = sort (fun (_, (pnums, _, _, _, _, _, _, _)) (_, (pnums', _, _, _, _, _, _, _)) -> compare (hd pnums) (hd pnums')) images in `List (map - (fun (i, (pnums, n, w, h, cs)) -> + (fun (i, (pnums, n, w, h, size, bpc, cs, filter)) -> `Assoc [("Object", `Int i); ("Pages", `List (map (fun x -> `Int x) pnums)); ("Name", `String n); ("Width", `Int w); ("Height", `Int h); - ("Colourspace", match cs with None -> `Null | Some s -> `String s)]) + ("Bytes", `Int size); + ("BitsPerComponent", `Int bpc); + ("Colourspace", match cs with None -> `Null | Some s -> `String s); + ("Filter", match filter with None -> `Null | Some s -> `String s)]) images) let obj_of_jpeg_data data = diff --git a/cpdfimage.mli b/cpdfimage.mli index 0adda57..df25a00 100644 --- a/cpdfimage.mli +++ b/cpdfimage.mli @@ -1,9 +1,9 @@ (** Images *) (** Extract images. *) -val extract_images : ?raw:bool -> ?path_to_p2p:string -> - ?path_to_im:string -> - Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit +val extract_images : + ?raw:bool -> ?path_to_p2p:string -> ?path_to_im:string -> + Cpdfmetadata.encoding -> bool -> bool -> Pdf.t -> int list -> string -> unit (** Report image resolutions. *) val image_resolution : Pdf.t -> int list -> float -> (int * string * int * int * float * float) list