-list-images finished
This commit is contained in:
parent
ab405a7330
commit
24f899e346
|
@ -8,6 +8,10 @@ let version_date = "(patch 2, 25th October 2023)"
|
||||||
open Pdfutil
|
open Pdfutil
|
||||||
open Pdfio
|
open Pdfio
|
||||||
|
|
||||||
|
(* [combine_with_spaces strs] joins the strings in [strs] with a single space
   between consecutive elements and trims whitespace from both ends of the
   result. An empty list gives "".

   [String.concat] builds the result in one pass instead of repeatedly
   appending with [^]. Empty elements at the front of the list contribute only
   leading spaces, which [String.trim] removes. *)
let combine_with_spaces strs =
  String.trim (String.concat " " strs)
|
||||||
|
|
||||||
let tempfiles = ref []
|
let tempfiles = ref []
|
||||||
|
|
||||||
let exit n =
|
let exit n =
|
||||||
|
@ -4183,7 +4187,16 @@ let go () =
|
||||||
if args.format_json then
|
if args.format_json then
|
||||||
flprint (Cpdfyojson.Safe.pretty_to_string json)
|
flprint (Cpdfyojson.Safe.pretty_to_string json)
|
||||||
else
|
else
|
||||||
flprint "old fashioned output\n"
|
begin match json with
|
||||||
|
| `List l ->
|
||||||
|
iter
|
||||||
|
(function (`Assoc [(_, `Int i); (_, `List pages); (_, `String name); (_, `Int w); (_, `Int h); (_, `String cs)]) ->
|
||||||
|
let pages = combine_with_spaces (map (function `Int i -> string_of_int i | _ -> "") pages) in
|
||||||
|
flprint (Printf.sprintf "%i, %s, %s, %i, %i, %s\n" i pages name w h cs)
|
||||||
|
| _ -> ())
|
||||||
|
l
|
||||||
|
| _ -> ()
|
||||||
|
end
|
||||||
| Some MissingFonts ->
|
| Some MissingFonts ->
|
||||||
let pdf = get_single_pdf args.op true in
|
let pdf = get_single_pdf args.op true in
|
||||||
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
|
||||||
|
|
59
cpdfimage.ml
59
cpdfimage.ml
|
@ -272,23 +272,18 @@ let image_resolution pdf range dpi =
|
||||||
image_resolution pdf range dpi;
|
image_resolution pdf range dpi;
|
||||||
rev !image_results
|
rev !image_results
|
||||||
|
|
||||||
(* FIXME Add colourspaces and anything else relevant *)
|
|
||||||
(* All the images in file referenced at least once from the given range of pages. *)
|
(* All the images in file referenced at least once from the given range of pages. *)
|
||||||
let images pdf range =
|
let images pdf range =
|
||||||
let images = null_hash () in
|
let images = null_hash () in
|
||||||
Cpdfpage.iter_pages
|
let formnums = null_hash () in
|
||||||
(fun pagenum page ->
|
let rec process_xobject resources pagenum page (name, xobject) =
|
||||||
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
|
|
||||||
| Some (Pdf.Dictionary xobjects) ->
|
|
||||||
iter
|
|
||||||
(function (name, xobject) ->
|
|
||||||
match Pdf.lookup_direct pdf "/Subtype" xobject with
|
match Pdf.lookup_direct pdf "/Subtype" xobject with
|
||||||
| Some (Pdf.Name "/Image") ->
|
| Some (Pdf.Name "/Image") ->
|
||||||
begin match xobject with
|
begin match xobject with
|
||||||
| Pdf.Indirect i ->
|
| Pdf.Indirect i ->
|
||||||
begin match Hashtbl.find images i with
|
begin match Hashtbl.find images i with
|
||||||
| (pagenums, n, w, h) ->
|
| (pagenums, n, w, h, cs) ->
|
||||||
Hashtbl.replace images i (pagenum::pagenums, n, w, h)
|
Hashtbl.replace images i (pagenum::pagenums, n, w, h, cs)
|
||||||
| exception Not_found ->
|
| exception Not_found ->
|
||||||
let width =
|
let width =
|
||||||
match Pdf.lookup_direct pdf "/Width" xobject with
|
match Pdf.lookup_direct pdf "/Width" xobject with
|
||||||
|
@ -298,29 +293,55 @@ let images pdf range =
|
||||||
match Pdf.lookup_direct pdf "/Height" xobject with
|
match Pdf.lookup_direct pdf "/Height" xobject with
|
||||||
| Some x -> Pdf.getnum pdf x
|
| Some x -> Pdf.getnum pdf x
|
||||||
| None -> 1.
|
| None -> 1.
|
||||||
|
and colourspace =
|
||||||
|
match Pdf.lookup_direct pdf "/ColorSpace" xobject with
|
||||||
|
| Some x -> Some (Pdfspace.string_of_colourspace (Pdfspace.read_colourspace pdf resources x))
|
||||||
|
| None -> None
|
||||||
in
|
in
|
||||||
Hashtbl.replace images i ([pagenum], name, int_of_float width, int_of_float height)
|
Hashtbl.replace images i ([pagenum], name, int_of_float width, int_of_float height, colourspace)
|
||||||
end
|
end
|
||||||
| _ -> ()
|
| _ -> ()
|
||||||
end
|
end
|
||||||
(* FIXME Look into form xobjects recursively *)
|
| Some (Pdf.Name "/Form") ->
|
||||||
| _ -> ())
|
begin match xobject with
|
||||||
xobjects
|
| Pdf.Indirect i ->
|
||||||
|
begin match Hashtbl.find formnums i with
|
||||||
|
| () -> ()
|
||||||
|
| exception Not_found ->
|
||||||
|
Hashtbl.add formnums i ();
|
||||||
|
begin match Pdf.lookup_direct pdf "/Resources" xobject with
|
||||||
|
| Some r ->
|
||||||
|
begin match Pdf.lookup_direct pdf "/XObject" r with
|
||||||
|
| Some (Pdf.Dictionary xobjects) -> iter (process_xobject r pagenum page) xobjects
|
||||||
|
| _ -> ()
|
||||||
|
end
|
||||||
|
| None -> ()
|
||||||
|
end
|
||||||
|
end
|
||||||
|
| _ -> ()
|
||||||
|
end
|
||||||
|
| _ -> ()
|
||||||
|
in
|
||||||
|
Cpdfpage.iter_pages
|
||||||
|
(fun pagenum page ->
|
||||||
|
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
|
||||||
|
| Some (Pdf.Dictionary xobjects) ->
|
||||||
|
iter (process_xobject page.Pdfpage.resources pagenum page) xobjects
|
||||||
| _ -> ())
|
| _ -> ())
|
||||||
pdf
|
pdf
|
||||||
range;
|
range;
|
||||||
(* Sort page numbers, then sort by first page number appearing, and build JSON structure *)
|
|
||||||
let images = list_of_hashtbl images in
|
let images = list_of_hashtbl images in
|
||||||
let images = map (fun (i, (pnums, n, w, h)) -> (i, (setify (sort compare pnums), n, w, h))) images in
|
let images = map (fun (i, (pnums, n, w, h, c)) -> (i, (setify (sort compare pnums), n, w, h, c))) images in
|
||||||
let images = sort (fun (_, (pnums, _, _, _)) (_, (pnums', _, _, _)) -> compare (hd pnums) (hd pnums')) images in
|
let images = sort (fun (_, (pnums, _, _, _, _)) (_, (pnums', _, _, _, _)) -> compare (hd pnums) (hd pnums')) images in
|
||||||
`List
|
`List
|
||||||
(map
|
(map
|
||||||
(fun (i, (pnums, n, w, h)) ->
|
(fun (i, (pnums, n, w, h, cs)) ->
|
||||||
`Assoc [("Object", `Int i);
|
`Assoc [("Object", `Int i);
|
||||||
("Pages", `List (map (fun x -> `Int x) pnums));
|
("Pages", `List (map (fun x -> `Int x) pnums));
|
||||||
("Path", `String n);
|
("Name", `String n);
|
||||||
("Width", `Int w);
|
("Width", `Int w);
|
||||||
("Height", `Int h)])
|
("Height", `Int h);
|
||||||
|
("Colourspace", match cs with None -> `Null | Some s -> `String s)])
|
||||||
images)
|
images)
|
||||||
|
|
||||||
let obj_of_jpeg_data data =
|
let obj_of_jpeg_data data =
|
||||||
|
|
Loading…
Reference in New Issue