diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 653a5c0..9cc6b18 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -8,6 +8,10 @@ let version_date = "(patch 2, 25th October 2023)" open Pdfutil open Pdfio +let combine_with_spaces strs = + String.trim + (fold_left (fun x y -> x ^ (if x <> "" then " " else "") ^ y) "" strs) + let tempfiles = ref [] let exit n = @@ -4183,7 +4187,16 @@ let go () = if args.format_json then flprint (Cpdfyojson.Safe.pretty_to_string json) else - flprint "old fashioned output\n" + begin match json with + | `List l -> + iter + (function (`Assoc [(_, `Int i); (_, `List pages); (_, `String name); (_, `Int w); (_, `Int h); (_, `String cs)]) -> + let pages = combine_with_spaces (map (function `Int i -> string_of_int i | _ -> "") pages) in + flprint (Printf.sprintf "%i, %s, %s, %i, %i, %s\n" i pages name w h cs) + | _ -> ()) + l + | _ -> () + end | Some MissingFonts -> let pdf = get_single_pdf args.op true in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in diff --git a/cpdfimage.ml b/cpdfimage.ml index 3d74759..60ce844 100644 --- a/cpdfimage.ml +++ b/cpdfimage.ml @@ -272,56 +272,77 @@ let image_resolution pdf range dpi = image_resolution pdf range dpi; rev !image_results -(* FIXME Add colourspaces and anything else relevant *) (* All the images in file referenced at least once from the given range of pages. *) let images pdf range = let images = null_hash () in - Cpdfpage.iter_pages - (fun pagenum page -> - match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with - | Some (Pdf.Dictionary xobjects) -> - iter - (function (name, xobject) -> - match Pdf.lookup_direct pdf "/Subtype" xobject with - | Some (Pdf.Name "/Image") -> - begin match xobject with - | Pdf.Indirect i -> - begin match Hashtbl.find images i with - | (pagenums, n, w, h) -> - Hashtbl.replace images i (pagenum::pagenums, n, w, h) - | exception Not_found -> - let width = - match Pdf.lookup_direct pdf "/Width" xobject with - | Some x -> Pdf.getnum pdf x - | None -> 1. - and height = - match Pdf.lookup_direct pdf "/Height" xobject with - | Some x -> Pdf.getnum pdf x - | None -> 1. - in - Hashtbl.replace images i ([pagenum], name, int_of_float width, int_of_float height) - end - | _ -> () - end - (* FIXME Look into form xobjects recursively *) - | _ -> ()) - xobjects - | _ -> ()) - pdf - range; - (* Sort page numbers, then sort by first page number appearing, and build JSON structure *) - let images = list_of_hashtbl images in - let images = map (fun (i, (pnums, n, w, h)) -> (i, (setify (sort compare pnums), n, w, h))) images in - let images = sort (fun (_, (pnums, _, _, _)) (_, (pnums', _, _, _)) -> compare (hd pnums) (hd pnums')) images in - `List - (map - (fun (i, (pnums, n, w, h)) -> - `Assoc [("Object", `Int i); - ("Pages", `List (map (fun x -> `Int x) pnums)); - ("Path", `String n); - ("Width", `Int w); - ("Height", `Int h)]) - images) + let formnums = null_hash () in + let rec process_xobject resources pagenum page (name, xobject) = + match Pdf.lookup_direct pdf "/Subtype" xobject with + | Some (Pdf.Name "/Image") -> + begin match xobject with + | Pdf.Indirect i -> + begin match Hashtbl.find images i with + | (pagenums, n, w, h, cs) -> + Hashtbl.replace images i (pagenum::pagenums, n, w, h, cs) + | exception Not_found -> + let width = + match Pdf.lookup_direct pdf "/Width" xobject with + | Some x -> Pdf.getnum pdf x + | None -> 1. + and height = + match Pdf.lookup_direct pdf "/Height" xobject with + | Some x -> Pdf.getnum pdf x + | None -> 1. + and colourspace = + match Pdf.lookup_direct pdf "/ColorSpace" xobject with + | Some x -> Some (Pdfspace.string_of_colourspace (Pdfspace.read_colourspace pdf resources x)) + | None -> None + in + Hashtbl.replace images i ([pagenum], name, int_of_float width, int_of_float height, colourspace) + end + | _ -> () + end + | Some (Pdf.Name "/Form") -> + begin match xobject with + | Pdf.Indirect i -> + begin match Hashtbl.find formnums i with + | () -> () + | exception Not_found -> + Hashtbl.add formnums i (); + begin match Pdf.lookup_direct pdf "/Resources" xobject with + | Some r -> + begin match Pdf.lookup_direct pdf "/XObject" r with + | Some (Pdf.Dictionary xobjects) -> iter (process_xobject r pagenum page) xobjects + | _ -> () + end + | None -> () + end + end + | _ -> () + end + | _ -> () + in + Cpdfpage.iter_pages + (fun pagenum page -> + match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with + | Some (Pdf.Dictionary xobjects) -> + iter (process_xobject page.Pdfpage.resources pagenum page) xobjects + | _ -> ()) + pdf + range; + let images = list_of_hashtbl images in + let images = map (fun (i, (pnums, n, w, h, c)) -> (i, (setify (sort compare pnums), n, w, h, c))) images in + let images = sort (fun (_, (pnums, _, _, _, _)) (_, (pnums', _, _, _, _)) -> compare (hd pnums) (hd pnums')) images in + `List + (map + (fun (i, (pnums, n, w, h, cs)) -> + `Assoc [("Object", `Int i); + ("Pages", `List (map (fun x -> `Int x) pnums)); + ("Name", `String n); + ("Width", `Int w); + ("Height", `Int h); + ("Colourspace", match cs with None -> `Null | Some s -> `String s)]) + images) let obj_of_jpeg_data data = let w, h = Cpdfjpeg.jpeg_dimensions data in