From 6408bd887983e81d5fc74dececdd5b2dc7ce7fd0 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Tue, 16 Jan 2024 14:54:45 +0000 Subject: [PATCH] Specify image just by extension --- Changes | 4 +++- cpdfcommand.ml | 51 ++++++++++++++++++++++++++++---------------------- cpdfmanual.tex | 2 ++ cpdfpage.ml | 18 +++++++++++++----- 4 files changed, 47 insertions(+), 28 deletions(-) diff --git a/Changes b/Changes index 8171ecb..1f3d66d 100644 --- a/Changes +++ b/Changes @@ -24,8 +24,10 @@ Extended features: o -list-images-used[-json] extends -image-resolution o Use -raw with -extract-images to get PNMs o -extract-images can extract JBIG2 images and their globals -o more PNGs - greyscale 1, 2, 4, 8, 16bpp and RGB 16bpp +o More PNGs - greyscale 1, 2, 4, 8, 16bpp and RGB 16bpp o -pages -fast to print number of pages from /Count +o Report number of annotations in -page-info +o Specify image based only on file extension Fixes: diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 4587228..8750a56 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -1010,6 +1010,28 @@ let setowner s = let setuser s = args.user <- s +let set_input_image f s = + try + let fh = open_in_bin s in + let pdf = Cpdfimage.image_of_input f (Pdfio.input_of_channel fh) in + begin try close_in fh with _ -> () end; + args.original_filename <- s; + args.create_objstm <- true; + args.inputs <- (AlreadyInMemory (pdf, s), "all", "", "", ref false, None)::args.inputs + with + Sys_error _ -> error "Image file not found" + +let jbig2_global = ref None + +let set_input_png s = set_input_image (fun () -> Cpdfimage.obj_of_png_data) s + +let set_input_jpeg s = set_input_image (fun () -> Cpdfimage.obj_of_jpeg_data) s + +let set_input_jbig2 s = + set_input_image + (fun () -> Cpdfimage.obj_of_jbig2_data ?global:!jbig2_global) s; + args.remove_duplicate_streams <- true + let anon_fun s = try match !encrypt_to_collect with @@ -1038,7 +1060,13 @@ let anon_fun s = Not_found -> try ignore (String.index s '.'); - args.inputs <- (InFile s, "all", "", "", ref false, None)::args.inputs; + begin match rev (explode s) with + | a::b::c::d::e::'.'::r when implode (map Char.uppercase_ascii [e; d; c; b; a]) = "JBIG2" -> set_input_jbig2 s + | a::b::c::d::'.'::r when implode (map Char.uppercase_ascii [d; c; b; a]) = "JPEG" -> set_input_jpeg s + | a::b::c::'.'::r when implode (map Char.uppercase_ascii [c; b; a]) = "JPG" -> set_input_jpeg s + | a::b::c::'.'::r when implode (map Char.uppercase_ascii [c; b; a]) = "PNG" -> set_input_png s + | _ -> args.inputs <- (InFile s, "all", "", "", ref false, None)::args.inputs + end; args.original_filename <- s with Not_found -> @@ -1820,27 +1848,6 @@ let addop o = begin match args.op with Some Draw -> () | _ -> error "Need to be in drawing mode for this." end; Cpdfdrawcontrol.addop o -let set_input_image f s = - try - let fh = open_in_bin s in - let pdf = Cpdfimage.image_of_input f (Pdfio.input_of_channel fh) in - begin try close_in fh with _ -> () end; - args.original_filename <- s; - args.create_objstm <- true; - args.inputs <- (AlreadyInMemory (pdf, s), "all", "", "", ref false, None)::args.inputs - with - Sys_error _ -> error "Image file not found" - -let jbig2_global = ref None - -let set_input_png s = set_input_image (fun () -> Cpdfimage.obj_of_png_data) s - -let set_input_jpeg s = set_input_image (fun () -> Cpdfimage.obj_of_jpeg_data) s - -let set_input_jbig2 s = - set_input_image - (fun () -> Cpdfimage.obj_of_jbig2_data ?global:!jbig2_global) s; - args.remove_duplicate_streams <- true let embed_font_inner font = match font with diff --git a/cpdfmanual.tex b/cpdfmanual.tex index f19c191..ef3e15a 100644 --- a/cpdfmanual.tex +++ b/cpdfmanual.tex @@ -11,6 +11,8 @@ %Document -jbig2 / -jbig2globals %Document -process-images %Document -pages -fast +%Document -page-info changes +%Document -png -jpeg -jbig2 by extension \documentclass{book} % Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf, % dotnetcpdflibmanual.pdf, jcpdflibmanual.pdf jscpdflibmanual.pdf etc. diff --git a/cpdfpage.ml b/cpdfpage.ml index 672b8c1..f20c6d8 100644 --- a/cpdfpage.ml +++ b/cpdfpage.ml @@ -155,8 +155,8 @@ let change_pattern_matrices_page pdf tr page = (* Output information for each page *) let output_page_info ?(json=false) pdf range = - let pages = Pdfpage.pages_of_pagetree pdf - and labels = Pdfpagelabels.read pdf in + let pages = Pdfpage.pages_of_pagetree pdf in + let labels = Pdfpagelabels.read pdf in let getbox page box = if box = "/MediaBox" then match page.Pdfpage.mediabox with @@ -170,8 +170,14 @@ let output_page_info ?(json=false) pdf range = Printf.sprintf "%f %f %f %f" (Pdf.getnum pdf a) (Pdf.getnum pdf b) (Pdf.getnum pdf c) (Pdf.getnum pdf d) | _ -> "" - and rotation page = + in + let rotation page = Pdfpage.int_of_rotation page.Pdfpage.rotate + in + let num_annots page = + match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with + | Some (Pdf.Array a) -> length a + | _ -> 0 in let json_entry_of_pnum pnum = let getbox_json page box = @@ -190,7 +196,8 @@ let output_page_info ?(json=false) pdf range = ("BleedBox", getbox_json page "/BleedBox"); ("TrimBox", getbox_json page "/TrimBox"); ("ArtBox", getbox_json page "/ArtBox"); - ("Rotation", `Int (rotation page))] + ("Rotation", `Int (rotation page)); + ("Annotations", `Int (num_annots page))] in if json then flprint (Cpdfyojson.Safe.pretty_to_string (`List (map json_entry_of_pnum range))) @@ -206,7 +213,8 @@ let output_page_info ?(json=false) pdf range = Printf.printf "BleedBox: %s\n" (getbox page "/BleedBox"); Printf.printf "TrimBox: %s\n" (getbox page "/TrimBox"); Printf.printf "ArtBox: %s\n" (getbox page "/ArtBox"); - Printf.printf "Rotation: %i\n" (rotation page)) + Printf.printf "Rotation: %i\n" (rotation page); + Printf.printf "Annotations: %i\n" (num_annots page)) range let process_pages f pdf range =