From e6c34277cfacb912d9bc30898146796f07f47c84 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Wed, 11 Sep 2024 13:57:57 +0100 Subject: [PATCH] Tag and title images --- cpdfcommand.ml | 12 ++++++++---- cpdfdraw.ml | 31 +++++++++++++++++++------------ cpdfdraw.mli | 2 +- cpdfdrawcontrol.ml | 4 ++-- cpdfdrawcontrol.mli | 2 +- 5 files changed, 31 insertions(+), 20 deletions(-) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index 7db3ff7..601a178 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -543,7 +543,8 @@ type args = mutable jbig2_lossy_threshold : float; mutable extract_stream_decompress : bool; mutable verify_single : string option; - mutable draw_struct_tree : bool} + mutable draw_struct_tree : bool; + mutable image_title : string option} let args = {op = None; @@ -679,7 +680,8 @@ let args = jbig2_lossy_threshold = 0.85; extract_stream_decompress = false; verify_single = None; - draw_struct_tree = false} + draw_struct_tree = false; + image_title = None} (* Do not reset original_filename or cpdflin or was_encrypted or was_decrypted_with_owner or recrypt or producer or creator or path_to_* or @@ -803,7 +805,8 @@ let reset_arguments () = args.extract_stream_decompress <- false; clear Cpdfdrawcontrol.fontpack_initialised; args.verify_single <- None; - args.draw_struct_tree <- false + args.draw_struct_tree <- false; + args.image_title <- None (* Prefer a) the one given with -cpdflin b) a local cpdflin, c) otherwise assume installed at a system place *) @@ -2825,7 +2828,8 @@ let specs = ("-use", Arg.String Cpdfdrawcontrol.usexobj, " Use a saved sequence of graphics operators"); ("-draw-jpeg", Arg.String Cpdfdrawcontrol.addjpeg, " Load a JPEG from file and name it"); ("-draw-png", Arg.String Cpdfdrawcontrol.addpng, " Load a PNG from file and name it"); - ("-image", Arg.String Cpdfdrawcontrol.addimage, " Draw an image which has already been loaded"); + ("-image", Arg.String (fun s -> Cpdfdrawcontrol.addimage ?title:args.image_title s), " Draw an image which has already been loaded"); + ("-image-title", Arg.String (fun s -> args.image_title <- Some s), " Give title for future images"); ("-fill-opacity", Arg.Float Cpdfdrawcontrol.addopacity, " Set opacity"); ("-stroke-opacity", Arg.Float Cpdfdrawcontrol.addsopacity, " Set stroke opacity"); ("-bt", Arg.Unit Cpdfdrawcontrol.addbt, " Begin text"); diff --git a/cpdfdraw.ml b/cpdfdraw.ml index 2bc5ca2..e512e0b 100644 --- a/cpdfdraw.ml +++ b/cpdfdraw.ml @@ -34,7 +34,7 @@ type drawops = | FormXObject of float * float * float * float * string * drawops list | Use of string | ImageXObject of string * Pdf.pdfobject - | Image of string + | Image of string * string option | NewPage | Opacity of float | SOpacity of float @@ -202,7 +202,7 @@ let mcpage = ref ~-1 type structdata = | StDataBeginTree of string | StDataEndTree - | StDataMCID of string * int + | StDataMCID of string * int * string option | StDataPage of int let structdata = ref [] @@ -251,10 +251,12 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func let pdfname = try fst (Hashtbl.find (res ()).form_xobjects n) with _ -> error ("Form XObject not found: " ^ n) in (res ()).page_names <- pdfname::(res ()).page_names; [Pdfops.Op_Do pdfname] - | Image s -> + | Image (s, t) -> + let m = mcid () in + if not dryrun then structdata := StDataMCID ("/Figure", m, t)::!structdata; let pdfname = try fst (Hashtbl.find (res ()).images s) with _ -> error ("Image not found: " ^ s) in (res ()).page_names <- pdfname::(res ()).page_names; - [Pdfops.Op_Do pdfname] + [Pdfops.Op_BDC ("/Figure", Pdf.Dictionary ["/MCID", Pdf.Integer m]); Pdfops.Op_Do pdfname; Pdfops.Op_EMC] | ImageXObject (s, obj) -> Hashtbl.replace (res ()).images s (fresh_name "/I", Pdf.addobj pdf obj); [] @@ -310,7 +312,7 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func [] | TextSection ops -> let m = mcid () in - if not dryrun then structdata := StDataMCID ("/P", m)::!structdata; + if not dryrun then structdata := StDataMCID ("/P", m, None)::!structdata; [Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m]); Pdfops.Op_BT] @ ops_of_drawops dryrun pdf endpage filename bates batespad num page ops @ @@ -445,7 +447,7 @@ let dryrun ~filename ~bates ~batespad range pdf chunks = type st = StMCID of int - | StItem of {kind : string; pageobjnum : int; children : st list} + | StItem of {kind : string; pageobjnum : int; alt : string option; children : st list} (* Build a tree from the MCIDs and structure tree instructions gathered *) let make_structure_tree pdf items = @@ -458,8 +460,8 @@ let make_structure_tree pdf items = in (* Process the items, making the st list tree data structure *) let process = function - | StDataMCID (n, mcid) -> - items_out =| StItem {kind = n; pageobjnum = unopt (lookup !pagenum pageobjnums); children = [StMCID mcid]} + | StDataMCID (n, mcid, alt) -> + items_out =| StItem {kind = n; alt; pageobjnum = unopt (lookup !pagenum pageobjnums); children = [StMCID mcid]} | StDataPage n -> pagenum := n | _ -> () @@ -481,14 +483,19 @@ let write_structure_tree pdf st = let struct_tree_root = Pdf.addobj pdf Pdf.Null in let items = map - (function StItem {kind; pageobjnum; children} -> + (function StItem {kind; pageobjnum; alt; children} -> let this_objnum = Pdf.addobj pdf Pdf.Null in - let this_obj = - Pdf.Dictionary [("/S", Pdf.Name kind); + let alt = + match alt with + | Some s -> [("/Alt", Pdf.String s)] + | None -> [] + in + let this_obj = + Pdf.Dictionary (alt @ [("/S", Pdf.Name kind); ("/Pg", Pdf.Indirect pageobjnum); ("/P", Pdf.Indirect struct_tree_root); ("/K", Pdf.Array (map (function StMCID x -> add_parentmap pageobjnum this_objnum; Pdf.Integer x - | _ -> assert false) children))] + | _ -> assert false) children))]) in Pdf.addobj_given_num pdf (this_objnum, this_obj); Pdf.Indirect this_objnum diff --git a/cpdfdraw.mli b/cpdfdraw.mli index 920044b..8db8cbd 100644 --- a/cpdfdraw.mli +++ b/cpdfdraw.mli @@ -33,7 +33,7 @@ type drawops = | FormXObject of float * float * float * float * string * drawops list | Use of string | ImageXObject of string * Pdf.pdfobject - | Image of string + | Image of string * string option | NewPage | Opacity of float | SOpacity of float diff --git a/cpdfdrawcontrol.ml b/cpdfdrawcontrol.ml index de7da28..041dbb4 100644 --- a/cpdfdrawcontrol.ml +++ b/cpdfdrawcontrol.ml @@ -270,8 +270,8 @@ let addpng ?data n = let data = Pdfio.bytes_of_string (contents_of_file filename) in addop (Cpdfdraw.ImageXObject (name, fst (Cpdfimage.obj_of_png_data data))) -let addimage s = - addop (Cpdfdraw.Image s) +let addimage ?title s = + addop (Cpdfdraw.Image (s, title)) let addnewpage s = addop Cpdfdraw.NewPage diff --git a/cpdfdrawcontrol.mli b/cpdfdrawcontrol.mli index 654cb37..49da980 100644 --- a/cpdfdrawcontrol.mli +++ b/cpdfdrawcontrol.mli @@ -50,7 +50,7 @@ val endxobj : unit -> unit val usexobj : string -> unit val addjpeg : ?data:Pdfio.rawbytes -> string -> unit val addpng : ?data:Pdfio.rawbytes -> string -> unit -val addimage : string -> unit +val addimage : ?title:string -> string -> unit val addopacity : float -> unit val addsopacity : float -> unit val addbt : unit -> unit