Tag and title images

This commit is contained in:
John Whitington 2024-09-11 13:57:57 +01:00
parent 56a68c57cb
commit e6c34277cf
5 changed files with 31 additions and 20 deletions

View File

@ -543,7 +543,8 @@ type args =
mutable jbig2_lossy_threshold : float; mutable jbig2_lossy_threshold : float;
mutable extract_stream_decompress : bool; mutable extract_stream_decompress : bool;
mutable verify_single : string option; mutable verify_single : string option;
mutable draw_struct_tree : bool} mutable draw_struct_tree : bool;
mutable image_title : string option}
let args = let args =
{op = None; {op = None;
@ -679,7 +680,8 @@ let args =
jbig2_lossy_threshold = 0.85; jbig2_lossy_threshold = 0.85;
extract_stream_decompress = false; extract_stream_decompress = false;
verify_single = None; verify_single = None;
draw_struct_tree = false} draw_struct_tree = false;
image_title = None}
(* Do not reset original_filename or cpdflin or was_encrypted or (* Do not reset original_filename or cpdflin or was_encrypted or
was_decrypted_with_owner or recrypt or producer or creator or path_to_* or was_decrypted_with_owner or recrypt or producer or creator or path_to_* or
@ -803,7 +805,8 @@ let reset_arguments () =
args.extract_stream_decompress <- false; args.extract_stream_decompress <- false;
clear Cpdfdrawcontrol.fontpack_initialised; clear Cpdfdrawcontrol.fontpack_initialised;
args.verify_single <- None; args.verify_single <- None;
args.draw_struct_tree <- false args.draw_struct_tree <- false;
args.image_title <- None
(* Prefer a) the one given with -cpdflin b) a local cpdflin, c) otherwise assume (* Prefer a) the one given with -cpdflin b) a local cpdflin, c) otherwise assume
installed at a system place *) installed at a system place *)
@ -2825,7 +2828,8 @@ let specs =
("-use", Arg.String Cpdfdrawcontrol.usexobj, " Use a saved sequence of graphics operators"); ("-use", Arg.String Cpdfdrawcontrol.usexobj, " Use a saved sequence of graphics operators");
("-draw-jpeg", Arg.String Cpdfdrawcontrol.addjpeg, " Load a JPEG from file and name it"); ("-draw-jpeg", Arg.String Cpdfdrawcontrol.addjpeg, " Load a JPEG from file and name it");
("-draw-png", Arg.String Cpdfdrawcontrol.addpng, " Load a PNG from file and name it"); ("-draw-png", Arg.String Cpdfdrawcontrol.addpng, " Load a PNG from file and name it");
("-image", Arg.String Cpdfdrawcontrol.addimage, " Draw an image which has already been loaded"); ("-image", Arg.String (fun s -> Cpdfdrawcontrol.addimage ?title:args.image_title s), " Draw an image which has already been loaded");
("-image-title", Arg.String (fun s -> args.image_title <- Some s), " Give title for future images");
("-fill-opacity", Arg.Float Cpdfdrawcontrol.addopacity, " Set opacity"); ("-fill-opacity", Arg.Float Cpdfdrawcontrol.addopacity, " Set opacity");
("-stroke-opacity", Arg.Float Cpdfdrawcontrol.addsopacity, " Set stroke opacity"); ("-stroke-opacity", Arg.Float Cpdfdrawcontrol.addsopacity, " Set stroke opacity");
("-bt", Arg.Unit Cpdfdrawcontrol.addbt, " Begin text"); ("-bt", Arg.Unit Cpdfdrawcontrol.addbt, " Begin text");

View File

@ -34,7 +34,7 @@ type drawops =
| FormXObject of float * float * float * float * string * drawops list | FormXObject of float * float * float * float * string * drawops list
| Use of string | Use of string
| ImageXObject of string * Pdf.pdfobject | ImageXObject of string * Pdf.pdfobject
| Image of string | Image of string * string option
| NewPage | NewPage
| Opacity of float | Opacity of float
| SOpacity of float | SOpacity of float
@ -202,7 +202,7 @@ let mcpage = ref ~-1
type structdata = type structdata =
| StDataBeginTree of string | StDataBeginTree of string
| StDataEndTree | StDataEndTree
| StDataMCID of string * int | StDataMCID of string * int * string option
| StDataPage of int | StDataPage of int
let structdata = ref [] let structdata = ref []
@ -251,10 +251,12 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func
let pdfname = try fst (Hashtbl.find (res ()).form_xobjects n) with _ -> error ("Form XObject not found: " ^ n) in let pdfname = try fst (Hashtbl.find (res ()).form_xobjects n) with _ -> error ("Form XObject not found: " ^ n) in
(res ()).page_names <- pdfname::(res ()).page_names; (res ()).page_names <- pdfname::(res ()).page_names;
[Pdfops.Op_Do pdfname] [Pdfops.Op_Do pdfname]
| Image s -> | Image (s, t) ->
let m = mcid () in
if not dryrun then structdata := StDataMCID ("/Figure", m, t)::!structdata;
let pdfname = try fst (Hashtbl.find (res ()).images s) with _ -> error ("Image not found: " ^ s) in let pdfname = try fst (Hashtbl.find (res ()).images s) with _ -> error ("Image not found: " ^ s) in
(res ()).page_names <- pdfname::(res ()).page_names; (res ()).page_names <- pdfname::(res ()).page_names;
[Pdfops.Op_Do pdfname] [Pdfops.Op_BDC ("/Figure", Pdf.Dictionary ["/MCID", Pdf.Integer m]); Pdfops.Op_Do pdfname; Pdfops.Op_EMC]
| ImageXObject (s, obj) -> | ImageXObject (s, obj) ->
Hashtbl.replace (res ()).images s (fresh_name "/I", Pdf.addobj pdf obj); Hashtbl.replace (res ()).images s (fresh_name "/I", Pdf.addobj pdf obj);
[] []
@ -310,7 +312,7 @@ let rec ops_of_drawop dryrun pdf endpage filename bates batespad num page = func
[] []
| TextSection ops -> | TextSection ops ->
let m = mcid () in let m = mcid () in
if not dryrun then structdata := StDataMCID ("/P", m)::!structdata; if not dryrun then structdata := StDataMCID ("/P", m, None)::!structdata;
[Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m]); [Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m]);
Pdfops.Op_BT] Pdfops.Op_BT]
@ ops_of_drawops dryrun pdf endpage filename bates batespad num page ops @ @ ops_of_drawops dryrun pdf endpage filename bates batespad num page ops @
@ -445,7 +447,7 @@ let dryrun ~filename ~bates ~batespad range pdf chunks =
type st = type st =
StMCID of int StMCID of int
| StItem of {kind : string; pageobjnum : int; children : st list} | StItem of {kind : string; pageobjnum : int; alt : string option; children : st list}
(* Build a tree from the MCIDs and structure tree instructions gathered *) (* Build a tree from the MCIDs and structure tree instructions gathered *)
let make_structure_tree pdf items = let make_structure_tree pdf items =
@ -458,8 +460,8 @@ let make_structure_tree pdf items =
in in
(* Process the items, making the st list tree data structure *) (* Process the items, making the st list tree data structure *)
let process = function let process = function
| StDataMCID (n, mcid) -> | StDataMCID (n, mcid, alt) ->
items_out =| StItem {kind = n; pageobjnum = unopt (lookup !pagenum pageobjnums); children = [StMCID mcid]} items_out =| StItem {kind = n; alt; pageobjnum = unopt (lookup !pagenum pageobjnums); children = [StMCID mcid]}
| StDataPage n -> | StDataPage n ->
pagenum := n pagenum := n
| _ -> () | _ -> ()
@ -481,14 +483,19 @@ let write_structure_tree pdf st =
let struct_tree_root = Pdf.addobj pdf Pdf.Null in let struct_tree_root = Pdf.addobj pdf Pdf.Null in
let items = let items =
map map
(function StItem {kind; pageobjnum; children} -> (function StItem {kind; pageobjnum; alt; children} ->
let this_objnum = Pdf.addobj pdf Pdf.Null in let this_objnum = Pdf.addobj pdf Pdf.Null in
let alt =
match alt with
| Some s -> [("/Alt", Pdf.String s)]
| None -> []
in
let this_obj = let this_obj =
Pdf.Dictionary [("/S", Pdf.Name kind); Pdf.Dictionary (alt @ [("/S", Pdf.Name kind);
("/Pg", Pdf.Indirect pageobjnum); ("/Pg", Pdf.Indirect pageobjnum);
("/P", Pdf.Indirect struct_tree_root); ("/P", Pdf.Indirect struct_tree_root);
("/K", Pdf.Array (map (function StMCID x -> add_parentmap pageobjnum this_objnum; Pdf.Integer x ("/K", Pdf.Array (map (function StMCID x -> add_parentmap pageobjnum this_objnum; Pdf.Integer x
| _ -> assert false) children))] | _ -> assert false) children))])
in in
Pdf.addobj_given_num pdf (this_objnum, this_obj); Pdf.addobj_given_num pdf (this_objnum, this_obj);
Pdf.Indirect this_objnum Pdf.Indirect this_objnum

View File

@ -33,7 +33,7 @@ type drawops =
| FormXObject of float * float * float * float * string * drawops list | FormXObject of float * float * float * float * string * drawops list
| Use of string | Use of string
| ImageXObject of string * Pdf.pdfobject | ImageXObject of string * Pdf.pdfobject
| Image of string | Image of string * string option
| NewPage | NewPage
| Opacity of float | Opacity of float
| SOpacity of float | SOpacity of float

View File

@ -270,8 +270,8 @@ let addpng ?data n =
let data = Pdfio.bytes_of_string (contents_of_file filename) in let data = Pdfio.bytes_of_string (contents_of_file filename) in
addop (Cpdfdraw.ImageXObject (name, fst (Cpdfimage.obj_of_png_data data))) addop (Cpdfdraw.ImageXObject (name, fst (Cpdfimage.obj_of_png_data data)))
let addimage s = let addimage ?title s =
addop (Cpdfdraw.Image s) addop (Cpdfdraw.Image (s, title))
let addnewpage s = let addnewpage s =
addop Cpdfdraw.NewPage addop Cpdfdraw.NewPage

View File

@ -50,7 +50,7 @@ val endxobj : unit -> unit
val usexobj : string -> unit val usexobj : string -> unit
val addjpeg : ?data:Pdfio.rawbytes -> string -> unit val addjpeg : ?data:Pdfio.rawbytes -> string -> unit
val addpng : ?data:Pdfio.rawbytes -> string -> unit val addpng : ?data:Pdfio.rawbytes -> string -> unit
val addimage : string -> unit val addimage : ?title:string -> string -> unit
val addopacity : float -> unit val addopacity : float -> unit
val addsopacity : float -> unit val addsopacity : float -> unit
val addbt : unit -> unit val addbt : unit -> unit