Rework, add elt_info
This commit is contained in:
parent
6df2ff7a9c
commit
a5e49e0986
|
@ -1907,6 +1907,13 @@ let setreadableops () =
|
||||||
Pdfops.always_add_whitespace := true;
|
Pdfops.always_add_whitespace := true;
|
||||||
Pdfops.write_comments := true
|
Pdfops.write_comments := true
|
||||||
|
|
||||||
|
let addeltinfo s =
|
||||||
|
match String.split_on_char '=' s with
|
||||||
|
| h::t ->
|
||||||
|
let pdfobj = Pdfread.parse_single_object (String.concat "" t) in
|
||||||
|
Cpdfdrawcontrol.eltinfo h pdfobj
|
||||||
|
| [] -> error "addeltinfo: bad format"
|
||||||
|
|
||||||
let specs =
|
let specs =
|
||||||
[("-version",
|
[("-version",
|
||||||
Arg.Unit (setop Version),
|
Arg.Unit (setop Version),
|
||||||
|
@ -2838,7 +2845,7 @@ let specs =
|
||||||
("-artifact", Arg.Unit (fun _ -> Cpdfdrawcontrol.artifact ()), " Begin an artifact");
|
("-artifact", Arg.Unit (fun _ -> Cpdfdrawcontrol.artifact ()), " Begin an artifact");
|
||||||
("-end-artifact", Arg.Unit (fun _ -> Cpdfdrawcontrol.endartifact ()), "End an artifact");
|
("-end-artifact", Arg.Unit (fun _ -> Cpdfdrawcontrol.endartifact ()), "End an artifact");
|
||||||
("-no-auto-artifacts", Arg.Unit (fun _ -> Cpdfdrawcontrol.autoartifacts false), " Don't mark untagged content as artifacts");
|
("-no-auto-artifacts", Arg.Unit (fun _ -> Cpdfdrawcontrol.autoartifacts false), " Don't mark untagged content as artifacts");
|
||||||
("-eltinfo", Arg.String (fun s -> Cpdfdrawcontrol.eltinfo s), " Add element information");
|
("-eltinfo", Arg.String addeltinfo, " Add element information");
|
||||||
("-end-eltinfo", Arg.String (fun s -> Cpdfdrawcontrol.endeltinfo s), " Erase element information");
|
("-end-eltinfo", Arg.String (fun s -> Cpdfdrawcontrol.endeltinfo s), " Erase element information");
|
||||||
("-namespace", Arg.String (fun s -> Cpdfdrawcontrol.addnamespace (expand_namespace s)), " Set the structure tree namespace");
|
("-namespace", Arg.String (fun s -> Cpdfdrawcontrol.addnamespace (expand_namespace s)), " Set the structure tree namespace");
|
||||||
("-rolemap", Arg.String (fun s -> Cpdfdrawcontrol.setrolemap s), " Set a role map");
|
("-rolemap", Arg.String (fun s -> Cpdfdrawcontrol.setrolemap s), " Set a role map");
|
||||||
|
|
49
cpdfdraw.ml
49
cpdfdraw.ml
|
@ -67,7 +67,7 @@ type drawops =
|
||||||
| BeginArtifact
|
| BeginArtifact
|
||||||
| EndArtifact
|
| EndArtifact
|
||||||
| Namespace of string
|
| Namespace of string
|
||||||
| EltInfo of string * string
|
| EltInfo of string * Pdf.pdfobject
|
||||||
| EndEltInfo of string
|
| EndEltInfo of string
|
||||||
|
|
||||||
(*let rec string_of_drawop = function
|
(*let rec string_of_drawop = function
|
||||||
|
@ -277,7 +277,7 @@ type structdata =
|
||||||
| StDataMCID of string * int
|
| StDataMCID of string * int
|
||||||
| StDataPage of int
|
| StDataPage of int
|
||||||
| StDataNamespace of string
|
| StDataNamespace of string
|
||||||
| StEltInfo of string * string
|
| StEltInfo of string * Pdf.pdfobject
|
||||||
| StEndEltInfo of string
|
| StEndEltInfo of string
|
||||||
|
|
||||||
let structdata = ref []
|
let structdata = ref []
|
||||||
|
@ -642,7 +642,7 @@ let dryrun ~struct_tree ~filename ~bates ~batespad range pdf chunks =
|
||||||
|
|
||||||
type st =
|
type st =
|
||||||
StMCID of int
|
StMCID of int
|
||||||
| StItem of {kind : string; namespace : string; pageobjnum : int option; alt : string option; children : st list}
|
| StItem of {kind : string; namespace : string; pageobjnum : int option; alt : (string * Pdf.pdfobject) list; children : st list}
|
||||||
|
|
||||||
(* Build a tree from the MCIDs and structure tree instructions gathered *)
|
(* Build a tree from the MCIDs and structure tree instructions gathered *)
|
||||||
let rec find_tree_contents a level = function
|
let rec find_tree_contents a level = function
|
||||||
|
@ -653,24 +653,38 @@ let rec find_tree_contents a level = function
|
||||||
if level = 1 then (rev a, t) else find_tree_contents a (level - 1) t
|
if level = 1 then (rev a, t) else find_tree_contents a (level - 1) t
|
||||||
| h::t -> find_tree_contents (h::a) level t
|
| h::t -> find_tree_contents (h::a) level t
|
||||||
|
|
||||||
let rec make_structure_tree pageobjnums pdf pagenum namespace = function
|
let rec make_structure_tree pageobjnums (pn, ns, ei) pdf = function
|
||||||
| [] -> []
|
| [] -> []
|
||||||
| StDataMCID (n, mcid)::t ->
|
| StDataMCID (n, mcid)::t ->
|
||||||
StItem {kind = n; namespace = !namespace; alt = None; pageobjnum = lookup !pagenum pageobjnums; children = [StMCID mcid]}::make_structure_tree pageobjnums pdf pagenum namespace t
|
Printf.printf "StDataMCID, pagenum = %i, pageobjnum = %i\n" !pn (unopt (lookup !pn pageobjnums));
|
||||||
|
let item =
|
||||||
|
StItem {kind = n; namespace = !ns; alt = list_of_hashtbl ei; pageobjnum = lookup !pn pageobjnums; children = [StMCID mcid]}
|
||||||
|
in
|
||||||
|
item::make_structure_tree pageobjnums (pn, ns, ei) pdf t
|
||||||
| StDataPage n::t ->
|
| StDataPage n::t ->
|
||||||
pagenum := n;
|
Printf.printf "StDataPage %i\n" n;
|
||||||
make_structure_tree pageobjnums pdf pagenum namespace t
|
pn := n;
|
||||||
|
make_structure_tree pageobjnums (pn, ns, ei) pdf t
|
||||||
| StDataNamespace s::t ->
|
| StDataNamespace s::t ->
|
||||||
namespace := s;
|
Printf.printf "StDataNamespace %s\n" s;
|
||||||
make_structure_tree pageobjnums pdf pagenum namespace t
|
ns := s;
|
||||||
|
make_structure_tree pageobjnums (pn, ns, ei) pdf t
|
||||||
| StEltInfo (k, v)::t ->
|
| StEltInfo (k, v)::t ->
|
||||||
make_structure_tree pageobjnums pdf pagenum namespace t
|
Printf.printf "StEltInfo %s, %s\n" k (Pdfwrite.string_of_pdf v);
|
||||||
|
Hashtbl.replace ei k v;
|
||||||
|
make_structure_tree pageobjnums (pn, ns, ei) pdf t
|
||||||
| StEndEltInfo s::t ->
|
| StEndEltInfo s::t ->
|
||||||
make_structure_tree pageobjnums pdf pagenum namespace t
|
Printf.printf "StEndEltInfo %s\n" s;
|
||||||
|
Hashtbl.remove ei s;
|
||||||
|
make_structure_tree pageobjnums (pn, ns, ei) pdf t
|
||||||
| StDataBeginTree s::t ->
|
| StDataBeginTree s::t ->
|
||||||
|
Printf.printf "StBeginTree %s\n" s;
|
||||||
let tree_contents, rest = find_tree_contents [] 1 t in
|
let tree_contents, rest = find_tree_contents [] 1 t in
|
||||||
StItem {kind = s; namespace = !namespace; alt = None; pageobjnum = None; children = make_structure_tree pageobjnums pdf pagenum namespace tree_contents}
|
let item =
|
||||||
::make_structure_tree pageobjnums pdf pagenum namespace rest
|
StItem {kind = s; namespace = !ns; alt = list_of_hashtbl ei; pageobjnum = None;
|
||||||
|
children = make_structure_tree pageobjnums (pn, ns, ei) pdf tree_contents}
|
||||||
|
in
|
||||||
|
item::make_structure_tree pageobjnums (pn, ns, ei) pdf rest
|
||||||
| StDataEndTree::t ->
|
| StDataEndTree::t ->
|
||||||
error "Too many -end-tags"
|
error "Too many -end-tags"
|
||||||
|
|
||||||
|
@ -679,7 +693,7 @@ let make_structure_tree pdf items =
|
||||||
let objnums = Pdf.page_reference_numbers pdf in
|
let objnums = Pdf.page_reference_numbers pdf in
|
||||||
combine (indx objnums) objnums
|
combine (indx objnums) objnums
|
||||||
in
|
in
|
||||||
make_structure_tree pageobjnums pdf (ref 0) (ref standard_namespace) items
|
make_structure_tree pageobjnums (ref 0, ref "", null_hash ()) pdf items
|
||||||
|
|
||||||
(* Write such a structure tree to a PDF. *)
|
(* Write such a structure tree to a PDF. *)
|
||||||
let write_structure_tree pdf st =
|
let write_structure_tree pdf st =
|
||||||
|
@ -692,16 +706,13 @@ let write_structure_tree pdf st =
|
||||||
let struct_tree_root = Pdf.addobj pdf Pdf.Null in
|
let struct_tree_root = Pdf.addobj pdf Pdf.Null in
|
||||||
let rec mktree struct_tree_parent = function
|
let rec mktree struct_tree_parent = function
|
||||||
| StItem {kind; namespace; pageobjnum; alt; children} ->
|
| StItem {kind; namespace; pageobjnum; alt; children} ->
|
||||||
|
Printf.printf "Found StItem with pageobjnum %s\n" (match pageobjnum with Some x -> string_of_int x | None -> "");
|
||||||
let this_objnum = Pdf.addobj pdf Pdf.Null in
|
let this_objnum = Pdf.addobj pdf Pdf.Null in
|
||||||
begin match pageobjnum with
|
begin match pageobjnum with
|
||||||
| Some p -> add_parentmap p this_objnum
|
| Some p -> add_parentmap p this_objnum
|
||||||
| _ -> ()
|
| _ -> ()
|
||||||
end;
|
end;
|
||||||
let alt =
|
let alt = map (fun (k, v) -> ("/" ^ k, v)) alt in
|
||||||
match alt with
|
|
||||||
| Some s -> [("/Alt", Pdf.String s)]
|
|
||||||
| None -> []
|
|
||||||
in
|
|
||||||
let page =
|
let page =
|
||||||
match pageobjnum with
|
match pageobjnum with
|
||||||
| Some i -> [("/Pg", Pdf.Indirect i)]
|
| Some i -> [("/Pg", Pdf.Indirect i)]
|
||||||
|
|
|
@ -60,7 +60,7 @@ type drawops =
|
||||||
| BeginArtifact
|
| BeginArtifact
|
||||||
| EndArtifact
|
| EndArtifact
|
||||||
| Namespace of string
|
| Namespace of string
|
||||||
| EltInfo of string * string
|
| EltInfo of string * Pdf.pdfobject
|
||||||
| EndEltInfo of string
|
| EndEltInfo of string
|
||||||
|
|
||||||
val do_add_artifacts : bool ref
|
val do_add_artifacts : bool ref
|
||||||
|
|
|
@ -123,12 +123,7 @@ let endartifact () =
|
||||||
let addnamespace s =
|
let addnamespace s =
|
||||||
addop (Cpdfdraw.Namespace s)
|
addop (Cpdfdraw.Namespace s)
|
||||||
|
|
||||||
let eltinfo s =
|
let eltinfo k v =
|
||||||
let k, v =
|
|
||||||
match String.split_on_char '=' s with
|
|
||||||
| [h; t] -> (h, t)
|
|
||||||
| _ -> error "Bad -eltinfo format"
|
|
||||||
in
|
|
||||||
addop (Cpdfdraw.EltInfo (k, v))
|
addop (Cpdfdraw.EltInfo (k, v))
|
||||||
|
|
||||||
let endeltinfo s =
|
let endeltinfo s =
|
||||||
|
|
|
@ -20,7 +20,7 @@ val addstag : string -> unit
|
||||||
val endtag : unit -> unit
|
val endtag : unit -> unit
|
||||||
val endstag : unit -> unit
|
val endstag : unit -> unit
|
||||||
val autotags : bool -> unit
|
val autotags : bool -> unit
|
||||||
val eltinfo : string -> unit
|
val eltinfo : string -> Pdf.pdfobject -> unit
|
||||||
val endeltinfo : string -> unit
|
val endeltinfo : string -> unit
|
||||||
val addnamespace : string -> unit
|
val addnamespace : string -> unit
|
||||||
val setrolemap : string -> unit
|
val setrolemap : string -> unit
|
||||||
|
|
Loading…
Reference in New Issue