Namespaces

This commit is contained in:
John Whitington
2024-09-18 16:32:29 +01:00
parent fc28d8c469
commit e33120bf94

View File

@ -242,7 +242,17 @@ let mcpage = ref ~-1
let standard_namespace = "http://iso.org/pdf/ssn" let standard_namespace = "http://iso.org/pdf/ssn"
let pdf2_namespace = "http://iso.org/pdf2/ssn" let pdf2_namespace = "http://iso.org/pdf2/ssn"
let namespace = ref standard_namespace (* namespace, object number pair. *)
let namespaces = null_hash ()
(* Add the object, add its number and this namespace to the hash. *)
let add_namespace pdf s =
if s = standard_namespace then () else
match Hashtbl.find_opt namespaces s with
| Some _ -> ()
| None ->
let objnum = Pdf.addobj pdf (Pdf.Dictionary [("/NS", Pdf.String s)]) in
Hashtbl.add namespaces s objnum
(* The structure data, as it is created, in flat form. Later on, this will be (* The structure data, as it is created, in flat form. Later on, this will be
reconstructed into a structure tree. *) reconstructed into a structure tree. *)
@ -251,6 +261,7 @@ type structdata =
| StDataEndTree | StDataEndTree
| StDataMCID of string * int * string option | StDataMCID of string * int * string option
| StDataPage of int | StDataPage of int
| StDataNamespace of string
let structdata = ref [] let structdata = ref []
@ -435,7 +446,13 @@ let rec ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num
| EndSTag -> if not dryrun then structdata =| StDataEndTree; [] | EndSTag -> if not dryrun then structdata =| StDataEndTree; []
| BeginArtifact -> [Pdfops.Op_BMC "/BeginArtifact"] | BeginArtifact -> [Pdfops.Op_BMC "/BeginArtifact"]
| EndArtifact -> [Pdfops.Op_BMC "/EndArtifact"] | EndArtifact -> [Pdfops.Op_BMC "/EndArtifact"]
| Namespace s -> if not dryrun then namespace := s; [] | Namespace s ->
if not dryrun then
begin
add_namespace pdf s;
structdata =| StDataNamespace s
end;
[]
and ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page drawops = and ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page drawops =
flatten (map (ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num page) drawops) flatten (map (ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num page) drawops)
@ -585,7 +602,7 @@ let dryrun ~struct_tree ~filename ~bates ~batespad range pdf chunks =
type st = type st =
StMCID of int StMCID of int
| StItem of {kind : string; pageobjnum : int option; alt : string option; children : st list} | StItem of {kind : string; namespace : string; pageobjnum : int option; alt : string option; children : st list}
(* Build a tree from the MCIDs and structure tree instructions gathered *) (* Build a tree from the MCIDs and structure tree instructions gathered *)
let rec find_tree_contents a level = function let rec find_tree_contents a level = function
@ -596,16 +613,19 @@ let rec find_tree_contents a level = function
if level = 1 then (rev a, t) else find_tree_contents a (level - 1) t if level = 1 then (rev a, t) else find_tree_contents a (level - 1) t
| h::t -> find_tree_contents (h::a) level t | h::t -> find_tree_contents (h::a) level t
let rec make_structure_tree pageobjnums pdf pagenum = function let rec make_structure_tree pageobjnums pdf pagenum namespace = function
| [] -> [] | [] -> []
| StDataMCID (n, mcid, alt)::t -> | StDataMCID (n, mcid, alt)::t ->
StItem {kind = n; alt; pageobjnum = lookup !pagenum pageobjnums; children = [StMCID mcid]}::make_structure_tree pageobjnums pdf pagenum t StItem {kind = n; namespace = !namespace; alt; pageobjnum = lookup !pagenum pageobjnums; children = [StMCID mcid]}::make_structure_tree pageobjnums pdf pagenum namespace t
| StDataPage n::t -> | StDataPage n::t ->
pagenum := n; pagenum := n;
make_structure_tree pageobjnums pdf pagenum t make_structure_tree pageobjnums pdf pagenum namespace t
| StDataNamespace s::t ->
namespace := s;
make_structure_tree pageobjnums pdf pagenum namespace t
| StDataBeginTree s::t -> | StDataBeginTree s::t ->
let tree_contents, rest = find_tree_contents [] 1 t in let tree_contents, rest = find_tree_contents [] 1 t in
[StItem {kind = s; alt = None; pageobjnum = None; children = make_structure_tree pageobjnums pdf pagenum rest}] [StItem {kind = s; namespace = !namespace; alt = None; pageobjnum = None; children = make_structure_tree pageobjnums pdf pagenum namespace rest}]
| StDataEndTree::t -> | StDataEndTree::t ->
error "Too many -end-tags" error "Too many -end-tags"
@ -614,7 +634,7 @@ let make_structure_tree pdf items =
let objnums = Pdf.page_reference_numbers pdf in let objnums = Pdf.page_reference_numbers pdf in
combine (indx objnums) objnums combine (indx objnums) objnums
in in
make_structure_tree pageobjnums pdf (ref 0) items make_structure_tree pageobjnums pdf (ref 0) (ref standard_namespace) items
(* Write such a structure tree to a PDF. *) (* Write such a structure tree to a PDF. *)
let write_structure_tree pdf st = let write_structure_tree pdf st =
@ -626,7 +646,7 @@ let write_structure_tree pdf st =
in in
let struct_tree_root = Pdf.addobj pdf Pdf.Null in let struct_tree_root = Pdf.addobj pdf Pdf.Null in
let rec mktree struct_tree_parent = function let rec mktree struct_tree_parent = function
| StItem {kind; pageobjnum; alt; children} -> | StItem {kind; namespace; pageobjnum; alt; children} ->
let this_objnum = Pdf.addobj pdf Pdf.Null in let this_objnum = Pdf.addobj pdf Pdf.Null in
begin match pageobjnum with begin match pageobjnum with
| Some p -> add_parentmap p this_objnum | Some p -> add_parentmap p this_objnum
@ -642,10 +662,15 @@ let write_structure_tree pdf st =
| Some i -> [("/Pg", Pdf.Indirect i)] | Some i -> [("/Pg", Pdf.Indirect i)]
| None -> [] | None -> []
in in
let namespace =
if namespace = standard_namespace then [] else
[("/NS", Pdf.Indirect (Hashtbl.find namespaces namespace))]
in
let this_obj = let this_obj =
Pdf.Dictionary Pdf.Dictionary
(alt (alt
@ page @ page
@ namespace
@ [("/S", Pdf.Name kind); @ [("/S", Pdf.Name kind);
("/P", Pdf.Indirect struct_tree_parent); ("/P", Pdf.Indirect struct_tree_parent);
("/K", Pdf.Array (map (mktree this_objnum) children))]) ("/K", Pdf.Array (map (mktree this_objnum) children))])
@ -664,9 +689,16 @@ let write_structure_tree pdf st =
map (fun (pon, items) -> (string_of_int pon, Pdf.Array (map (fun x -> Pdf.Indirect x) (rev items)))) !parentmap map (fun (pon, items) -> (string_of_int pon, Pdf.Array (map (fun x -> Pdf.Indirect x) (rev items)))) !parentmap
in in
let st = let st =
Pdf.Dictionary [("/Type", Pdf.Name "/StructTreeRoot"); let namespaces =
match list_of_hashtbl namespaces with
| [] -> []
| ns -> [("/Namespaces", Pdf.Array (map (function (_, objnum) -> Pdf.Indirect objnum) ns))]
in
Pdf.Dictionary
(namespaces @
[("/Type", Pdf.Name "/StructTreeRoot");
("/ParentTree", Pdf.Indirect (Pdf.addobj pdf (Pdftree.build_name_tree true pdf parentmap))); ("/ParentTree", Pdf.Indirect (Pdf.addobj pdf (Pdftree.build_name_tree true pdf parentmap)));
("/K", Pdf.Array items)] ("/K", Pdf.Array items)])
in in
Pdf.addobj_given_num pdf (struct_tree_root, st); Pdf.addobj_given_num pdf (struct_tree_root, st);
Pdf.replace_chain pdf ["/Root"] ("/StructTreeRoot", (Pdf.Indirect struct_tree_root)) Pdf.replace_chain pdf ["/Root"] ("/StructTreeRoot", (Pdf.Indirect struct_tree_root))