From e33120bf940cbd6e7e04a105bd2dfef409d1946a Mon Sep 17 00:00:00 2001
From: John Whitington <john@coherentgraphics.co.uk>
Date: Wed, 18 Sep 2024 16:32:29 +0100
Subject: [PATCH] Namespaces

---
 cpdfdraw.ml | 56 +++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 44 insertions(+), 12 deletions(-)

diff --git a/cpdfdraw.ml b/cpdfdraw.ml
index 6466196..6d7ecd0 100644
--- a/cpdfdraw.ml
+++ b/cpdfdraw.ml
@@ -242,7 +242,17 @@ let mcpage = ref ~-1
 let standard_namespace = "http://iso.org/pdf/ssn"
 let pdf2_namespace = "http://iso.org/pdf2/ssn"
 
-let namespace = ref standard_namespace 
+(* Maps each non-standard namespace string to the object number of its namespace dictionary. *)
+let namespaces = null_hash ()
+
+(* Register namespace s: unless standard or already known, add its dictionary to the PDF and remember the object number. *)
+let add_namespace pdf s =
+  let is_new = s <> standard_namespace && not (Hashtbl.mem namespaces s) in
+    if is_new then
+      begin
+        let dict = Pdf.Dictionary [("/NS", Pdf.String s)] in
+          Hashtbl.add namespaces s (Pdf.addobj pdf dict)
+      end
 
 (* The structure data, as it is created, in flat form. Later on, this will be
    reconstructed into a structure tree. *)
@@ -251,6 +261,7 @@ type structdata =
   | StDataEndTree
   | StDataMCID of string * int * string option
   | StDataPage of int
+  | StDataNamespace of string
 
 let structdata = ref []
 
@@ -435,7 +446,13 @@ let rec ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num
   | EndSTag -> if not dryrun then structdata =| StDataEndTree; []
   | BeginArtifact -> [Pdfops.Op_BMC "/BeginArtifact"]
   | EndArtifact -> [Pdfops.Op_BMC "/EndArtifact"]
-  | Namespace s -> if not dryrun then namespace := s; []
+  | Namespace s ->
+      if not dryrun then
+        begin
+          add_namespace pdf s;
+          structdata =| StDataNamespace s
+        end;
+        []
 
 and ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page drawops =
   flatten (map (ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num page) drawops)
@@ -585,7 +602,7 @@ let dryrun ~struct_tree ~filename ~bates ~batespad range pdf chunks =
 
 type st =
    StMCID of int
- | StItem of {kind : string; pageobjnum : int option; alt : string option; children : st list}
+ | StItem of {kind : string; namespace : string; pageobjnum : int option; alt : string option; children : st list}
 
 (* Build a tree from the MCIDs and structure tree instructions gathered *)
 let rec find_tree_contents a level = function
@@ -596,16 +613,32 @@ let rec find_tree_contents a level = function
       if level = 1 then (rev a, t) else find_tree_contents a (level - 1) t
   | h::t -> find_tree_contents (h::a) level t
 
-let rec make_structure_tree pageobjnums pdf pagenum = function
+(* Build the list of structure items described by the flat structure data.
+   [pagenum] and [namespace] are refs holding the page and namespace in force;
+   StDataPage and StDataNamespace entries update them as the data is walked.
+   Each item is built before the walk continues, so that it records the page
+   and namespace in force at its own position: OCaml does not specify the
+   evaluation order of constructor arguments or record fields. *)
+let rec make_structure_tree pageobjnums pdf pagenum namespace = function
   | [] -> []
   | StDataMCID (n, mcid, alt)::t ->
-      StItem {kind = n; alt; pageobjnum = lookup !pagenum pageobjnums; children = [StMCID mcid]}::make_structure_tree pageobjnums pdf pagenum t
+      let item =
+        StItem {kind = n; namespace = !namespace; alt; pageobjnum = lookup !pagenum pageobjnums; children = [StMCID mcid]}
+      in
+        item::make_structure_tree pageobjnums pdf pagenum namespace t
   | StDataPage n::t ->
       pagenum := n;
-      make_structure_tree pageobjnums pdf pagenum t
+      make_structure_tree pageobjnums pdf pagenum namespace t
+  | StDataNamespace s::t ->
+      namespace := s;
+      make_structure_tree pageobjnums pdf pagenum namespace t
   | StDataBeginTree s::t ->
       let tree_contents, rest = find_tree_contents [] 1 t in
-        [StItem {kind = s; alt = None; pageobjnum = None; children = make_structure_tree pageobjnums pdf pagenum rest}]
+        (* The element's children are the entries up to its matching end tag;
+           whatever follows the end tag yields its siblings. *)
+        let ns = !namespace in
+        let children = make_structure_tree pageobjnums pdf pagenum namespace tree_contents in
+          StItem {kind = s; namespace = ns; alt = None; pageobjnum = None; children}::make_structure_tree pageobjnums pdf pagenum namespace rest
   | StDataEndTree::t ->
       error "Too many -end-tags"
 
@@ -614,7 +634,7 @@ let make_structure_tree pdf items =
     let objnums = Pdf.page_reference_numbers pdf in
       combine (indx objnums) objnums
   in
-    make_structure_tree pageobjnums pdf (ref 0) items
+    make_structure_tree pageobjnums pdf (ref 0) (ref standard_namespace) items
 
 (* Write such a structure tree to a PDF. *)
 let write_structure_tree pdf st =
@@ -626,7 +646,7 @@ let write_structure_tree pdf st =
   in
   let struct_tree_root = Pdf.addobj pdf Pdf.Null in
   let rec mktree struct_tree_parent  = function
-    | StItem {kind; pageobjnum; alt; children} ->
+    | StItem {kind; namespace; pageobjnum; alt; children} ->
         let this_objnum = Pdf.addobj pdf Pdf.Null in
           begin match pageobjnum with
           | Some p -> add_parentmap p this_objnum
@@ -642,10 +662,15 @@ let write_structure_tree pdf st =
             | Some i -> [("/Pg", Pdf.Indirect i)]
             | None -> []
           in
+          let namespace =
+            if namespace = standard_namespace then [] else
+              [("/NS", Pdf.Indirect (Hashtbl.find namespaces namespace))]
+          in
           let this_obj =
             Pdf.Dictionary
               (alt
                @ page
+               @ namespace
                @ [("/S", Pdf.Name kind);
                   ("/P", Pdf.Indirect struct_tree_parent);
                   ("/K", Pdf.Array (map (mktree this_objnum) children))])
@@ -664,9 +689,16 @@ let write_structure_tree pdf st =
     map (fun (pon, items) -> (string_of_int pon, Pdf.Array (map (fun x -> Pdf.Indirect x) (rev items)))) !parentmap
   in
   let st =
-    Pdf.Dictionary [("/Type", Pdf.Name "/StructTreeRoot");
-                    ("/ParentTree", Pdf.Indirect (Pdf.addobj pdf (Pdftree.build_name_tree true pdf parentmap))); 
-                    ("/K", Pdf.Array items)]
+    let namespaces =
+      match list_of_hashtbl namespaces with
+      | [] -> []
+      | ns -> [("/Namespaces", Pdf.Array (map (function (_, objnum) -> Pdf.Indirect objnum) ns))]
+    in
+    Pdf.Dictionary
+      (namespaces @
+        [("/Type", Pdf.Name "/StructTreeRoot");
+         ("/ParentTree", Pdf.Indirect (Pdf.addobj pdf (Pdftree.build_name_tree true pdf parentmap))); 
+         ("/K", Pdf.Array items)])
   in
     Pdf.addobj_given_num pdf (struct_tree_root, st);
     Pdf.replace_chain pdf ["/Root"] ("/StructTreeRoot", (Pdf.Indirect struct_tree_root))