From 7e71f1b4f7177022af23804a2c2fec6cbeb7bff1 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Fri, 31 May 2024 17:26:36 +0100 Subject: [PATCH] Progress towards -mark-as --- cpdfmetadata.ml | 13 ++++--------- cpdfmetadata.mli | 4 ++++ cpdfua.ml | 29 +++++++++++++++++++++++++++-- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/cpdfmetadata.ml b/cpdfmetadata.ml index 34e0e9e..f4849a5 100644 --- a/cpdfmetadata.ml +++ b/cpdfmetadata.ml @@ -56,11 +56,6 @@ let xmp_template = |} -let pdfua_marker = -{| - 1 - "|} - (* Set or replace metadata *) let set_metadata_from_bytes keepversion data pdf = let metadata_stream = @@ -318,11 +313,11 @@ let bytes_of_xmltree t = Cpdfxmlm.output_doc_tree frag o t; bytes_of_string (Buffer.contents buf) -(*let rec string_of_xmltree = function +let rec string_of_xmltree = function D d -> - Printf.sprintf "DATA {%s}" d + Printf.sprintf "D {%s}" d | E (tag, trees) -> - Printf.sprintf "ELT (%s, %s)" + Printf.sprintf "E (%s, %s)" (string_of_tag tag) (string_of_xmltrees trees) @@ -340,7 +335,7 @@ and string_of_attributes attrs = and string_of_xmltrees trees = fold_left - (fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees)*) + (fun a b -> a ^ " " ^ b) "" (map string_of_xmltree trees) let adobe = "http://ns.adobe.com/pdf/1.3/" let xmp = "http://ns.adobe.com/xap/1.0/" diff --git a/cpdfmetadata.mli b/cpdfmetadata.mli index c37d7fe..ebbda15 100644 --- a/cpdfmetadata.mli +++ b/cpdfmetadata.mli @@ -104,3 +104,7 @@ type xmltree = val xmltree_of_bytes : Pdfio.bytes -> Cpdfxmlm.dtd * xmltree val get_data_for : string -> string -> xmltree -> string option + +val string_of_xmltree : xmltree -> string + +val bytes_of_xmltree : Cpdfxmlm.dtd * xmltree -> Pdfio.bytes diff --git a/cpdfua.ml b/cpdfua.ml index a4fc429..80c9ae4 100644 --- a/cpdfua.ml +++ b/cpdfua.ml @@ -287,9 +287,34 @@ let test_matterhorn_json pdf = `Assoc [("name", `String name); ("section", `String section); ("error", `String error); ("extra", extra)]) (test_matterhorn pdf)) +let pdfua_marker = + Cpdfmetadata.(E (((rdf, "Description"), [((rdf, "about"), ""); (("xmlns", "pdfuaid"), pdfuaid)]), [E (((pdfuaid, "part"), []), [D "1"])])) + +(*{| + 1 + "|}*) + let mark pdf = let pdf2 = if Cpdfmetadata.get_metadata pdf = None then Cpdfmetadata.create_metadata pdf else pdf in pdf.Pdf.objects <- pdf2.Pdf.objects; pdf.Pdf.trailerdict <- pdf2.Pdf.trailerdict; - pdf.Pdf.root <- pdf.Pdf.root; - () + pdf.Pdf.root <- pdf2.Pdf.root; + match Cpdfmetadata.get_metadata pdf with + | Some metadata -> + let dtd, tree = Cpdfmetadata.xmltree_of_bytes metadata in + (*Printf.printf "string_of_metadata: %s\n" (Cpdfmetadata.string_of_xmltree tree);*) + begin match Cpdfmetadata.get_data_for Cpdfmetadata.pdfuaid "part" tree with + | Some _ -> () (* Already so marked. *) + | None -> + (* If not, add our pdfua_marker to the list *) + let newtree = tree in + (*Cpdfmetadata.(match tree with + | E (("rdf"*) + let newbytes = Cpdfmetadata.bytes_of_xmltree (dtd, newtree) in + (* Write the metadata stream back. *) + let pdf3 = Cpdfmetadata.set_metadata_from_bytes true newbytes pdf in + pdf.Pdf.objects <- pdf3.Pdf.objects; + pdf.Pdf.trailerdict <- pdf3.Pdf.trailerdict; + pdf.Pdf.root <- pdf3.Pdf.root + end + | None -> assert false