PDF/UA marking finished

This commit is contained in:
John Whitington 2024-06-03 15:04:58 +01:00
parent a56f222b00
commit 1ecfa219e9
2 changed files with 27 additions and 31 deletions

View File

@ -301,14 +301,14 @@ type xmltree =
| D of string | D of string
let xmltree_of_bytes b = let xmltree_of_bytes b =
let i = Cpdfxmlm.make_input (`String (0, string_of_bytes b)) in let i = Cpdfxmlm.make_input ~strip:true (`String (0, string_of_bytes b)) in
let el tag childs = E (tag, childs) let el tag childs = E (tag, childs)
and data d = D d in and data d = D d in
Cpdfxmlm.input_doc_tree ~el ~data i Cpdfxmlm.input_doc_tree ~el ~data i
let bytes_of_xmltree t = let bytes_of_xmltree t =
let buf = Buffer.create 1024 in let buf = Buffer.create 1024 in
let o = Cpdfxmlm.make_output (`Buffer buf) in let o = Cpdfxmlm.make_output ~indent:(Some 2) (`Buffer buf) in
let frag = function let frag = function
E (tag, childs) -> `El (tag, childs) E (tag, childs) -> `El (tag, childs)
| D d -> `Data d | D d -> `Data d

View File

@ -289,23 +289,26 @@ let test_matterhorn_json pdf =
(test_matterhorn pdf)) (test_matterhorn pdf))
let pdfua_marker = let pdfua_marker =
Cpdfmetadata.(E (((rdf, "Description"), [((rdf, "about"), ""); ((Cpdfxmlm.ns_xmlns, "pdfuaid"), pdfuaid)]), [E (((pdfuaid, "part"), []), [D "1"])])) Cpdfmetadata.(E (((rdf, "Description"), [((rdf, "about"), ""); ((Cpdfxmlm.ns_xmlns, "pdfuaid"), pdfuaid)]), [E (((pdfuaid, "part"), []), [D "1"])]))
let rec strip_ds = function let rec insert_as_rdf_description fragment = function
| Cpdfmetadata.D _::r -> strip_ds r | Cpdfmetadata.E (((_, "RDF"), _) as rdftag, rdfs) ->
| x -> x Cpdfmetadata.E (rdftag, fragment::rdfs)
let insert_as_rdf_description fragment = function
| Cpdfmetadata.E (((_, "xmpmeta"), _) as xmptag, cs) -> | Cpdfmetadata.E (((_, "xmpmeta"), _) as xmptag, cs) ->
begin match strip_ds cs with Cpdfmetadata.E (xmptag, map (insert_as_rdf_description fragment) cs)
| Cpdfmetadata.E (((_, "RDF"), _) as rdftag, rdfs)::more ->
Cpdfmetadata.E (xmptag, Cpdfmetadata.E (rdftag, fragment::rdfs)::more)
| _ -> error "could not locate RDF"
end
| _ -> error "insert_as_rdf_description: could not find insertion point." | _ -> error "insert_as_rdf_description: could not find insertion point."
(* FIXME *) let rec delete_pdfua_marker tree =
let delete_pdfua_marker tree = tree let is_pdfuaid = function
| Cpdfmetadata.E (((pdfuaid, "part"), _), _) when pdfuaid = Cpdfmetadata.pdfuaid -> true
| _ -> false
in
match tree with
| Cpdfmetadata.E (((rdf, "Description"), _), c) when rdf = Cpdfmetadata.rdf && List.exists is_pdfuaid c ->
Cpdfmetadata.D ""
| Cpdfmetadata.E (x, children) ->
Cpdfmetadata.E (x, map delete_pdfua_marker children)
| x -> x
let mark pdf = let mark pdf =
let pdf2 = if Cpdfmetadata.get_metadata pdf = None then Cpdfmetadata.create_metadata pdf else pdf in let pdf2 = if Cpdfmetadata.get_metadata pdf = None then Cpdfmetadata.create_metadata pdf else pdf in
@ -315,22 +318,15 @@ let mark pdf =
match Cpdfmetadata.get_metadata pdf with match Cpdfmetadata.get_metadata pdf with
| Some metadata -> | Some metadata ->
let dtd, tree = Cpdfmetadata.xmltree_of_bytes metadata in let dtd, tree = Cpdfmetadata.xmltree_of_bytes metadata in
let newtree =
begin match Cpdfmetadata.get_data_for Cpdfmetadata.pdfuaid "part" tree with begin match Cpdfmetadata.get_data_for Cpdfmetadata.pdfuaid "part" tree with
| Some _ -> | Some _ -> insert_as_rdf_description pdfua_marker (delete_pdfua_marker tree)
let newtree = delete_pdfua_marker tree in | None -> insert_as_rdf_description pdfua_marker tree
let newtree = insert_as_rdf_description pdfua_marker newtree in
let newbytes = Cpdfmetadata.bytes_of_xmltree (dtd, newtree) in
let pdf3 = Cpdfmetadata.set_metadata_from_bytes true newbytes pdf in
pdf.Pdf.objects <- pdf3.Pdf.objects;
pdf.Pdf.trailerdict <- pdf3.Pdf.trailerdict;
pdf.Pdf.root <- pdf3.Pdf.root
| None ->
let newtree = insert_as_rdf_description pdfua_marker tree in
let newbytes = Cpdfmetadata.bytes_of_xmltree (dtd, newtree) in
(*Printf.printf "string_of_metadata: %s\n" (Cpdfmetadata.string_of_xmltree newtree);*)
let pdf3 = Cpdfmetadata.set_metadata_from_bytes true newbytes pdf in
pdf.Pdf.objects <- pdf3.Pdf.objects;
pdf.Pdf.trailerdict <- pdf3.Pdf.trailerdict;
pdf.Pdf.root <- pdf3.Pdf.root
end end
in
let newbytes = Cpdfmetadata.bytes_of_xmltree (dtd, newtree) in
let pdf3 = Cpdfmetadata.set_metadata_from_bytes true newbytes pdf in
pdf.Pdf.objects <- pdf3.Pdf.objects;
pdf.Pdf.trailerdict <- pdf3.Pdf.trailerdict;
pdf.Pdf.root <- pdf3.Pdf.root
| None -> assert false | None -> assert false