PDF/UA marking finished
This commit is contained in:
parent
a56f222b00
commit
1ecfa219e9
|
@ -301,14 +301,14 @@ type xmltree =
|
||||||
| D of string
|
| D of string
|
||||||
|
|
||||||
let xmltree_of_bytes b =
|
let xmltree_of_bytes b =
|
||||||
let i = Cpdfxmlm.make_input (`String (0, string_of_bytes b)) in
|
let i = Cpdfxmlm.make_input ~strip:true (`String (0, string_of_bytes b)) in
|
||||||
let el tag childs = E (tag, childs)
|
let el tag childs = E (tag, childs)
|
||||||
and data d = D d in
|
and data d = D d in
|
||||||
Cpdfxmlm.input_doc_tree ~el ~data i
|
Cpdfxmlm.input_doc_tree ~el ~data i
|
||||||
|
|
||||||
let bytes_of_xmltree t =
|
let bytes_of_xmltree t =
|
||||||
let buf = Buffer.create 1024 in
|
let buf = Buffer.create 1024 in
|
||||||
let o = Cpdfxmlm.make_output (`Buffer buf) in
|
let o = Cpdfxmlm.make_output ~indent:(Some 2) (`Buffer buf) in
|
||||||
let frag = function
|
let frag = function
|
||||||
E (tag, childs) -> `El (tag, childs)
|
E (tag, childs) -> `El (tag, childs)
|
||||||
| D d -> `Data d
|
| D d -> `Data d
|
||||||
|
|
54
cpdfua.ml
54
cpdfua.ml
|
@ -289,23 +289,26 @@ let test_matterhorn_json pdf =
|
||||||
(test_matterhorn pdf))
|
(test_matterhorn pdf))
|
||||||
|
|
||||||
let pdfua_marker =
|
let pdfua_marker =
|
||||||
Cpdfmetadata.(E (((rdf, "Description"), [((rdf, "about"), ""); ((Cpdfxmlm.ns_xmlns, "pdfuaid"), pdfuaid)]), [E (((pdfuaid, "part"), []), [D "1"])]))
|
Cpdfmetadata.(E (((rdf, "Description"), [((rdf, "about"), ""); ((Cpdfxmlm.ns_xmlns, "pdfuaid"), pdfuaid)]), [E (((pdfuaid, "part"), []), [D "1"])]))
|
||||||
|
|
||||||
let rec strip_ds = function
|
let rec insert_as_rdf_description fragment = function
|
||||||
| Cpdfmetadata.D _::r -> strip_ds r
|
| Cpdfmetadata.E (((_, "RDF"), _) as rdftag, rdfs) ->
|
||||||
| x -> x
|
Cpdfmetadata.E (rdftag, fragment::rdfs)
|
||||||
|
|
||||||
let insert_as_rdf_description fragment = function
|
|
||||||
| Cpdfmetadata.E (((_, "xmpmeta"), _) as xmptag, cs) ->
|
| Cpdfmetadata.E (((_, "xmpmeta"), _) as xmptag, cs) ->
|
||||||
begin match strip_ds cs with
|
Cpdfmetadata.E (xmptag, map (insert_as_rdf_description fragment) cs)
|
||||||
| Cpdfmetadata.E (((_, "RDF"), _) as rdftag, rdfs)::more ->
|
|
||||||
Cpdfmetadata.E (xmptag, Cpdfmetadata.E (rdftag, fragment::rdfs)::more)
|
|
||||||
| _ -> error "could not locate RDF"
|
|
||||||
end
|
|
||||||
| _ -> error "insert_as_rdf_description: could not find insertion point."
|
| _ -> error "insert_as_rdf_description: could not find insertion point."
|
||||||
|
|
||||||
(* FIXME *)
|
let rec delete_pdfua_marker tree =
|
||||||
let delete_pdfua_marker tree = tree
|
let is_pdfuaid = function
|
||||||
|
| Cpdfmetadata.E (((pdfuaid, "part"), _), _) when pdfuaid = Cpdfmetadata.pdfuaid -> true
|
||||||
|
| _ -> false
|
||||||
|
in
|
||||||
|
match tree with
|
||||||
|
| Cpdfmetadata.E (((rdf, "Description"), _), c) when rdf = Cpdfmetadata.rdf && List.exists is_pdfuaid c ->
|
||||||
|
Cpdfmetadata.D ""
|
||||||
|
| Cpdfmetadata.E (x, children) ->
|
||||||
|
Cpdfmetadata.E (x, map delete_pdfua_marker children)
|
||||||
|
| x -> x
|
||||||
|
|
||||||
let mark pdf =
|
let mark pdf =
|
||||||
let pdf2 = if Cpdfmetadata.get_metadata pdf = None then Cpdfmetadata.create_metadata pdf else pdf in
|
let pdf2 = if Cpdfmetadata.get_metadata pdf = None then Cpdfmetadata.create_metadata pdf else pdf in
|
||||||
|
@ -315,22 +318,15 @@ let mark pdf =
|
||||||
match Cpdfmetadata.get_metadata pdf with
|
match Cpdfmetadata.get_metadata pdf with
|
||||||
| Some metadata ->
|
| Some metadata ->
|
||||||
let dtd, tree = Cpdfmetadata.xmltree_of_bytes metadata in
|
let dtd, tree = Cpdfmetadata.xmltree_of_bytes metadata in
|
||||||
|
let newtree =
|
||||||
begin match Cpdfmetadata.get_data_for Cpdfmetadata.pdfuaid "part" tree with
|
begin match Cpdfmetadata.get_data_for Cpdfmetadata.pdfuaid "part" tree with
|
||||||
| Some _ ->
|
| Some _ -> insert_as_rdf_description pdfua_marker (delete_pdfua_marker tree)
|
||||||
let newtree = delete_pdfua_marker tree in
|
| None -> insert_as_rdf_description pdfua_marker tree
|
||||||
let newtree = insert_as_rdf_description pdfua_marker newtree in
|
|
||||||
let newbytes = Cpdfmetadata.bytes_of_xmltree (dtd, newtree) in
|
|
||||||
let pdf3 = Cpdfmetadata.set_metadata_from_bytes true newbytes pdf in
|
|
||||||
pdf.Pdf.objects <- pdf3.Pdf.objects;
|
|
||||||
pdf.Pdf.trailerdict <- pdf3.Pdf.trailerdict;
|
|
||||||
pdf.Pdf.root <- pdf3.Pdf.root
|
|
||||||
| None ->
|
|
||||||
let newtree = insert_as_rdf_description pdfua_marker tree in
|
|
||||||
let newbytes = Cpdfmetadata.bytes_of_xmltree (dtd, newtree) in
|
|
||||||
(*Printf.printf "string_of_metadata: %s\n" (Cpdfmetadata.string_of_xmltree newtree);*)
|
|
||||||
let pdf3 = Cpdfmetadata.set_metadata_from_bytes true newbytes pdf in
|
|
||||||
pdf.Pdf.objects <- pdf3.Pdf.objects;
|
|
||||||
pdf.Pdf.trailerdict <- pdf3.Pdf.trailerdict;
|
|
||||||
pdf.Pdf.root <- pdf3.Pdf.root
|
|
||||||
end
|
end
|
||||||
|
in
|
||||||
|
let newbytes = Cpdfmetadata.bytes_of_xmltree (dtd, newtree) in
|
||||||
|
let pdf3 = Cpdfmetadata.set_metadata_from_bytes true newbytes pdf in
|
||||||
|
pdf.Pdf.objects <- pdf3.Pdf.objects;
|
||||||
|
pdf.Pdf.trailerdict <- pdf3.Pdf.trailerdict;
|
||||||
|
pdf.Pdf.root <- pdf3.Pdf.root
|
||||||
| None -> assert false
|
| None -> assert false
|
||||||
|
|
Loading…
Reference in New Issue