Skeleton for structinfo in Cpdftexttopdf

This commit is contained in:
John Whitington 2024-10-03 14:47:57 +01:00
parent 4cb6fca6af
commit 163d631dc9
2 changed files with 34 additions and 3 deletions

View File

@ -518,7 +518,7 @@ let image_of_input ?subformat ?title ~process_struct_tree fobj i =
| _, (Some Cpdfua.PDFUA1 | Some Cpdfua.PDFUA2) | true, _ -> true | _, (Some Cpdfua.PDFUA1 | Some Cpdfua.PDFUA2) | true, _ -> true
| _ -> false | _ -> false
in in
if process_struct_tree || subformat = Some Cpdfua.PDFUA2 then if subformat = Some Cpdfua.PDFUA2 then
begin begin
let str = Pdf.addobj pdf Pdf.Null in let str = Pdf.addobj pdf Pdf.Null in
let figure = Pdf.addobj pdf Pdf.Null in let figure = Pdf.addobj pdf Pdf.Null in
@ -532,7 +532,7 @@ let image_of_input ?subformat ?title ~process_struct_tree fobj i =
("/K", Pdf.Array [Pdf.Indirect document]); ("/ParentTree", Pdf.Indirect parent_tree)]); ("/K", Pdf.Array [Pdf.Indirect document]); ("/ParentTree", Pdf.Indirect parent_tree)]);
Pdf.replace_chain pdf ["/Root"] ("/StructTreeRoot", (Pdf.Indirect str)) Pdf.replace_chain pdf ["/Root"] ("/StructTreeRoot", (Pdf.Indirect str))
end end
else if subformat = Some Cpdfua.PDFUA1 then else if process_struct_tree || subformat = Some Cpdfua.PDFUA1 then
begin begin
let str = Pdf.addobj pdf Pdf.Null in let str = Pdf.addobj pdf Pdf.Null in
let figure = Pdf.addobj pdf Pdf.Null in let figure = Pdf.addobj pdf Pdf.Null in

View File

@ -56,7 +56,9 @@ let tag_paragraphs l =
let typeset ~process_struct_tree ?subformat ?title ~papersize ~font ~fontsize text = let typeset ~process_struct_tree ?subformat ?title ~papersize ~font ~fontsize text =
let process_struct_tree = let process_struct_tree =
process_struct_tree || subformat = Some Cpdfua.PDFUA1 || subformat = Some Cpdfua.PDFUA2 match process_struct_tree, subformat with
| _, (Some Cpdfua.PDFUA1 | Some Cpdfua.PDFUA2) | true, _ -> true
| _ -> false
in in
let pdf, title = let pdf, title =
match subformat with match subformat with
@ -92,6 +94,35 @@ let typeset ~process_struct_tree ?subformat ?title ~papersize ~font ~fontsize te
let firstfont = hd (keep (function Cpdftype.Font _ -> true | _ -> false) tagged) in let firstfont = hd (keep (function Cpdftype.Font _ -> true | _ -> false) tagged) in
[firstfont; Cpdftype.BeginDocument] @ tagged [firstfont; Cpdftype.BeginDocument] @ tagged
in in
if subformat = Some Cpdfua.PDFUA2 then
begin
let str = Pdf.addobj pdf Pdf.Null in
let p = Pdf.addobj pdf Pdf.Null in
let parent_tree = Pdf.addobj pdf Pdf.Null in
let namespace = Pdf.addobj pdf (Pdf.Dictionary [("/NS", Pdf.String "http://iso.org/pdf2/ssn")]) in
let document = Pdf.addobj pdf Pdf.Null in
Pdf.addobj_given_num pdf (document, Pdf.Dictionary [("/K", Pdf.Array [Pdf.Indirect p]); ("/P", Pdf.Indirect str); ("/S", Pdf.Name "/Document"); ("/NS", Pdf.Indirect namespace)]);
Pdf.addobj_given_num pdf (parent_tree, Pdf.Dictionary [("/Nums", Pdf.Array [Pdf.Integer 1; Pdf.Array [Pdf.Indirect p]])]);
Pdf.addobj_given_num pdf (p, Pdf.Dictionary [("/K", Pdf.Array [Pdf.Integer 0]); ("/P", Pdf.Indirect document); ("/S", Pdf.Name "/P")]);
Pdf.addobj_given_num pdf (str, Pdf.Dictionary [("/Namespaces", Pdf.Array [Pdf.Indirect namespace]); ("/Type", Pdf.Name "/StructTreeRoot");
("/K", Pdf.Array [Pdf.Indirect document]); ("/ParentTree", Pdf.Indirect parent_tree)]);
Pdf.replace_chain pdf ["/Root"] ("/StructTreeRoot", (Pdf.Indirect str))
end
else if process_struct_tree || subformat = Some Cpdfua.PDFUA1 then
begin
let str = Pdf.addobj pdf Pdf.Null in
let p = Pdf.addobj pdf Pdf.Null in
let parent_tree = Pdf.addobj pdf Pdf.Null in
Pdf.addobj_given_num pdf (parent_tree, Pdf.Dictionary [("/Nums", Pdf.Array [Pdf.Integer 1; Pdf.Array [Pdf.Indirect p]])]);
Pdf.addobj_given_num pdf (p, Pdf.Dictionary [("/K", Pdf.Array [Pdf.Integer 0]); ("/P", Pdf.Indirect str); ("/S", Pdf.Name "/P")]);
Pdf.addobj_given_num pdf (str, Pdf.Dictionary [("/Type", Pdf.Name "/StructTreeRoot"); ("/K", Pdf.Array [Pdf.Indirect p]); ("/ParentTree", Pdf.Indirect parent_tree)]);
Pdf.replace_chain pdf ["/Root"] ("/StructTreeRoot", (Pdf.Indirect str))
end;
let pages, tags = Cpdftype.typeset ~process_struct_tree margin margin margin margin papersize pdf instrs in let pages, tags = Cpdftype.typeset ~process_struct_tree margin margin margin margin papersize pdf instrs in
let pages =
map
(fun p -> if process_struct_tree then {p with Pdfpage.rest = Pdf.add_dict_entry p.Pdfpage.rest "/StructParents" (Pdf.Integer 1)} else p)
pages
in
let pdf, pageroot = Pdfpage.add_pagetree pages pdf in let pdf, pageroot = Pdfpage.add_pagetree pages pdf in
Pdfpage.add_root pageroot [] pdf Pdfpage.add_root pageroot [] pdf