From 31dec76f4be88ac6b616706d7b14e5fa4b97215e Mon Sep 17 00:00:00 2001 From: John Whitington Date: Fri, 4 Oct 2024 13:41:47 +0100 Subject: [PATCH] Some MCRs --- cpdftexttopdf.ml | 60 ++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/cpdftexttopdf.ml b/cpdftexttopdf.ml index 372008b..c872b86 100644 --- a/cpdftexttopdf.ml +++ b/cpdftexttopdf.ml @@ -128,30 +128,6 @@ let typeset ~process_struct_tree ?subformat ?title ~papersize ~font ~fontsize te Printf.printf "Paragraph:\n"; iter (fun (para, page, mcid) -> Printf.printf "Para %i, Page %i, MCID %i\n" para page mcid) parts_of_para) nodes; - if subformat = Some Cpdfua.PDFUA2 then - begin - let str = Pdf.addobj pdf Pdf.Null in - let p = Pdf.addobj pdf Pdf.Null in - let parent_tree = Pdf.addobj pdf Pdf.Null in - let namespace = Pdf.addobj pdf (Pdf.Dictionary [("/NS", Pdf.String "http://iso.org/pdf2/ssn")]) in - let document = Pdf.addobj pdf Pdf.Null in - Pdf.addobj_given_num pdf (document, Pdf.Dictionary [("/K", Pdf.Array [Pdf.Indirect p]); ("/P", Pdf.Indirect str); ("/S", Pdf.Name "/Document"); ("/NS", Pdf.Indirect namespace)]); - Pdf.addobj_given_num pdf (parent_tree, Pdf.Dictionary [("/Nums", Pdf.Array [Pdf.Integer 1; Pdf.Array [Pdf.Indirect p]])]); - Pdf.addobj_given_num pdf (p, Pdf.Dictionary [("/K", Pdf.Array [Pdf.Integer 0]); ("/P", Pdf.Indirect document); ("/S", Pdf.Name "/P")]); - Pdf.addobj_given_num pdf (str, Pdf.Dictionary [("/Namespaces", Pdf.Array [Pdf.Indirect namespace]); ("/Type", Pdf.Name "/StructTreeRoot"); - ("/K", Pdf.Array [Pdf.Indirect document]); ("/ParentTree", Pdf.Indirect parent_tree)]); - Pdf.replace_chain pdf ["/Root"] ("/StructTreeRoot", (Pdf.Indirect str)) - end - else if process_struct_tree || subformat = Some Cpdfua.PDFUA1 then - begin - let str = Pdf.addobj pdf Pdf.Null in - let p = Pdf.addobj pdf Pdf.Null in - let parent_tree = Pdf.addobj pdf Pdf.Null in - Pdf.addobj_given_num pdf (parent_tree, Pdf.Dictionary [("/Nums", Pdf.Array [Pdf.Integer 1; Pdf.Array [Pdf.Indirect p]])]); - Pdf.addobj_given_num pdf (p, Pdf.Dictionary [("/K", Pdf.Array [Pdf.Integer 0]); ("/P", Pdf.Indirect str); ("/S", Pdf.Name "/P")]); - Pdf.addobj_given_num pdf (str, Pdf.Dictionary [("/Type", Pdf.Name "/StructTreeRoot"); ("/K", Pdf.Array [Pdf.Indirect p]); ("/ParentTree", Pdf.Indirect parent_tree)]); - Pdf.replace_chain pdf ["/Root"] ("/StructTreeRoot", (Pdf.Indirect str)) - end; let pages = map2 (fun pn p -> if process_struct_tree then {p with Pdfpage.rest = Pdf.add_dict_entry p.Pdfpage.rest "/StructParents" (Pdf.Integer pn)} else p) @@ -159,4 +135,38 @@ let typeset ~process_struct_tree ?subformat ?title ~papersize ~font ~fontsize te pages in let pdf, pageroot = Pdfpage.add_pagetree pages pdf in - Pdfpage.add_root pageroot [] pdf + let pdf = Pdfpage.add_root pageroot [] pdf in + let refnums = let ns = Pdf.page_reference_numbers pdf in combine (indx0 ns) ns in + if subformat = Some Cpdfua.PDFUA2 then + begin + let str = Pdf.addobj pdf Pdf.Null in + let p = Pdf.addobj pdf Pdf.Null in + let parent_tree = Pdf.addobj pdf Pdf.Null in + let namespace = Pdf.addobj pdf (Pdf.Dictionary [("/NS", Pdf.String "http://iso.org/pdf2/ssn")]) in + let document = Pdf.addobj pdf Pdf.Null in + Pdf.addobj_given_num pdf (document, Pdf.Dictionary [("/K", Pdf.Array [Pdf.Indirect p]); ("/P", Pdf.Indirect str); ("/S", Pdf.Name "/Document"); ("/NS", Pdf.Indirect namespace)]); + Pdf.addobj_given_num pdf (parent_tree, Pdf.Dictionary [("/Nums", Pdf.Array [Pdf.Integer 1; Pdf.Array [Pdf.Indirect p]])]); + Pdf.addobj_given_num pdf (p, Pdf.Dictionary [("/K", Pdf.Array [Pdf.Integer 0]); ("/P", Pdf.Indirect document); ("/S", Pdf.Name "/P")]); + Pdf.addobj_given_num pdf (str, Pdf.Dictionary [("/Namespaces", Pdf.Array [Pdf.Indirect namespace]); ("/Type", Pdf.Name "/StructTreeRoot"); + ("/K", Pdf.Array [Pdf.Indirect document]); ("/ParentTree", Pdf.Indirect parent_tree)]); + Pdf.replace_chain pdf ["/Root"] ("/StructTreeRoot", (Pdf.Indirect str)) + end + else if process_struct_tree || subformat = Some Cpdfua.PDFUA1 then + begin + let str = Pdf.addobj pdf Pdf.Null in + let p = Pdf.addobj pdf Pdf.Null in + let parent_tree = Pdf.addobj pdf Pdf.Null in + let topks = + map + (fun parts_of_para -> + let ks = + map (fun (_, pagenumber, mcid) -> Pdf.Dictionary [("/Type", Pdf.Name "/MCR"); ("/Pg", Pdf.Indirect (unopt (lookup pagenumber refnums))); ("/MCID", Pdf.Integer mcid)]) parts_of_para + in + Pdf.Indirect (Pdf.addobj pdf (Pdf.Dictionary [("/K", Pdf.Array ks); ("/P", Pdf.Indirect str); ("/S", Pdf.Name "/P")]))) + nodes + in + Pdf.addobj_given_num pdf (parent_tree, Pdf.Dictionary [("/Nums", Pdf.Array [Pdf.Integer 1; Pdf.Array [Pdf.Indirect p]])]); + Pdf.addobj_given_num pdf (str, Pdf.Dictionary [("/Type", Pdf.Name "/StructTreeRoot"); ("/K", Pdf.Array topks); ("/ParentTree", Pdf.Indirect parent_tree)]); + Pdf.replace_chain pdf ["/Root"] ("/StructTreeRoot", (Pdf.Indirect str)) + end; + pdf