From 4cb6fca6af2fdaf2f7b07e99a2af0d65cff60e44 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Thu, 3 Oct 2024 12:32:26 +0100 Subject: [PATCH] Return tags from typesetter --- cpdftexttopdf.ml | 2 +- cpdftoc.ml | 2 +- cpdftype.ml | 11 ++++++++--- cpdftype.mli | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/cpdftexttopdf.ml b/cpdftexttopdf.ml index dc0567d..90cc306 100644 --- a/cpdftexttopdf.ml +++ b/cpdftexttopdf.ml @@ -92,6 +92,6 @@ let typeset ~process_struct_tree ?subformat ?title ~papersize ~font ~fontsize te let firstfont = hd (keep (function Cpdftype.Font _ -> true | _ -> false) tagged) in [firstfont; Cpdftype.BeginDocument] @ tagged in - let pages = Cpdftype.typeset ~process_struct_tree margin margin margin margin papersize pdf instrs in + let pages, tags = Cpdftype.typeset ~process_struct_tree margin margin margin margin papersize pdf instrs in let pdf, pageroot = Pdfpage.add_pagetree pages pdf in Pdfpage.add_root pageroot [] pdf diff --git a/cpdftoc.ml b/cpdftoc.ml index 81d818f..43c8319 100644 --- a/cpdftoc.ml +++ b/cpdftoc.ml @@ -156,7 +156,7 @@ let typeset_table_of_contents ~font ~fontsize ~title ~bookmark pdf = Cpdftype.NewLine]) (Pdfmarks.read_bookmarks pdf) in - let toc_pages = + let toc_pages, _ = let title = let glue = Cpdftype.VGlue (fontsize *. 2.) in if title = "" then [] else diff --git a/cpdftype.ml b/cpdftype.ml index 2a331e1..4fc061a 100644 --- a/cpdftype.ml +++ b/cpdftype.ml @@ -263,6 +263,8 @@ let typeset ~process_struct_tree lmargin rmargin tmargin bmargin papersize pdf i let thispageannotations = ref [] in let thisdestrectangles = ref [] in let pages = ref [] in + let tags = ref [] in + let tagsout = ref [] in let write_page () = let ops = if process_struct_tree then add_artifacts (rev !ops) else rev !ops in let page = @@ -272,7 +274,8 @@ let typeset ~process_struct_tree lmargin rmargin tmargin bmargin papersize pdf i Pdfpage.rotate = Pdfpage.Rotate0; Pdfpage.rest = make_annotations pdf !thispageannotations} in - pages := page :: !pages + pages := page::!pages; + tagsout := rev !tags::!tagsout in let rec typeset_element = function | Text cps -> @@ -340,9 +343,11 @@ let typeset ~process_struct_tree lmargin rmargin tmargin bmargin papersize pdf i thispageannotations := map annot !thisdestrectangles @ !thispageannotations; s.dest <- None; thisdestrectangles := [] - | Tag (s, _) -> ops := Pdfops.Op_BDC ("/" ^ s, Pdf.Dictionary [("/MCID", Pdf.Integer (mcid ()))])::!ops + | Tag (s, i) -> + tags := (s, i)::!tags; + ops := Pdfops.Op_BDC ("/" ^ s, Pdf.Dictionary [("/MCID", Pdf.Integer (mcid ()))])::!ops | EndTag -> ops := Pdfops.Op_EMC::!ops in iter typeset_element i; write_page (); - rev !pages + (rev !pages, rev !tagsout) diff --git a/cpdftype.mli b/cpdftype.mli index a0cd39c..8946bf6 100644 --- a/cpdftype.mli +++ b/cpdftype.mli @@ -24,6 +24,6 @@ val font_widths : string -> Pdftext.font -> float -> float array val width_of_string : float array -> char list -> float (** [typeset lmargin rmargin tmargin bmargin papersize pdf contents] builds a list of pages of typset content. *) -val typeset : process_struct_tree:bool -> float -> float -> float -> float -> Pdfpaper.t -> Pdf.t -> t -> Pdfpage.t list +val typeset : process_struct_tree:bool -> float -> float -> float -> float -> Pdfpaper.t -> Pdf.t -> t -> Pdfpage.t list * (string * int) list list val add_artifacts : Pdfops.t list -> Pdfops.t list