From 3d8f233c0d3d3054ce6a33773ad2db90690e73c3 Mon Sep 17 00:00:00 2001 From: John Whitington Date: Wed, 2 Oct 2024 16:39:16 +0100 Subject: [PATCH] Pagination of tags with paragraph splitting --- cpdftexttopdf.ml | 4 ++-- cpdftype.ml | 21 ++++++++++++++++----- cpdftype.mli | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/cpdftexttopdf.ml b/cpdftexttopdf.ml index bb782fc..dc0567d 100644 --- a/cpdftexttopdf.ml +++ b/cpdftexttopdf.ml @@ -47,12 +47,12 @@ let of_utf8_with_newlines fontpack fontsize t = (* Post process, adding Tag / EndTag around paragraphs *) let rec tag_paragraphs = function | Cpdftype.NewLine::Cpdftype.NewLine::t -> - Cpdftype.EndTag::Cpdftype.NewLine::Cpdftype.NewLine::Cpdftype.Tag "P"::tag_paragraphs t + Cpdftype.EndTag::Cpdftype.NewLine::Cpdftype.NewLine::Cpdftype.Tag ("P", 0)::tag_paragraphs t | x::t -> x::tag_paragraphs t | [] -> [Cpdftype.EndTag] let tag_paragraphs l = - Cpdftype.Tag "P"::tag_paragraphs l + Cpdftype.Tag ("P", 0)::tag_paragraphs l let typeset ~process_struct_tree ?subformat ?title ~papersize ~font ~fontsize text = let process_struct_tree = diff --git a/cpdftype.ml b/cpdftype.ml index 7497754..7190989 100644 --- a/cpdftype.ml +++ b/cpdftype.ml @@ -18,7 +18,7 @@ type element = | BeginDest of Pdfdest.t | EndDest | BeginDocument -| Tag of string +| Tag of string * int | EndTag let to_string_elt = function @@ -31,7 +31,7 @@ let to_string_elt = function | BeginDest _ -> "BeginDest" | EndDest -> "EndDest" | BeginDocument -> "BeginDocument" - | Tag s -> "Tag " ^ s + | Tag (s, i) -> "Tag " ^ s ^ " " ^ string_of_int i | EndTag -> "EndTag" let to_string es = fold_left (fun a b -> a ^ "\n" ^ b) "" (map to_string_elt es) @@ -183,13 +183,16 @@ let layout lmargin rmargin papersize i = iter layout_element i; rev !o -(* Paginate, simply line-based. When ypos + lineheight exceeds max_ypos, we insert a page break. *) +(* Paginate, simply line-based. When ypos + lineheight exceeds max_ypos, we + insert a page break. In addition, we re-write any paragraph tag/endtag to + make sure they appear on both pages. *) let paginate tmargin bmargin papersize i = let height = Pdfunits.points (Pdfpaper.height papersize) (Pdfpaper.unit papersize) in let o = ref [] in let s = initial_state () in s.ypos <- tmargin; let max_ypos = height -. bmargin in + let tag = ref None in let rec process = function | VGlue len as glue -> s.ypos <- s.ypos +. len; @@ -206,10 +209,18 @@ let paginate tmargin bmargin papersize i = o := Font (id, f, fs)::!o | NewPage -> s.ypos <- tmargin +. s.fontsize; - o := NewPage::!o + begin match !tag with Some (s, i) -> o := EndTag::!o | None -> () end; + o := NewPage::!o; + begin match !tag with Some (s, i) -> o := Tag (s, i)::!o | None -> () end | BeginDocument -> s.ypos <- tmargin +. s.fontsize; o := BeginDocument::!o + | Tag (s, i) -> + tag := Some (s, i); + o := Tag (s, i)::!o + | EndTag -> + tag := None; + o := EndTag::!o | x -> o := x::!o in iter process i; @@ -323,7 +334,7 @@ let typeset ~process_struct_tree lmargin rmargin tmargin bmargin papersize pdf i thispageannotations := map annot !thisdestrectangles @ !thispageannotations; s.dest <- None; thisdestrectangles := [] - | Tag s -> ops := Pdfops.Op_BDC ("/" ^ s, Pdf.Dictionary [("/MCID", Pdf.Integer (mcid ()))])::!ops + | Tag (s, _) -> ops := Pdfops.Op_BDC ("/" ^ s, Pdf.Dictionary [("/MCID", Pdf.Integer (mcid ()))])::!ops | EndTag -> ops := Pdfops.Op_EMC::!ops in iter typeset_element i; diff --git a/cpdftype.mli b/cpdftype.mli index 1af6bb6..a0cd39c 100644 --- a/cpdftype.mli +++ b/cpdftype.mli @@ -9,7 +9,7 @@ type element = | BeginDest of Pdfdest.t | EndDest | BeginDocument -| Tag of string +| Tag of string * int | EndTag type t = element list