From e39af4218a8137abb56dd859598821d92c3af82b Mon Sep 17 00:00:00 2001 From: John Whitington Date: Mon, 21 Oct 2024 13:44:20 +0100 Subject: [PATCH] Moves text autotagging from textsection to -text --- cpdfdraw.ml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/cpdfdraw.ml b/cpdfdraw.ml index c85ffb4..db83e02 100644 --- a/cpdfdraw.ml +++ b/cpdfdraw.ml @@ -442,16 +442,14 @@ let rec ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num (res ()).font_size <- size; [] | TextSection ops -> + [Pdfops.Op_BT] @ ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops @ [Pdfops.Op_ET] + | Text s -> + if dryrun then iter (fun c -> Hashtbl.replace (res ()).current_fontpack_codepoints c ()) (Pdftext.codepoints_of_utf8 s); let m = mcid () in if not dryrun && !do_auto_tag then structdata := StDataMCID ("/P", m)::!structdata; (if struct_tree && !do_auto_tag then [Pdfops.Op_BDC ("/P", Pdf.Dictionary ["/MCID", Pdf.Integer m])] else []) - @ [Pdfops.Op_BT] - @ ops_of_drawops struct_tree dryrun pdf endpage filename bates batespad num page ops - @ [Pdfops.Op_ET] + @ fst (runs_of_utf8 s) @ (if struct_tree && !do_auto_tag then [Pdfops.Op_EMC] else []) - | Text s -> - if dryrun then iter (fun c -> Hashtbl.replace (res ()).current_fontpack_codepoints c ()) (Pdftext.codepoints_of_utf8 s); - fst (runs_of_utf8 s) | SpecialText s -> let s = process_specials pdf endpage filename bates batespad num page s in if dryrun then iter (fun c -> Hashtbl.replace (res ()).current_fontpack_codepoints c ()) (Pdftext.codepoints_of_utf8 s);