From c281dd492dd9d8b8cc1483d331469c2bd5dba90f Mon Sep 17 00:00:00 2001 From: John Whitington Date: Fri, 20 Sep 2024 14:15:10 +0100 Subject: [PATCH] Indenting --- Changes | 2 ++ cpdfcommand.ml | 11 ++++++++--- cpdfdraw.ml | 19 ++++++++++++------- cpdfdraw.mli | 2 +- cpdfdrawcontrol.ml | 6 ++++-- cpdfdrawcontrol.mli | 1 + 6 files changed, 28 insertions(+), 13 deletions(-) diff --git a/Changes b/Changes index ad311c2..b1bcfcd 100644 --- a/Changes +++ b/Changes @@ -13,6 +13,8 @@ New features: * Create structure information for files with -draw * Draw can now make paragraphs with -para, -paras * Add structure information to -typeset, add -typeset-subformat +* -typeset can make PDF/UA documents +* -jpeg, -png and friends can make PDF/UA documents * = Supported by a grant from NLnet diff --git a/cpdfcommand.ml b/cpdfcommand.ml index ade6ba9..336aa38 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -548,7 +548,8 @@ type args = mutable verify_single : string option; mutable draw_struct_tree : bool; mutable image_title : string option; - mutable subformat : Cpdfua.subformat option} + mutable subformat : Cpdfua.subformat option; + mutable indent : float option} let args = {op = None; @@ -686,7 +687,8 @@ let args = verify_single = None; draw_struct_tree = false; image_title = None; - subformat = None} + subformat = None; + indent = None} (* Do not reset original_filename or cpdflin or was_encrypted or was_decrypted_with_owner or recrypt or producer or creator or path_to_* or @@ -812,7 +814,8 @@ let reset_arguments () = args.verify_single <- None; args.draw_struct_tree <- false; args.image_title <- None; - args.subformat <- None + args.subformat <- None; + args.indent <- None (* Prefer a) the one given with -cpdflin b) a local cpdflin, c) otherwise assume installed at a system place *) @@ -1826,6 +1829,7 @@ let () = Cpdfdrawcontrol.getfontname := fun () -> args.fontname let () = Cpdfdrawcontrol.getfontsize := fun () -> args.fontsize let () = Cpdfdrawcontrol.setfontname := setfont let () = Cpdfdrawcontrol.setfontsize := fun s -> args.fontsize <- s +let () = Cpdfdrawcontrol.getindent := fun () -> args.indent let setlistimagesjson () = setop ListImages (); @@ -2868,6 +2872,7 @@ let specs = ("-stext", Arg.String Cpdfdrawcontrol.addspecialtext, " Draw text with %specials"); ("-para", Arg.String Cpdfdrawcontrol.addpara, " Add a paragraph of text"); ("-paras", Arg.String Cpdfdrawcontrol.addparas, " Add paragraphs of text, splitting on newlines"); + ("-indent", Arg.Float (fun f -> args.indent <- Some f), " Set indent for paragraphs"); ("-leading", Arg.Float (fun f -> Cpdfdrawcontrol.addop (Cpdfdraw.Leading f)), " Set leading"); ("-charspace", Arg.Float (fun f -> Cpdfdrawcontrol.addop (Cpdfdraw.CharSpace f)), " Set character spacing"); ("-wordspace", Arg.Float (fun f -> Cpdfdrawcontrol.addop (Cpdfdraw.WordSpace f)), " Set word space"); diff --git a/cpdfdraw.ml b/cpdfdraw.ml index 56dfef5..de3603f 100644 --- a/cpdfdraw.ml +++ b/cpdfdraw.ml @@ -50,7 +50,7 @@ type drawops = | TextSection of drawops list | Text of string | SpecialText of string - | Para of justification * float * string list + | Para of float option * justification * float * string list | Newline | Leading of float | CharSpace of float @@ -266,20 +266,24 @@ type structdata = let structdata = ref [] (* TODO: Use Uuseg for proper unicode segmentation. *) -let format_paragraph j w s = +let format_paragraph indent j w s = + Printf.printf "indent = %f\n" indent; let ss = String.split_on_char ' ' s in let rs_and_widths = ref (map runs_of_utf8 ss) in let space_runs, space_width = runs_of_utf8 " " in let remaining = ref w in let allops = ref [] in let ops = ref [] in + let first = ref true in + let firstloop = ref true in let justify ops = match j with - | Left -> ops + | Left -> (if !first then [Pdfops.Op_Td (~-.indent, 0.)] else []) @ ops @ (if !first then [Pdfops.Op_Td (indent, 0.)] else []) | Right -> [Pdfops.Op_Td (~-.(!remaining), 0.)] @ ops @ [Pdfops.Op_Td (!remaining, 0.)] | Centre -> [Pdfops.Op_Td (~-.(!remaining) /. 2., 0.)] @ ops @ [Pdfops.Op_Td (!remaining /. 2., 0.)] in while !rs_and_widths <> [] do + if !firstloop then (remaining -.= indent; clear firstloop); let word, word_width = hd !rs_and_widths in if !remaining = w then (* If current line empty, output word. *) @@ -300,9 +304,10 @@ let format_paragraph j w s = (* If current line not empty, and not enough space, emit newline. *) begin allops =| rev (Pdfops.Op_T'::justify !ops); + clear first; ops := []; - remaining := w - end + remaining := w; + end; done; allops =| rev (Pdfops.Op_T'::justify !ops); flatten (rev !allops) @@ -427,13 +432,13 @@ let rec ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num let s = process_specials pdf endpage filename bates batespad num page s in if dryrun then iter (fun c -> Hashtbl.replace (res ()).current_fontpack_codepoints c ()) (Pdftext.codepoints_of_utf8 s); fst (runs_of_utf8 s) - | Para (j, w, s) -> + | Para (indent, j, w, s) -> if dryrun then iter (iter (fun c -> Hashtbl.replace (res ()).current_fontpack_codepoints c ())) (map Pdftext.codepoints_of_utf8 s); let first = ref true in flatten (map (function para -> - (if not !first then ([Pdfops.Op_T']) else (clear first; [])) @ format_paragraph j w para) + (if not !first && indent = None then ([Pdfops.Op_T']) else (clear first; [])) @ format_paragraph (if indent <> None && not !first then unopt indent else 0.) j w para) s) | Leading f -> [Pdfops.Op_TL f] | CharSpace f -> [Pdfops.Op_Tc f] diff --git a/cpdfdraw.mli b/cpdfdraw.mli index 981ce93..78aae37 100644 --- a/cpdfdraw.mli +++ b/cpdfdraw.mli @@ -45,7 +45,7 @@ type drawops = | TextSection of drawops list | Text of string | SpecialText of string - | Para of justification * float * string list + | Para of float option * justification * float * string list | Newline | Leading of float | CharSpace of float diff --git a/cpdfdrawcontrol.ml b/cpdfdrawcontrol.ml index 20e9ee6..9562d32 100644 --- a/cpdfdrawcontrol.ml +++ b/cpdfdrawcontrol.ml @@ -10,6 +10,7 @@ let setfontname = ref (fun _ -> Printf.printf "BAD *****\n%!") let setfontsize = ref (fun _ -> Printf.printf "BAD ******\n%!") let loadttf = ref (fun _ -> Printf.printf "BAD *******\n%!") let setembedstd14 = ref (fun _ _ -> Printf.printf "BAD ********\n%!") +let getindent = ref (fun () -> Printf.printf "BAD *********\n%!"; None) let ttfs = null_hash () @@ -370,7 +371,7 @@ let addpara s = add_default_fontpack (!getfontname ()); addop (Cpdfdraw.Font (!getfontname (), !getfontsize ())); let j, w, s = jws s in - addop (Cpdfdraw.Para (j, w, [s])) + addop (Cpdfdraw.Para (None, j, w, [s])) let rec split_on_newlines a = function | 0x005c::0x006e::t -> rev a::split_on_newlines [] t @@ -386,4 +387,5 @@ let addparas s = addop (Cpdfdraw.Font (!getfontname (), !getfontsize ())); let j, w, s = jws s in let splits = split_on_newlines s in - addop (Cpdfdraw.Para (j, w, splits)) + let indent = !getindent () in + addop (Cpdfdraw.Para (indent, j, w, splits)) diff --git a/cpdfdrawcontrol.mli b/cpdfdrawcontrol.mli index f546f55..92bcfe9 100644 --- a/cpdfdrawcontrol.mli +++ b/cpdfdrawcontrol.mli @@ -3,6 +3,7 @@ val embed_font : (unit -> Cpdfembed.cpdffont) ref val getfontname : (unit -> string) ref val getfontsize : (unit -> float) ref +val getindent : (unit -> float option) ref val setfontname : (string -> unit) ref val setfontsize : (float -> unit) ref val setdrawing : (unit -> unit) ref