Indenting

This commit is contained in:
John Whitington 2024-09-20 14:15:10 +01:00
parent bbc1e4f257
commit c281dd492d
6 changed files with 28 additions and 13 deletions

View File

@ -13,6 +13,8 @@ New features:
* Create structure information for files with -draw * Create structure information for files with -draw
* Draw can now make paragraphs with -para, -paras * Draw can now make paragraphs with -para, -paras
* Add structure information to -typeset, add -typeset-subformat * Add structure information to -typeset, add -typeset-subformat
* -typeset can make PDF/UA documents
* -jpeg, -png and friends can make PDF/UA documents
* = Supported by a grant from NLnet * = Supported by a grant from NLnet

View File

@ -548,7 +548,8 @@ type args =
mutable verify_single : string option; mutable verify_single : string option;
mutable draw_struct_tree : bool; mutable draw_struct_tree : bool;
mutable image_title : string option; mutable image_title : string option;
mutable subformat : Cpdfua.subformat option} mutable subformat : Cpdfua.subformat option;
mutable indent : float option}
let args = let args =
{op = None; {op = None;
@ -686,7 +687,8 @@ let args =
verify_single = None; verify_single = None;
draw_struct_tree = false; draw_struct_tree = false;
image_title = None; image_title = None;
subformat = None} subformat = None;
indent = None}
(* Do not reset original_filename or cpdflin or was_encrypted or (* Do not reset original_filename or cpdflin or was_encrypted or
was_decrypted_with_owner or recrypt or producer or creator or path_to_* or was_decrypted_with_owner or recrypt or producer or creator or path_to_* or
@ -812,7 +814,8 @@ let reset_arguments () =
args.verify_single <- None; args.verify_single <- None;
args.draw_struct_tree <- false; args.draw_struct_tree <- false;
args.image_title <- None; args.image_title <- None;
args.subformat <- None args.subformat <- None;
args.indent <- None
(* Prefer a) the one given with -cpdflin b) a local cpdflin, c) otherwise assume (* Prefer a) the one given with -cpdflin b) a local cpdflin, c) otherwise assume
installed at a system place *) installed at a system place *)
@ -1826,6 +1829,7 @@ let () = Cpdfdrawcontrol.getfontname := fun () -> args.fontname
let () = Cpdfdrawcontrol.getfontsize := fun () -> args.fontsize let () = Cpdfdrawcontrol.getfontsize := fun () -> args.fontsize
let () = Cpdfdrawcontrol.setfontname := setfont let () = Cpdfdrawcontrol.setfontname := setfont
let () = Cpdfdrawcontrol.setfontsize := fun s -> args.fontsize <- s let () = Cpdfdrawcontrol.setfontsize := fun s -> args.fontsize <- s
(* Install the indent getter used by Cpdfdrawcontrol: it returns the current
   value of args.indent, i.e. Some f after "-indent f" has been parsed and
   None initially or after reset_arguments. *)
let () = Cpdfdrawcontrol.getindent := fun () -> args.indent
let setlistimagesjson () = let setlistimagesjson () =
setop ListImages (); setop ListImages ();
@ -2868,6 +2872,7 @@ let specs =
("-stext", Arg.String Cpdfdrawcontrol.addspecialtext, " Draw text with %specials"); ("-stext", Arg.String Cpdfdrawcontrol.addspecialtext, " Draw text with %specials");
("-para", Arg.String Cpdfdrawcontrol.addpara, " Add a paragraph of text"); ("-para", Arg.String Cpdfdrawcontrol.addpara, " Add a paragraph of text");
("-paras", Arg.String Cpdfdrawcontrol.addparas, " Add paragraphs of text, splitting on newlines"); ("-paras", Arg.String Cpdfdrawcontrol.addparas, " Add paragraphs of text, splitting on newlines");
("-indent", Arg.Float (fun f -> args.indent <- Some f), " Set indent for paragraphs");
("-leading", Arg.Float (fun f -> Cpdfdrawcontrol.addop (Cpdfdraw.Leading f)), " Set leading"); ("-leading", Arg.Float (fun f -> Cpdfdrawcontrol.addop (Cpdfdraw.Leading f)), " Set leading");
("-charspace", Arg.Float (fun f -> Cpdfdrawcontrol.addop (Cpdfdraw.CharSpace f)), " Set character spacing"); ("-charspace", Arg.Float (fun f -> Cpdfdrawcontrol.addop (Cpdfdraw.CharSpace f)), " Set character spacing");
("-wordspace", Arg.Float (fun f -> Cpdfdrawcontrol.addop (Cpdfdraw.WordSpace f)), " Set word space"); ("-wordspace", Arg.Float (fun f -> Cpdfdrawcontrol.addop (Cpdfdraw.WordSpace f)), " Set word space");

View File

@ -50,7 +50,7 @@ type drawops =
| TextSection of drawops list | TextSection of drawops list
| Text of string | Text of string
| SpecialText of string | SpecialText of string
| Para of justification * float * string list | Para of float option * justification * float * string list
| Newline | Newline
| Leading of float | Leading of float
| CharSpace of float | CharSpace of float
@ -266,20 +266,24 @@ type structdata =
let structdata = ref [] let structdata = ref []
(* TODO: Use Uuseg for proper unicode segmentation. *) (* TODO: Use Uuseg for proper unicode segmentation. *)
let format_paragraph j w s = let format_paragraph indent j w s =
Printf.printf "indent = %f\n" indent;
let ss = String.split_on_char ' ' s in let ss = String.split_on_char ' ' s in
let rs_and_widths = ref (map runs_of_utf8 ss) in let rs_and_widths = ref (map runs_of_utf8 ss) in
let space_runs, space_width = runs_of_utf8 " " in let space_runs, space_width = runs_of_utf8 " " in
let remaining = ref w in let remaining = ref w in
let allops = ref [] in let allops = ref [] in
let ops = ref [] in let ops = ref [] in
let first = ref true in
let firstloop = ref true in
let justify ops = let justify ops =
match j with match j with
| Left -> ops | Left -> (if !first then [Pdfops.Op_Td (~-.indent, 0.)] else []) @ ops @ (if !first then [Pdfops.Op_Td (indent, 0.)] else [])
| Right -> [Pdfops.Op_Td (~-.(!remaining), 0.)] @ ops @ [Pdfops.Op_Td (!remaining, 0.)] | Right -> [Pdfops.Op_Td (~-.(!remaining), 0.)] @ ops @ [Pdfops.Op_Td (!remaining, 0.)]
| Centre -> [Pdfops.Op_Td (~-.(!remaining) /. 2., 0.)] @ ops @ [Pdfops.Op_Td (!remaining /. 2., 0.)] | Centre -> [Pdfops.Op_Td (~-.(!remaining) /. 2., 0.)] @ ops @ [Pdfops.Op_Td (!remaining /. 2., 0.)]
in in
while !rs_and_widths <> [] do while !rs_and_widths <> [] do
if !firstloop then (remaining -.= indent; clear firstloop);
let word, word_width = hd !rs_and_widths in let word, word_width = hd !rs_and_widths in
if !remaining = w then if !remaining = w then
(* If current line empty, output word. *) (* If current line empty, output word. *)
@ -300,9 +304,10 @@ let format_paragraph j w s =
(* If current line not empty, and not enough space, emit newline. *) (* If current line not empty, and not enough space, emit newline. *)
begin begin
allops =| rev (Pdfops.Op_T'::justify !ops); allops =| rev (Pdfops.Op_T'::justify !ops);
clear first;
ops := []; ops := [];
remaining := w remaining := w;
end end;
done; done;
allops =| rev (Pdfops.Op_T'::justify !ops); allops =| rev (Pdfops.Op_T'::justify !ops);
flatten (rev !allops) flatten (rev !allops)
@ -427,13 +432,13 @@ let rec ops_of_drawop struct_tree dryrun pdf endpage filename bates batespad num
let s = process_specials pdf endpage filename bates batespad num page s in let s = process_specials pdf endpage filename bates batespad num page s in
if dryrun then iter (fun c -> Hashtbl.replace (res ()).current_fontpack_codepoints c ()) (Pdftext.codepoints_of_utf8 s); if dryrun then iter (fun c -> Hashtbl.replace (res ()).current_fontpack_codepoints c ()) (Pdftext.codepoints_of_utf8 s);
fst (runs_of_utf8 s) fst (runs_of_utf8 s)
| Para (j, w, s) -> | Para (indent, j, w, s) ->
if dryrun then iter (iter (fun c -> Hashtbl.replace (res ()).current_fontpack_codepoints c ())) (map Pdftext.codepoints_of_utf8 s); if dryrun then iter (iter (fun c -> Hashtbl.replace (res ()).current_fontpack_codepoints c ())) (map Pdftext.codepoints_of_utf8 s);
let first = ref true in let first = ref true in
flatten flatten
(map (map
(function para -> (function para ->
(if not !first then ([Pdfops.Op_T']) else (clear first; [])) @ format_paragraph j w para) (if not !first && indent = None then ([Pdfops.Op_T']) else (clear first; [])) @ format_paragraph (if indent <> None && not !first then unopt indent else 0.) j w para)
s) s)
| Leading f -> [Pdfops.Op_TL f] | Leading f -> [Pdfops.Op_TL f]
| CharSpace f -> [Pdfops.Op_Tc f] | CharSpace f -> [Pdfops.Op_Tc f]

View File

@ -45,7 +45,7 @@ type drawops =
| TextSection of drawops list | TextSection of drawops list
| Text of string | Text of string
| SpecialText of string | SpecialText of string
| Para of justification * float * string list | Para of float option * justification * float * string list
| Newline | Newline
| Leading of float | Leading of float
| CharSpace of float | CharSpace of float

View File

@ -10,6 +10,7 @@ let setfontname = ref (fun _ -> Printf.printf "BAD *****\n%!")
let setfontsize = ref (fun _ -> Printf.printf "BAD ******\n%!") let setfontsize = ref (fun _ -> Printf.printf "BAD ******\n%!")
let loadttf = ref (fun _ -> Printf.printf "BAD *******\n%!") let loadttf = ref (fun _ -> Printf.printf "BAD *******\n%!")
let setembedstd14 = ref (fun _ _ -> Printf.printf "BAD ********\n%!") let setembedstd14 = ref (fun _ _ -> Printf.printf "BAD ********\n%!")
(* Hook returning the paragraph indent as a float option. This default is a
   placeholder: it prints a "BAD" marker and returns None. It is presumably
   replaced by the command-line module before addparas reads it -- verify
   against the caller. *)
let getindent = ref (fun () -> Printf.printf "BAD *********\n%!"; None)
let ttfs = null_hash () let ttfs = null_hash ()
@ -370,7 +371,7 @@ let addpara s =
add_default_fontpack (!getfontname ()); add_default_fontpack (!getfontname ());
addop (Cpdfdraw.Font (!getfontname (), !getfontsize ())); addop (Cpdfdraw.Font (!getfontname (), !getfontsize ()));
let j, w, s = jws s in let j, w, s = jws s in
addop (Cpdfdraw.Para (j, w, [s])) addop (Cpdfdraw.Para (None, j, w, [s]))
let rec split_on_newlines a = function let rec split_on_newlines a = function
| 0x005c::0x006e::t -> rev a::split_on_newlines [] t | 0x005c::0x006e::t -> rev a::split_on_newlines [] t
@ -386,4 +387,5 @@ let addparas s =
addop (Cpdfdraw.Font (!getfontname (), !getfontsize ())); addop (Cpdfdraw.Font (!getfontname (), !getfontsize ()));
let j, w, s = jws s in let j, w, s = jws s in
let splits = split_on_newlines s in let splits = split_on_newlines s in
addop (Cpdfdraw.Para (j, w, splits)) let indent = !getindent () in
addop (Cpdfdraw.Para (indent, j, w, splits))

View File

@ -3,6 +3,7 @@
val embed_font : (unit -> Cpdfembed.cpdffont) ref val embed_font : (unit -> Cpdfembed.cpdffont) ref
val getfontname : (unit -> string) ref val getfontname : (unit -> string) ref
val getfontsize : (unit -> float) ref val getfontsize : (unit -> float) ref
(* Hook returning the paragraph indent, if any. It is held in a ref so that
   another module can install the real getter. *)
val getindent : (unit -> float option) ref
val setfontname : (string -> unit) ref val setfontname : (string -> unit) ref
val setfontsize : (float -> unit) ref val setfontsize : (float -> unit) ref
val setdrawing : (unit -> unit) ref val setdrawing : (unit -> unit) ref