2021-11-18 20:09:09 +01:00
|
|
|
(* A typesetter for cpdf. A list of elements is manipulated zero or more times
|
|
|
|
to lay it out, paginate it, and so on. It is then typeset to produce a list
|
2021-12-14 13:57:27 +01:00
|
|
|
of pages.
|
|
|
|
|
|
|
|
For now, this is just an experiment for -table-of-contents and -typeset. To
|
|
|
|
be continued... *)
|
2021-11-20 00:21:37 +01:00
|
|
|
open Pdfutil
|
2021-11-18 20:09:09 +01:00
|
|
|
|
|
|
|
(* Glue *)
|
|
|
|
type glue =
|
|
|
|
{glen : float;
|
2021-11-20 00:21:37 +01:00
|
|
|
gstretch : float}
|
2021-11-18 20:09:09 +01:00
|
|
|
|
|
|
|
(* Main type *)
|
2021-11-18 23:48:25 +01:00
|
|
|
type element =
|
2021-12-02 01:11:33 +01:00
|
|
|
Text of char list (* charcodes 0..255 *)
|
2021-11-18 20:09:09 +01:00
|
|
|
| HGlue of glue
|
|
|
|
| VGlue of glue
|
|
|
|
| NewLine
|
|
|
|
| NewPage
|
2021-11-20 00:21:37 +01:00
|
|
|
| Font of (Pdftext.font * float)
|
2021-11-20 01:28:13 +01:00
|
|
|
| BeginDest of Pdfdest.t
|
|
|
|
| EndDest
|
2021-12-14 15:07:52 +01:00
|
|
|
| BeginDocument
|
2021-11-18 23:48:25 +01:00
|
|
|
|
2021-11-21 23:15:06 +01:00
|
|
|
let to_string_elt = function
|
2021-12-02 01:11:33 +01:00
|
|
|
| Text t -> implode t
|
2021-11-21 23:15:06 +01:00
|
|
|
| HGlue {glen} -> "HGLUE" ^ string_of_float glen
|
2021-11-18 23:48:25 +01:00
|
|
|
| VGlue _ -> "VGLUE"
|
|
|
|
| NewLine -> "NewLine"
|
|
|
|
| NewPage -> "NewPage"
|
|
|
|
| Font _ -> "Font"
|
2021-11-20 01:28:13 +01:00
|
|
|
| BeginDest _ -> "BeginDest"
|
|
|
|
| EndDest -> "EndDest"
|
2021-12-14 15:07:52 +01:00
|
|
|
| BeginDocument -> "BeginDocument"
|
2021-11-18 20:09:09 +01:00
|
|
|
|
2021-11-21 23:15:06 +01:00
|
|
|
let to_string es = fold_left (fun a b -> a ^ "\n" ^ b) "" (map to_string_elt es)
|
|
|
|
|
2021-11-18 23:48:25 +01:00
|
|
|
type t = element list
|
2021-11-18 20:09:09 +01:00
|
|
|
|
|
|
|
type state =
|
2021-11-18 23:48:25 +01:00
|
|
|
{mutable font : Pdftext.font option;
|
2021-11-20 00:21:37 +01:00
|
|
|
mutable fontsize : float;
|
|
|
|
mutable width_table : float array; (* Widths for charcodes 0..255 *)
|
2021-11-18 20:09:09 +01:00
|
|
|
mutable xpos : float;
|
2021-11-20 01:28:13 +01:00
|
|
|
mutable ypos : float;
|
|
|
|
mutable dest : Pdfdest.t option}
|
2021-11-18 20:09:09 +01:00
|
|
|
|
|
|
|
let initial_state () =
|
2021-11-18 23:48:25 +01:00
|
|
|
{font = None;
|
2021-11-20 00:21:37 +01:00
|
|
|
fontsize = 0.;
|
|
|
|
width_table = [||];
|
2021-11-18 20:09:09 +01:00
|
|
|
xpos = 0.;
|
2021-11-20 01:28:13 +01:00
|
|
|
ypos = 0.;
|
|
|
|
dest = None}
|
2021-11-18 20:09:09 +01:00
|
|
|
|
2021-11-20 00:21:37 +01:00
|
|
|
let font_widths f fontsize =
|
2022-09-14 19:08:14 +02:00
|
|
|
match f with
|
|
|
|
| Pdftext.StandardFont (sf, _) ->
|
|
|
|
Array.init
|
|
|
|
256
|
|
|
|
(fun x ->
|
|
|
|
fontsize
|
|
|
|
*. float_of_int
|
|
|
|
(Pdfstandard14.textwidth false Pdftext.WinAnsiEncoding sf (string_of_char (char_of_int x)))
|
|
|
|
/. 1000.)
|
|
|
|
| Pdftext.SimpleFont {fontmetrics = Some m} -> m
|
|
|
|
| _ -> raise (Pdf.PDFError "Cpdftype: Unsupported font")
|
2021-11-18 23:48:25 +01:00
|
|
|
|
2021-12-02 01:20:39 +01:00
|
|
|
let width_of_string ws s =
|
|
|
|
let w = ref 0. in
|
|
|
|
iter (fun s -> w := !w +. ws.(int_of_char s)) s;
|
|
|
|
!w
|
|
|
|
|
2021-11-20 01:28:13 +01:00
|
|
|
(* For now, split each text element into words, and lay them out ragged right
|
|
|
|
on one long page. Words longer than a whole line just fall off the margin.
|
|
|
|
Turn text newlines into real newlines. *)
|
2021-11-22 23:44:14 +01:00
|
|
|
(* Split into words on spaces. Find how many words (at least one, to make
|
|
|
|
progress) fit into the available space. We set needs_newline if the next
|
|
|
|
word would overflow. Return (text, needs_newline, remaining_text) *)
|
|
|
|
let split_text space_left widths t =
|
2021-12-02 01:11:33 +01:00
|
|
|
let chars = ref t in
|
2021-11-22 23:44:14 +01:00
|
|
|
let words = ref [] in
|
|
|
|
let space_left = ref space_left in
|
|
|
|
let return needs_newline =
|
2021-12-02 01:11:33 +01:00
|
|
|
(flatten (rev !words), needs_newline, !chars)
|
2021-11-22 23:44:14 +01:00
|
|
|
in
|
|
|
|
try
|
|
|
|
while !chars <> [] do
|
|
|
|
let word, rest = cleavewhile (neq ' ') !chars in
|
|
|
|
let w = width_of_string widths word in
|
|
|
|
if !words = [] || w < !space_left
|
2021-11-23 23:31:16 +01:00
|
|
|
then
|
|
|
|
let is_last_word = rest = [] in
|
|
|
|
let new_word = if is_last_word then word else word @ [' '] in
|
|
|
|
begin
|
|
|
|
words := new_word::!words;
|
|
|
|
space_left := !space_left -. w -. (if is_last_word then 0. else width_of_string widths [' '])
|
|
|
|
end
|
2021-11-22 23:44:14 +01:00
|
|
|
else raise Exit;
|
|
|
|
chars := if rest = [] then [] else tl rest;
|
|
|
|
done;
|
|
|
|
return false
|
|
|
|
with
|
|
|
|
Exit -> return true
|
2021-11-20 01:28:13 +01:00
|
|
|
|
2021-11-18 20:09:09 +01:00
|
|
|
let layout lmargin rmargin papersize i =
|
2021-11-19 00:23:38 +01:00
|
|
|
let width =
|
|
|
|
Pdfunits.convert 72. (Pdfpaper.unit papersize) Pdfunits.PdfPoint (Pdfpaper.width papersize)
|
|
|
|
in
|
|
|
|
let o = ref [] in
|
2021-11-18 23:48:25 +01:00
|
|
|
let s = initial_state () in
|
2021-11-20 00:21:37 +01:00
|
|
|
let xpos_max = width -. lmargin in
|
2021-11-19 00:23:38 +01:00
|
|
|
s.xpos <- lmargin;
|
2021-11-20 00:21:37 +01:00
|
|
|
let rec layout_element = function
|
|
|
|
| Font (f, fontsize) ->
|
|
|
|
s.width_table <- font_widths f fontsize;
|
|
|
|
o := Font (f, fontsize) :: !o
|
|
|
|
| Text text ->
|
2021-12-02 01:11:33 +01:00
|
|
|
if text = [] then () else
|
2021-11-22 23:44:14 +01:00
|
|
|
begin
|
|
|
|
let this_line, needs_newline, remaining_text =
|
|
|
|
split_text (xpos_max -. s.xpos) s.width_table text
|
|
|
|
in
|
|
|
|
o := Text this_line :: !o;
|
2021-12-02 01:11:33 +01:00
|
|
|
s.xpos <- s.xpos +. width_of_string s.width_table this_line;
|
2021-11-22 23:44:14 +01:00
|
|
|
if needs_newline then layout_element NewLine;
|
2021-12-02 01:11:33 +01:00
|
|
|
if remaining_text <> [] then layout_element (Text remaining_text)
|
2021-11-22 23:44:14 +01:00
|
|
|
end
|
2021-11-20 00:21:37 +01:00
|
|
|
| HGlue {glen} as glue ->
|
|
|
|
s.xpos <- s.xpos +. glen;
|
|
|
|
o := glue :: !o;
|
|
|
|
if s.xpos >= xpos_max then layout_element NewLine
|
2021-11-22 02:27:51 +01:00
|
|
|
| NewLine ->
|
|
|
|
s.xpos <- lmargin;
|
|
|
|
o := NewLine :: !o
|
|
|
|
| x ->
|
|
|
|
o := x :: !o
|
2021-11-20 00:21:37 +01:00
|
|
|
in
|
|
|
|
iter layout_element i;
|
2021-11-19 00:23:38 +01:00
|
|
|
rev !o
|
2021-11-18 20:09:09 +01:00
|
|
|
|
2021-11-23 00:43:45 +01:00
|
|
|
(* Paginate, simply line-based. When ypos + lineheight exceeds max_ypos, we insert a page break. *)
|
|
|
|
let paginate tmargin bmargin papersize i =
|
|
|
|
let height = Pdfunits.convert 72. (Pdfpaper.unit papersize) Pdfunits.PdfPoint (Pdfpaper.height papersize) in
|
|
|
|
let o = ref [] in
|
|
|
|
let s = initial_state () in
|
|
|
|
s.ypos <- tmargin;
|
2021-12-06 01:25:50 +01:00
|
|
|
let max_ypos = height -. bmargin in
|
2021-11-23 00:43:45 +01:00
|
|
|
let rec process = function
|
|
|
|
| VGlue {glen} as glue ->
|
|
|
|
s.ypos <- s.ypos +. glen;
|
|
|
|
o := glue :: !o;
|
|
|
|
if s.ypos > max_ypos then process NewPage
|
|
|
|
| NewLine ->
|
|
|
|
s.ypos <- s.ypos +. s.fontsize *. 1.3;
|
|
|
|
o := NewLine::!o;
|
|
|
|
if s.ypos > max_ypos then process NewPage
|
|
|
|
| Font (f, fs) ->
|
|
|
|
s.font <- Some f;
|
|
|
|
s.fontsize <- fs;
|
|
|
|
o := Font (f, fs)::!o
|
|
|
|
| NewPage ->
|
2021-12-14 15:19:58 +01:00
|
|
|
s.ypos <- tmargin +. s.fontsize;
|
2021-11-23 00:43:45 +01:00
|
|
|
o := NewPage::!o
|
2021-12-14 15:19:58 +01:00
|
|
|
| BeginDocument ->
|
|
|
|
s.ypos <- tmargin +. s.fontsize;
|
|
|
|
o := BeginDocument::!o
|
2021-11-23 00:43:45 +01:00
|
|
|
| x -> o := x::!o
|
|
|
|
in
|
|
|
|
iter process i;
|
|
|
|
rev !o
|
2021-11-18 20:09:09 +01:00
|
|
|
|
2021-11-18 23:48:25 +01:00
|
|
|
let make_resources fontobjnums =
|
|
|
|
Pdf.Dictionary
|
|
|
|
[("/Font", Pdf.Dictionary (map (fun fo -> ("/F" ^ string_of_int fo, Pdf.Indirect fo)) fontobjnums))]
|
|
|
|
|
2021-11-23 23:20:38 +01:00
|
|
|
let make_annotations annots =
|
|
|
|
if annots = [] then Pdf.Dictionary [] else
|
|
|
|
Pdf.Dictionary ["/Annots", Pdf.Array annots]
|
|
|
|
|
2021-11-18 23:48:25 +01:00
|
|
|
(* At this stage, just Font and Text and HGlue 0. and VGlue 0. and Newline and
|
|
|
|
NewPage elements. Split on NewPages, typeset each page, add font
|
|
|
|
dictionaries. New page only
|
|
|
|
creates a page when that page has content. *)
|
|
|
|
let typeset lmargin rmargin tmargin bmargin papersize pdf i =
|
2021-11-23 23:31:16 +01:00
|
|
|
let debug = false in
|
|
|
|
if debug then (print_endline "***input:\n\n"; print_endline (to_string i));
|
2021-11-18 23:48:25 +01:00
|
|
|
let i = layout lmargin rmargin papersize i in
|
2021-11-23 23:31:16 +01:00
|
|
|
if debug then (print_endline "***after layout:\n\n"; print_endline (to_string i));
|
2021-11-18 23:48:25 +01:00
|
|
|
let i = paginate tmargin bmargin papersize i in
|
2021-11-23 23:31:16 +01:00
|
|
|
if debug then (print_endline "***after pagination:\n\n"; print_endline (to_string i));
|
2021-11-19 00:23:38 +01:00
|
|
|
let height = Pdfunits.convert 72. (Pdfpaper.unit papersize) Pdfunits.PdfPoint (Pdfpaper.height papersize) in
|
2021-11-18 23:48:25 +01:00
|
|
|
let s = initial_state () in
|
|
|
|
s.xpos <- lmargin;
|
2021-11-19 00:23:38 +01:00
|
|
|
s.ypos <- tmargin;
|
2021-11-18 23:48:25 +01:00
|
|
|
let ops = ref [] in
|
|
|
|
let fonts = ref [] in
|
|
|
|
let thispagefontnums = ref [] in
|
2021-11-23 23:20:38 +01:00
|
|
|
let thispageannotations = ref [] in
|
|
|
|
let thisdestrectangles = ref [] in
|
2021-11-18 23:48:25 +01:00
|
|
|
let pages = ref [] in
|
|
|
|
let write_page () =
|
|
|
|
if !ops <> [] then
|
|
|
|
let page =
|
|
|
|
{Pdfpage.content = [Pdfops.stream_of_ops (rev !ops)];
|
|
|
|
Pdfpage.mediabox = Pdfpage.rectangle_of_paper papersize;
|
|
|
|
Pdfpage.resources = make_resources !thispagefontnums;
|
|
|
|
Pdfpage.rotate = Pdfpage.Rotate0;
|
2021-11-23 23:20:38 +01:00
|
|
|
Pdfpage.rest = make_annotations !thispageannotations}
|
2021-11-18 23:48:25 +01:00
|
|
|
in
|
|
|
|
pages := page :: !pages
|
|
|
|
in
|
2021-11-20 00:21:37 +01:00
|
|
|
let rec typeset_element = function
|
2021-11-18 23:48:25 +01:00
|
|
|
| Text cps ->
|
2021-11-20 00:21:37 +01:00
|
|
|
ops :=
|
|
|
|
Pdfops.Op_Q
|
|
|
|
::Pdfops.Op_ET
|
2021-12-02 01:11:33 +01:00
|
|
|
::Pdfops.Op_Tj (implode cps)
|
2021-11-20 00:21:37 +01:00
|
|
|
::Pdfops.Op_BT
|
|
|
|
::Pdfops.Op_cm (Pdftransform.mktranslate s.xpos (height -. s.ypos))
|
|
|
|
::Pdfops.Op_q
|
2021-11-23 23:20:38 +01:00
|
|
|
::!ops;
|
2021-11-20 01:28:13 +01:00
|
|
|
(* If a destination, add the rectangle to the pile of rectangles for this annotation. *)
|
2021-11-23 23:20:38 +01:00
|
|
|
if s.dest <> None then
|
2021-12-07 00:33:18 +01:00
|
|
|
begin
|
2021-11-23 23:20:38 +01:00
|
|
|
let minx, miny = s.xpos, height -. s.ypos in
|
2021-12-07 00:33:18 +01:00
|
|
|
thisdestrectangles := (minx, miny, minx +. width_of_string s.width_table cps, miny +. s.fontsize)::!thisdestrectangles
|
|
|
|
end;
|
|
|
|
s.xpos <- s.xpos +. width_of_string s.width_table cps
|
2021-11-18 23:48:25 +01:00
|
|
|
| Font (f, fontsize) ->
|
|
|
|
let name, objnum =
|
|
|
|
match List.assoc_opt f !fonts with
|
|
|
|
| Some objnum -> ("/F" ^ string_of_int objnum, objnum)
|
|
|
|
| None ->
|
|
|
|
let num = Pdftext.write_font pdf f in
|
|
|
|
let n = "/F" ^ string_of_int num in
|
|
|
|
fonts := (f, num) :: !fonts;
|
|
|
|
(n, num)
|
|
|
|
in
|
2021-11-23 23:20:38 +01:00
|
|
|
s.width_table <- font_widths f fontsize;
|
2021-11-18 23:48:25 +01:00
|
|
|
s.font <- Some f;
|
2021-11-20 00:21:37 +01:00
|
|
|
s.fontsize <- fontsize;
|
2021-11-18 23:48:25 +01:00
|
|
|
thispagefontnums := objnum :: !thispagefontnums;
|
|
|
|
ops := Pdfops.Op_Tf (name, fontsize)::!ops
|
|
|
|
| HGlue {glen} ->
|
|
|
|
s.xpos <- s.xpos +. glen
|
|
|
|
| VGlue {glen} ->
|
2021-11-19 00:23:38 +01:00
|
|
|
s.ypos <- s.ypos +. glen
|
2021-11-18 23:48:25 +01:00
|
|
|
| NewLine ->
|
2021-11-20 00:21:37 +01:00
|
|
|
s.xpos <- lmargin;
|
|
|
|
typeset_element (VGlue {glen = s.fontsize *. 1.3; gstretch = 0.})
|
2021-11-18 23:48:25 +01:00
|
|
|
| NewPage ->
|
|
|
|
write_page ();
|
2021-11-19 01:41:42 +01:00
|
|
|
thispagefontnums := [];
|
2021-11-23 23:20:38 +01:00
|
|
|
thispageannotations := [];
|
2021-11-18 23:48:25 +01:00
|
|
|
ops := [];
|
2021-11-23 00:43:45 +01:00
|
|
|
if s.font <> None then typeset_element (Font (unopt s.font, s.fontsize));
|
2021-11-19 01:41:42 +01:00
|
|
|
s.xpos <- lmargin;
|
2021-12-14 15:07:52 +01:00
|
|
|
s.ypos <- tmargin +. s.fontsize
|
|
|
|
| BeginDocument ->
|
|
|
|
s.ypos <- tmargin +. s.fontsize
|
2021-11-20 01:28:13 +01:00
|
|
|
| BeginDest dest ->
|
|
|
|
s.dest <- Some dest
|
|
|
|
| EndDest ->
|
2021-11-23 23:20:38 +01:00
|
|
|
if !thisdestrectangles <> [] && s.dest <> None then
|
|
|
|
let annot (minx, miny, maxx, maxy) =
|
|
|
|
Pdf.Dictionary
|
|
|
|
[("/Type", Pdf.Name "/Annot");
|
|
|
|
("/Subtype", Pdf.Name "/Link");
|
2021-11-23 23:31:16 +01:00
|
|
|
("/Border", Pdf.Array [Pdf.Real 0.; Pdf.Real 0.; Pdf.Real 0.]);
|
2021-11-23 23:20:38 +01:00
|
|
|
("/Rect", Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy]);
|
|
|
|
("/Dest", Pdfdest.pdfobject_of_destination (unopt s.dest))]
|
|
|
|
in
|
|
|
|
thispageannotations := map annot !thisdestrectangles @ !thispageannotations;
|
|
|
|
s.dest <- None;
|
|
|
|
thisdestrectangles := []
|
2021-11-18 23:48:25 +01:00
|
|
|
in
|
|
|
|
iter typeset_element i;
|
|
|
|
write_page ();
|
|
|
|
rev !pages
|