cpdf-source/cpdftype.ml

193 lines
6.3 KiB
OCaml
Raw Normal View History

2021-11-18 20:09:09 +01:00
(* A typesetter for cpdf. A list of elements is manipulated zero or more times
to lay it out, paginate it, and so on. It is then typeset to produce a list
of pages *)
2021-11-20 00:21:37 +01:00
(* FIXME We need to make Pdfstandard14 width calculations much more efficient
by caching so that we are not making a table up for each character! *)
(* FIXME We need to reintroduce kerning in Pdfstandard14. *)
open Pdfutil
2021-11-18 20:09:09 +01:00
(* Glue *)
type glue =
{glen : float;
2021-11-20 00:21:37 +01:00
gstretch : float}
2021-11-18 20:09:09 +01:00
(* Main type *)
2021-11-18 23:48:25 +01:00
type element =
2021-11-20 00:21:37 +01:00
Text of string (* WinAnsiEncoding *)
2021-11-18 20:09:09 +01:00
| HGlue of glue
| VGlue of glue
| NewLine
| NewPage
2021-11-20 00:21:37 +01:00
| Font of (Pdftext.font * float)
2021-11-18 23:48:25 +01:00
let string_of_element = function
2021-11-20 00:21:37 +01:00
| Text t -> t
2021-11-18 23:48:25 +01:00
| HGlue _ -> "HGLUE"
| VGlue _ -> "VGLUE"
| NewLine -> "NewLine"
| NewPage -> "NewPage"
| Font _ -> "Font"
2021-11-18 20:09:09 +01:00
2021-11-20 00:21:37 +01:00
let indent x = HGlue {glen = x; gstretch = 0.}
let newpara x = VGlue {glen = x; gstretch = 0.}
2021-11-18 20:09:09 +01:00
2021-11-18 23:48:25 +01:00
type t = element list
2021-11-18 20:09:09 +01:00
2021-11-20 00:21:37 +01:00
let of_utf8 (f, fontsize) t =
let pdf = Pdf.empty () in
let fontdict = Pdftext.write_font pdf f in
let extractor = Pdftext.charcode_extractor_of_font pdf (Pdf.Indirect fontdict) in
let charcodes = Pdftext.codepoints_of_utf8 t in
charcodes |> option_map extractor |> map char_of_int |> implode
let times_roman_12 = (Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding), 12.)
let times_italic_10 = (Pdftext.StandardFont (Pdftext.TimesItalic, Pdftext.WinAnsiEncoding), 10.)
let times_bold_10 = (Pdftext.StandardFont (Pdftext.TimesBold, Pdftext.WinAnsiEncoding), 10.)
2021-11-18 20:09:09 +01:00
let example =
2021-11-20 00:21:37 +01:00
[Font times_roman_12;
newpara 12.; (* set up top of page correctly *)
Text (of_utf8 times_roman_12 "Jackdaws love my Sphinx of Quartz. And this, this is the second sentence to provoke a line-break. We need rather more text than one might think in this diminutive font.");
2021-11-18 23:48:25 +01:00
NewLine;
2021-11-20 00:21:37 +01:00
Text (of_utf8 times_roman_12 "After the newline... ");
newpara (12. *. 1.3);
indent 32.;
Font times_italic_10;
Text (of_utf8 times_italic_10 "The second paragraph");
2021-11-19 01:41:42 +01:00
NewPage;
2021-11-20 00:21:37 +01:00
newpara 10.; (* set up top of page *)
Font times_bold_10;
Text (of_utf8 times_bold_10 "A little too bold");
2021-11-19 01:41:42 +01:00
]
2021-11-18 20:09:09 +01:00
type state =
2021-11-18 23:48:25 +01:00
{mutable font : Pdftext.font option;
2021-11-20 00:21:37 +01:00
mutable fontsize : float;
mutable width_table : float array; (* Widths for charcodes 0..255 *)
2021-11-18 20:09:09 +01:00
mutable xpos : float;
mutable ypos : float}
let initial_state () =
2021-11-18 23:48:25 +01:00
{font = None;
2021-11-20 00:21:37 +01:00
fontsize = 0.;
width_table = [||];
2021-11-18 20:09:09 +01:00
xpos = 0.;
ypos = 0.}
2021-11-20 00:21:37 +01:00
let font_widths f fontsize =
let w = fontsize *. (600. /. 1000.) in
Array.make 256 w
2021-11-18 23:48:25 +01:00
2021-11-20 00:21:37 +01:00
(* For now, split each text element into words, and lay them out ragged right.
Words longer than a whole line just fall off the margin. Turn text newlines
into real newlines. *)
2021-11-18 20:09:09 +01:00
let layout lmargin rmargin papersize i =
2021-11-19 00:23:38 +01:00
let width =
Pdfunits.convert 72. (Pdfpaper.unit papersize) Pdfunits.PdfPoint (Pdfpaper.width papersize)
in
let o = ref [] in
2021-11-18 23:48:25 +01:00
let s = initial_state () in
2021-11-20 00:21:37 +01:00
let xpos_max = width -. lmargin in
2021-11-19 00:23:38 +01:00
s.xpos <- lmargin;
2021-11-20 00:21:37 +01:00
let rec layout_element = function
| Font (f, fontsize) ->
s.width_table <- font_widths f fontsize;
o := Font (f, fontsize) :: !o
| Text text ->
o := Text text :: !o
(* 1. If it all fits, just pass on, adding to xpos *)
(* 2. If not, layout one line, splitting on words, and add a newline and recurse. *)
| HGlue {glen} as glue ->
s.xpos <- s.xpos +. glen;
o := glue :: !o;
if s.xpos >= xpos_max then layout_element NewLine
| x -> o := x :: !o
in
iter layout_element i;
2021-11-19 00:23:38 +01:00
rev !o
2021-11-18 20:09:09 +01:00
(* Resolve all hglue stretches, insert NewPage as needed. *)
let paginate tmargin bmargin papersize i = i
2021-11-18 23:48:25 +01:00
let make_resources fontobjnums =
Pdf.Dictionary
[("/Font", Pdf.Dictionary (map (fun fo -> ("/F" ^ string_of_int fo, Pdf.Indirect fo)) fontobjnums))]
(* At this stage, just Font and Text and HGlue 0. and VGlue 0. and Newline and
NewPage elements. Split on NewPages, typeset each page, add font
dictionaries. New page only
creates a page when that page has content. *)
let typeset lmargin rmargin tmargin bmargin papersize pdf i =
let i = layout lmargin rmargin papersize i in
let i = paginate tmargin bmargin papersize i in
2021-11-19 00:23:38 +01:00
let height = Pdfunits.convert 72. (Pdfpaper.unit papersize) Pdfunits.PdfPoint (Pdfpaper.height papersize) in
2021-11-18 23:48:25 +01:00
let s = initial_state () in
s.xpos <- lmargin;
2021-11-19 00:23:38 +01:00
s.ypos <- tmargin;
2021-11-18 23:48:25 +01:00
let ops = ref [] in
let fonts = ref [] in
let thispagefontnums = ref [] in
let pages = ref [] in
let write_page () =
if !ops <> [] then
let page =
{Pdfpage.content = [Pdfops.stream_of_ops (rev !ops)];
Pdfpage.mediabox = Pdfpage.rectangle_of_paper papersize;
Pdfpage.resources = make_resources !thispagefontnums;
Pdfpage.rotate = Pdfpage.Rotate0;
Pdfpage.rest = Pdf.Dictionary []}
in
pages := page :: !pages
in
2021-11-20 00:21:37 +01:00
let rec typeset_element = function
2021-11-18 23:48:25 +01:00
| Text cps ->
2021-11-20 00:21:37 +01:00
ops :=
Pdfops.Op_Q
::Pdfops.Op_ET
::Pdfops.Op_Tj cps
::Pdfops.Op_BT
::Pdfops.Op_cm (Pdftransform.mktranslate s.xpos (height -. s.ypos))
::Pdfops.Op_q
::!ops
2021-11-18 23:48:25 +01:00
| Font (f, fontsize) ->
let name, objnum =
match List.assoc_opt f !fonts with
| Some objnum -> ("/F" ^ string_of_int objnum, objnum)
| None ->
let num = Pdftext.write_font pdf f in
let n = "/F" ^ string_of_int num in
fonts := (f, num) :: !fonts;
(n, num)
in
s.font <- Some f;
2021-11-20 00:21:37 +01:00
s.fontsize <- fontsize;
2021-11-18 23:48:25 +01:00
thispagefontnums := objnum :: !thispagefontnums;
ops := Pdfops.Op_Tf (name, fontsize)::!ops
| HGlue {glen} ->
s.xpos <- s.xpos +. glen
| VGlue {glen} ->
2021-11-19 00:23:38 +01:00
s.ypos <- s.ypos +. glen
2021-11-18 23:48:25 +01:00
| NewLine ->
2021-11-20 00:21:37 +01:00
s.xpos <- lmargin;
typeset_element (VGlue {glen = s.fontsize *. 1.3; gstretch = 0.})
2021-11-18 23:48:25 +01:00
| NewPage ->
write_page ();
2021-11-19 01:41:42 +01:00
thispagefontnums := [];
2021-11-18 23:48:25 +01:00
ops := [];
2021-11-19 01:41:42 +01:00
s.xpos <- lmargin;
s.ypos <- tmargin
2021-11-18 23:48:25 +01:00
in
iter typeset_element i;
write_page ();
rev !pages
let example_pdf () =
let pdf = Pdf.empty () in
2021-11-19 00:23:38 +01:00
let pages = typeset 20. 20. 20. 20. Pdfpaper.a4 pdf example in
2021-11-18 23:48:25 +01:00
let pdf, pageroot = Pdfpage.add_pagetree pages pdf in
Pdfpage.add_root pageroot [] pdf
2021-11-18 20:09:09 +01:00
2021-11-18 23:48:25 +01:00
let _ =
Pdfwrite.pdf_to_file (example_pdf ()) "out.pdf"