More typesetter work

This commit is contained in:
John Whitington 2021-11-19 15:21:37 -08:00
parent 925ae38795
commit 12c4c7ab7e
3 changed files with 94 additions and 51 deletions

View File

@ -2909,6 +2909,25 @@ let collate (names, pdfs, ranges) =
done; done;
split3 (rev !nis) split3 (rev !nis)
(* [of_utf8 (f, fontsize) t] turns the UTF-8 string [t] into a string of
   character codes for the font [f]. The font is written into a fresh, empty
   document only so that a charcode extractor can be built for it. Code points
   for which the extractor gives no charcode do not appear in the result
   (presumably dropped by [option_map] -- confirm against Pdfutil). The size
   component of the pair is accepted but not used. *)
let of_utf8 (f, _fontsize) t =
  let scratch = Pdf.empty () in
  let font_object = Pdf.Indirect (Pdftext.write_font scratch f) in
  let charcode_of = Pdftext.charcode_extractor_of_font scratch font_object in
  Pdftext.codepoints_of_utf8 t
  |> option_map charcode_of
  |> map char_of_int
  |> implode
(* [typeset text] typesets the bytes [text] as a single Text element on A4
   paper in 12. Courier (WinAnsiEncoding) with all four margins set to 20.
   The pages produced by [Cpdftype.typeset] are added to a fresh document as
   a page tree, and the result of [Pdfpage.add_root] on it is returned. *)
let typeset text =
  let margin = 20. in
  let courier_12 =
    (Pdftext.StandardFont (Pdftext.Courier, Pdftext.WinAnsiEncoding), 12.)
  in
  let pdf = Pdf.empty () in
  let content =
    [Cpdftype.Font courier_12;
     Cpdftype.Text (of_utf8 courier_12 (string_of_bytes text))]
  in
  let pages =
    Cpdftype.typeset margin margin margin margin Pdfpaper.a4 pdf content
  in
  let pdf, pageroot = Pdfpage.add_pagetree pages pdf in
  Pdfpage.add_root pageroot [] pdf
(* Main function *) (* Main function *)
let go () = let go () =
match args.op with match args.op with
@ -3832,7 +3851,9 @@ let go () =
| Some TableOfContents -> | Some TableOfContents ->
Printf.printf "Making a table of contents...\n" Printf.printf "Making a table of contents...\n"
| Some (Typeset filename) -> | Some (Typeset filename) ->
Printf.printf "Typesetting a text file...\n" let text = Pdfio.bytes_of_input_channel (open_in filename) in
let pdf = typeset text in
write_pdf false pdf
(* Advise the user if a combination of command line flags makes little sense, (* Advise the user if a combination of command line flags makes little sense,
or error out if it makes no sense at all. *) or error out if it makes no sense at all. *)

View File

@ -1,76 +1,110 @@
(* A typesetter for cpdf. A list of elements is manipulated zero or more times (* A typesetter for cpdf. A list of elements is manipulated zero or more times
to lay it out, paginate it, and so on. It is then typeset to produce a list to lay it out, paginate it, and so on. It is then typeset to produce a list
of pages *) of pages *)
open Pdfutil
(* Text is represented as a list of unicode code points *) (* FIXME We need to make Pdfstandard14 width calculations much more efficient
type text = int list by caching so that we are not making a table up for each character! *)
(* FIXME We need to reintroduce kerning in Pdfstandard14. *)
open Pdfutil
(* Glue *) (* Glue *)
(* A glue value is a record of two floats. Each line below shows the old
   column followed by the new column: the second field is renamed from
   [stretch] to [gstretch], while [glen] is unchanged. *)
type glue = type glue =
{glen : float; {glen : float;
stretch : float} gstretch : float}
(* Main type *) (* Main type *)
(* The items the typesetter works on. Old column first, new column second:
   Text changes its payload from [text] to a string (annotated as
   WinAnsiEncoding), and the Font payload is written as a parenthesised
   (font, size) pair. HGlue, VGlue, NewLine and NewPage are unchanged. *)
type element = type element =
Text of text Text of string (* WinAnsiEncoding *)
| HGlue of glue | HGlue of glue
| VGlue of glue | VGlue of glue
| NewLine | NewLine
| NewPage | NewPage
| Font of Pdftext.font * float | Font of (Pdftext.font * float)
(* Render an element as a string. Text yields its own contents: the old
   column converted code points with [Pdftext.utf8_of_codepoints], the new
   column returns the string as it is. Every other constructor yields a fixed
   label and ignores any payload. *)
let string_of_element = function let string_of_element = function
| Text t -> Pdftext.utf8_of_codepoints t | Text t -> t
| HGlue _ -> "HGLUE" | HGlue _ -> "HGLUE"
| VGlue _ -> "VGLUE" | VGlue _ -> "VGLUE"
| NewLine -> "NewLine" | NewLine -> "NewLine"
| NewPage -> "NewPage" | NewPage -> "NewPage"
| Font _ -> "Font" | Font _ -> "Font"
(* [indent x] is an HGlue of length [x] and [newpara x] is a VGlue of length
   [x]; both have a stretch of zero. The only difference between the old and
   new columns is the stretch field name ([stretch] -> [gstretch]). A value
   of type [t] is a list of elements. *)
let indent x = HGlue {glen = x; stretch = 0.} let indent x = HGlue {glen = x; gstretch = 0.}
let newpara x = VGlue {glen = x; stretch = 0.} let newpara x = VGlue {glen = x; gstretch = 0.}
type t = element list type t = element list
(* Old column (first line only): [of_utf8] was simply
   [Pdftext.codepoints_of_utf8]. New column (the rest of this block):
   [of_utf8 (f, fontsize) t] writes font [f] into a fresh empty document,
   builds a charcode extractor for it, and maps the code points of the UTF-8
   string [t] through that extractor to build a string of character codes.
   [fontsize] is bound by the pattern but not used in the body. *)
let of_utf8 = Pdftext.codepoints_of_utf8 let of_utf8 (f, fontsize) t =
let pdf = Pdf.empty () in
let fontdict = Pdftext.write_font pdf f in
let extractor = Pdftext.charcode_extractor_of_font pdf (Pdf.Indirect fontdict) in
let charcodes = Pdftext.codepoints_of_utf8 t in
(* The extractor's results are filtered through [option_map] before being
   turned into chars -- presumably dropping code points with no charcode;
   confirm against Pdfutil. *)
charcodes |> option_map extractor |> map char_of_int |> implode
(* Standard Times faces, each paired with the size it is used at. All three
   use WinAnsiEncoding. *)
let times_roman_12 = Pdftext.(StandardFont (TimesRoman, WinAnsiEncoding), 12.)
let times_italic_10 = Pdftext.(StandardFont (TimesItalic, WinAnsiEncoding), 10.)
let times_bold_10 = Pdftext.(StandardFont (TimesBold, WinAnsiEncoding), 10.)
(* A sample element list. Old and new columns are fused on each line. The new
   column uses the named font/size pairs, converts each string with
   [of_utf8] for the font it is set in, and adds glue at the top of each page
   plus a second run of text after the NewLine. *)
let example = let example =
[Font (Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding), 12.); [Font times_roman_12;
Text (of_utf8 "Jackdaws love my Sphinx of Quartz. And this, this is the second sentence to provoke a line-break. We need rather more text than one might think in this diminutive font."); newpara 12.; (* set up top of page correctly *)
Text (of_utf8 times_roman_12 "Jackdaws love my Sphinx of Quartz. And this, this is the second sentence to provoke a line-break. We need rather more text than one might think in this diminutive font.");
NewLine; NewLine;
newpara 10.; Text (of_utf8 times_roman_12 "After the newline... ");
indent 72.; newpara (12. *. 1.3);
Font (Pdftext.StandardFont (Pdftext.TimesItalic, Pdftext.WinAnsiEncoding), 10.); indent 32.;
Text (of_utf8 "The second paragraph"); Font times_italic_10;
Text (of_utf8 times_italic_10 "The second paragraph");
NewPage; NewPage;
Font (Pdftext.StandardFont (Pdftext.TimesBold, Pdftext.WinAnsiEncoding), 10.); newpara 10.; (* set up top of page *)
Text (of_utf8 "A little too bold"); Font times_bold_10;
Text (of_utf8 times_bold_10 "A little too bold");
] ]
(* Mutable state for a pass over the elements: the current font, if any, and
   the current x and y positions. The new column adds [fontsize] and
   [width_table]; those two lines appear once because they exist only in the
   new version. *)
type state = type state =
{mutable font : Pdftext.font option; {mutable font : Pdftext.font option;
mutable fontsize : float;
mutable width_table : float array; (* Widths for charcodes 0..255 *)
mutable xpos : float; mutable xpos : float;
mutable ypos : float} mutable ypos : float}
(* [initial_state ()] builds a fresh state: no font, both positions at zero
   and, in the new column, a font size of zero and an empty width table. *)
let initial_state () = let initial_state () =
{font = None; {font = None;
fontsize = 0.;
width_table = [||];
xpos = 0.; xpos = 0.;
ypos = 0.} ypos = 0.}
(* These three lines fuse a removed definition (old column) with an added one
   (new column). Old: the comment and the [layout_element] stub, which passed
   every element straight to [fo]. New: [font_widths f fontsize], which
   returns a 256-entry array in which every entry is
   [fontsize *. (600. /. 1000.)]. It does not look at [f], so every character
   of every font gets the same width.
   NOTE(review): presumably a placeholder until real per-font widths are
   wired in -- confirm. *)
(* Split text into lines, resolve all hglue stretches to 0, remove Newlines. *) let font_widths f fontsize =
let layout_element s xpos_max fo = function let w = fontsize *. (600. /. 1000.) in
| e -> fo e Array.make 256 w
(* [layout lmargin rmargin papersize i] makes one pass over the element list
   [i], tracking the horizontal position, and returns the elements in their
   original order with a NewLine inserted after any HGlue which reaches or
   passes the right-hand limit.

   The right-hand limit is the paper width, converted to points, less
   [rmargin]. A NewLine, whether supplied by the caller or inserted here,
   moves the tracked position back to [lmargin]. A Font element also
   refreshes the width table held in the state.

   TODO: Text is passed through unchanged for now. The plan is to split each
   text element into words and lay them out ragged right (words longer than a
   whole line just fall off the margin), turning text newlines into real
   newlines:
     1. If it all fits, just pass on, adding to xpos.
     2. If not, lay out one line, splitting on words, add a newline and
        recurse. *)
let layout lmargin rmargin papersize i =
  let width =
    Pdfunits.convert
      72. (Pdfpaper.unit papersize) Pdfunits.PdfPoint (Pdfpaper.width papersize)
  in
  let o = ref [] in
  let s = initial_state () in
  (* The limit is measured in from the right-hand edge, so it must use the
     right margin, not the left one. *)
  let xpos_max = width -. rmargin in
  s.xpos <- lmargin;
  let rec layout_element = function
    | Font (f, fontsize) as font ->
        s.width_table <- font_widths f fontsize;
        o := font :: !o
    | Text _ as text ->
        o := text :: !o
    | HGlue {glen; _} as glue ->
        s.xpos <- s.xpos +. glen;
        o := glue :: !o;
        if s.xpos >= xpos_max then layout_element NewLine
    | NewLine ->
        (* Start the next line at the left margin; without this, every later
           HGlue would also be judged to overflow. *)
        s.xpos <- lmargin;
        o := NewLine :: !o
    | (VGlue _ | NewPage) as e ->
        o := e :: !o
  in
  iter layout_element i;
  rev !o
(* Resolve all hglue stretches, insert NewPage as needed. *) (* Resolve all hglue stretches, insert NewPage as needed. *)
@ -106,24 +140,12 @@ let typeset lmargin rmargin tmargin bmargin papersize pdf i =
in in
pages := page :: !pages pages := page :: !pages
in in
let typeset_element = function let rec typeset_element = function
| Text cps -> | Text cps ->
let charcodestring =
match s.font with
| None -> failwith "font not set up"
| Some f ->
match List.assoc_opt f !fonts with
| Some objnum ->
let extractor =
Pdftext.charcode_extractor_of_font pdf (Pdf.lookup_obj pdf objnum)
in
implode (map char_of_int (option_map extractor cps))
| None -> failwith "font not found"
in
ops := ops :=
Pdfops.Op_Q Pdfops.Op_Q
::Pdfops.Op_ET ::Pdfops.Op_ET
::Pdfops.Op_Tj charcodestring ::Pdfops.Op_Tj cps
::Pdfops.Op_BT ::Pdfops.Op_BT
::Pdfops.Op_cm (Pdftransform.mktranslate s.xpos (height -. s.ypos)) ::Pdfops.Op_cm (Pdftransform.mktranslate s.xpos (height -. s.ypos))
::Pdfops.Op_q ::Pdfops.Op_q
@ -139,6 +161,7 @@ let typeset lmargin rmargin tmargin bmargin papersize pdf i =
(n, num) (n, num)
in in
s.font <- Some f; s.font <- Some f;
s.fontsize <- fontsize;
thispagefontnums := objnum :: !thispagefontnums; thispagefontnums := objnum :: !thispagefontnums;
ops := Pdfops.Op_Tf (name, fontsize)::!ops ops := Pdfops.Op_Tf (name, fontsize)::!ops
| HGlue {glen} -> | HGlue {glen} ->
@ -146,7 +169,8 @@ let typeset lmargin rmargin tmargin bmargin papersize pdf i =
| VGlue {glen} -> | VGlue {glen} ->
s.ypos <- s.ypos +. glen s.ypos <- s.ypos +. glen
| NewLine -> | NewLine ->
s.xpos <- 0. s.xpos <- lmargin;
typeset_element (VGlue {glen = s.fontsize *. 1.3; gstretch = 0.})
| NewPage -> | NewPage ->
write_page (); write_page ();
thispagefontnums := []; thispagefontnums := [];

View File

@ -1,16 +1,14 @@
(* Interface side of the same change. The [text] alias on the first line
   appears only in the old column, i.e. it is removed. In [glue], the second
   field is renamed from [stretch] (old column) to [gstretch] (new column). *)
type text = int list
type glue = type glue =
{glen : float; {glen : float;
stretch : float} gstretch : float}
(* Old column first, new column second: the Text payload changes from [text]
   to [string], and the Font payload is written as a parenthesised pair. The
   remaining constructors and [t] (a list of elements) are unchanged. *)
type element = type element =
Text of text Text of string
| HGlue of glue | HGlue of glue
| VGlue of glue | VGlue of glue
| NewLine | NewLine
| NewPage | NewPage
| Font of Pdftext.font * float | Font of (Pdftext.font * float)
type t = element list type t = element list