More typesetter work
This commit is contained in:
parent
925ae38795
commit
12c4c7ab7e
|
@ -2909,6 +2909,25 @@ let collate (names, pdfs, ranges) =
|
||||||
done;
|
done;
|
||||||
split3 (rev !nis)
|
split3 (rev !nis)
|
||||||
|
|
||||||
|
let of_utf8 (f, fontsize) t =
|
||||||
|
let pdf = Pdf.empty () in
|
||||||
|
let fontdict = Pdftext.write_font pdf f in
|
||||||
|
let extractor = Pdftext.charcode_extractor_of_font pdf (Pdf.Indirect fontdict) in
|
||||||
|
let charcodes = Pdftext.codepoints_of_utf8 t in
|
||||||
|
implode (map char_of_int (option_map extractor charcodes))
|
||||||
|
|
||||||
|
let typeset text =
|
||||||
|
let pdf = Pdf.empty () in
|
||||||
|
let f = (Pdftext.StandardFont (Pdftext.Courier, Pdftext.WinAnsiEncoding), 12.) in
|
||||||
|
let pages =
|
||||||
|
Cpdftype.typeset
|
||||||
|
20. 20. 20. 20. Pdfpaper.a4 pdf
|
||||||
|
[Cpdftype.Font f;
|
||||||
|
Text (of_utf8 f (string_of_bytes text))]
|
||||||
|
in
|
||||||
|
let pdf, pageroot = Pdfpage.add_pagetree pages pdf in
|
||||||
|
Pdfpage.add_root pageroot [] pdf
|
||||||
|
|
||||||
(* Main function *)
|
(* Main function *)
|
||||||
let go () =
|
let go () =
|
||||||
match args.op with
|
match args.op with
|
||||||
|
@ -3832,7 +3851,9 @@ let go () =
|
||||||
| Some TableOfContents ->
|
| Some TableOfContents ->
|
||||||
Printf.printf "Making a table of contents...\n"
|
Printf.printf "Making a table of contents...\n"
|
||||||
| Some (Typeset filename) ->
|
| Some (Typeset filename) ->
|
||||||
Printf.printf "Typesetting a text file...\n"
|
let text = Pdfio.bytes_of_input_channel (open_in filename) in
|
||||||
|
let pdf = typeset text in
|
||||||
|
write_pdf false pdf
|
||||||
|
|
||||||
(* Advise the user if a combination of command line flags makes little sense,
|
(* Advise the user if a combination of command line flags makes little sense,
|
||||||
or error out if it makes no sense at all. *)
|
or error out if it makes no sense at all. *)
|
||||||
|
|
114
cpdftype.ml
114
cpdftype.ml
|
@ -1,76 +1,110 @@
|
||||||
(* A typesetter for cpdf. A list of elements is manipulated zero or more times
|
(* A typesetter for cpdf. A list of elements is manipulated zero or more times
|
||||||
to lay it out, paginate it, and so on. It is then typeset to produce a list
|
to lay it out, paginate it, and so on. It is then typeset to produce a list
|
||||||
of pages *)
|
of pages *)
|
||||||
open Pdfutil
|
|
||||||
|
|
||||||
(* Text is represented as a list of unicode code points *)
|
(* FIXME We need to make Pdfstandard14 width calculations much more efficient
|
||||||
type text = int list
|
by caching so that we are not making a table up for each character! *)
|
||||||
|
(* FIXME We need to reintroduce kerning in Pdfstandard14. *)
|
||||||
|
open Pdfutil
|
||||||
|
|
||||||
(* Glue *)
|
(* Glue *)
|
||||||
type glue =
|
type glue =
|
||||||
{glen : float;
|
{glen : float;
|
||||||
stretch : float}
|
gstretch : float}
|
||||||
|
|
||||||
(* Main type *)
|
(* Main type *)
|
||||||
type element =
|
type element =
|
||||||
Text of text
|
Text of string (* WinAnsiEncoding *)
|
||||||
| HGlue of glue
|
| HGlue of glue
|
||||||
| VGlue of glue
|
| VGlue of glue
|
||||||
| NewLine
|
| NewLine
|
||||||
| NewPage
|
| NewPage
|
||||||
| Font of Pdftext.font * float
|
| Font of (Pdftext.font * float)
|
||||||
|
|
||||||
let string_of_element = function
|
let string_of_element = function
|
||||||
| Text t -> Pdftext.utf8_of_codepoints t
|
| Text t -> t
|
||||||
| HGlue _ -> "HGLUE"
|
| HGlue _ -> "HGLUE"
|
||||||
| VGlue _ -> "VGLUE"
|
| VGlue _ -> "VGLUE"
|
||||||
| NewLine -> "NewLine"
|
| NewLine -> "NewLine"
|
||||||
| NewPage -> "NewPage"
|
| NewPage -> "NewPage"
|
||||||
| Font _ -> "Font"
|
| Font _ -> "Font"
|
||||||
|
|
||||||
let indent x = HGlue {glen = x; stretch = 0.}
|
let indent x = HGlue {glen = x; gstretch = 0.}
|
||||||
let newpara x = VGlue {glen = x; stretch = 0.}
|
let newpara x = VGlue {glen = x; gstretch = 0.}
|
||||||
|
|
||||||
type t = element list
|
type t = element list
|
||||||
|
|
||||||
let of_utf8 = Pdftext.codepoints_of_utf8
|
let of_utf8 (f, fontsize) t =
|
||||||
|
let pdf = Pdf.empty () in
|
||||||
|
let fontdict = Pdftext.write_font pdf f in
|
||||||
|
let extractor = Pdftext.charcode_extractor_of_font pdf (Pdf.Indirect fontdict) in
|
||||||
|
let charcodes = Pdftext.codepoints_of_utf8 t in
|
||||||
|
charcodes |> option_map extractor |> map char_of_int |> implode
|
||||||
|
|
||||||
|
let times_roman_12 = (Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding), 12.)
|
||||||
|
let times_italic_10 = (Pdftext.StandardFont (Pdftext.TimesItalic, Pdftext.WinAnsiEncoding), 10.)
|
||||||
|
let times_bold_10 = (Pdftext.StandardFont (Pdftext.TimesBold, Pdftext.WinAnsiEncoding), 10.)
|
||||||
|
|
||||||
let example =
|
let example =
|
||||||
[Font (Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding), 12.);
|
[Font times_roman_12;
|
||||||
Text (of_utf8 "Jackdaws love my Sphinx of Quartz. And this, this is the second sentence to provoke a line-break. We need rather more text than one might think in this diminutive font.");
|
newpara 12.; (* set up top of page correctly *)
|
||||||
|
Text (of_utf8 times_roman_12 "Jackdaws love my Sphinx of Quartz. And this, this is the second sentence to provoke a line-break. We need rather more text than one might think in this diminutive font.");
|
||||||
NewLine;
|
NewLine;
|
||||||
newpara 10.;
|
Text (of_utf8 times_roman_12 "After the newline... ");
|
||||||
indent 72.;
|
newpara (12. *. 1.3);
|
||||||
Font (Pdftext.StandardFont (Pdftext.TimesItalic, Pdftext.WinAnsiEncoding), 10.);
|
indent 32.;
|
||||||
Text (of_utf8 "The second paragraph");
|
Font times_italic_10;
|
||||||
|
Text (of_utf8 times_italic_10 "The second paragraph");
|
||||||
NewPage;
|
NewPage;
|
||||||
Font (Pdftext.StandardFont (Pdftext.TimesBold, Pdftext.WinAnsiEncoding), 10.);
|
newpara 10.; (* set up top of page *)
|
||||||
Text (of_utf8 "A little too bold");
|
Font times_bold_10;
|
||||||
|
Text (of_utf8 times_bold_10 "A little too bold");
|
||||||
]
|
]
|
||||||
|
|
||||||
type state =
|
type state =
|
||||||
{mutable font : Pdftext.font option;
|
{mutable font : Pdftext.font option;
|
||||||
|
mutable fontsize : float;
|
||||||
|
mutable width_table : float array; (* Widths for charcodes 0..255 *)
|
||||||
mutable xpos : float;
|
mutable xpos : float;
|
||||||
mutable ypos : float}
|
mutable ypos : float}
|
||||||
|
|
||||||
let initial_state () =
|
let initial_state () =
|
||||||
{font = None;
|
{font = None;
|
||||||
|
fontsize = 0.;
|
||||||
|
width_table = [||];
|
||||||
xpos = 0.;
|
xpos = 0.;
|
||||||
ypos = 0.}
|
ypos = 0.}
|
||||||
|
|
||||||
(* Split text into lines, resolve all hglue stretches to 0, remove Newlines. *)
|
let font_widths f fontsize =
|
||||||
let layout_element s xpos_max fo = function
|
let w = fontsize *. (600. /. 1000.) in
|
||||||
| e -> fo e
|
Array.make 256 w
|
||||||
|
|
||||||
|
(* For now, split each text element into words, and lay them out ragged right.
|
||||||
|
Words longer than a whole line just fall off the margin. Turn text newlines
|
||||||
|
into real newlines. *)
|
||||||
let layout lmargin rmargin papersize i =
|
let layout lmargin rmargin papersize i =
|
||||||
let width =
|
let width =
|
||||||
Pdfunits.convert 72. (Pdfpaper.unit papersize) Pdfunits.PdfPoint (Pdfpaper.width papersize)
|
Pdfunits.convert 72. (Pdfpaper.unit papersize) Pdfunits.PdfPoint (Pdfpaper.width papersize)
|
||||||
in
|
in
|
||||||
let o = ref [] in
|
let o = ref [] in
|
||||||
let s = initial_state () in
|
let s = initial_state () in
|
||||||
let xpos_max = Pdfpaper.width papersize -. lmargin in
|
let xpos_max = width -. lmargin in
|
||||||
s.xpos <- lmargin;
|
s.xpos <- lmargin;
|
||||||
iter (layout_element s xpos_max (fun e -> o := e::!o)) i;
|
let rec layout_element = function
|
||||||
|
| Font (f, fontsize) ->
|
||||||
|
s.width_table <- font_widths f fontsize;
|
||||||
|
o := Font (f, fontsize) :: !o
|
||||||
|
| Text text ->
|
||||||
|
o := Text text :: !o
|
||||||
|
(* 1. If it all fits, just pass on, adding to xpos *)
|
||||||
|
(* 2. If not, layout one line, splitting on words, and add a newline and recurse. *)
|
||||||
|
| HGlue {glen} as glue ->
|
||||||
|
s.xpos <- s.xpos +. glen;
|
||||||
|
o := glue :: !o;
|
||||||
|
if s.xpos >= xpos_max then layout_element NewLine
|
||||||
|
| x -> o := x :: !o
|
||||||
|
in
|
||||||
|
iter layout_element i;
|
||||||
rev !o
|
rev !o
|
||||||
|
|
||||||
(* Resolve all hglue stretches, insert NewPage as needed. *)
|
(* Resolve all hglue stretches, insert NewPage as needed. *)
|
||||||
|
@ -106,28 +140,16 @@ let typeset lmargin rmargin tmargin bmargin papersize pdf i =
|
||||||
in
|
in
|
||||||
pages := page :: !pages
|
pages := page :: !pages
|
||||||
in
|
in
|
||||||
let typeset_element = function
|
let rec typeset_element = function
|
||||||
| Text cps ->
|
| Text cps ->
|
||||||
let charcodestring =
|
ops :=
|
||||||
match s.font with
|
Pdfops.Op_Q
|
||||||
| None -> failwith "font not set up"
|
::Pdfops.Op_ET
|
||||||
| Some f ->
|
::Pdfops.Op_Tj cps
|
||||||
match List.assoc_opt f !fonts with
|
::Pdfops.Op_BT
|
||||||
| Some objnum ->
|
::Pdfops.Op_cm (Pdftransform.mktranslate s.xpos (height -. s.ypos))
|
||||||
let extractor =
|
::Pdfops.Op_q
|
||||||
Pdftext.charcode_extractor_of_font pdf (Pdf.lookup_obj pdf objnum)
|
::!ops
|
||||||
in
|
|
||||||
implode (map char_of_int (option_map extractor cps))
|
|
||||||
| None -> failwith "font not found"
|
|
||||||
in
|
|
||||||
ops :=
|
|
||||||
Pdfops.Op_Q
|
|
||||||
::Pdfops.Op_ET
|
|
||||||
::Pdfops.Op_Tj charcodestring
|
|
||||||
::Pdfops.Op_BT
|
|
||||||
::Pdfops.Op_cm (Pdftransform.mktranslate s.xpos (height -. s.ypos))
|
|
||||||
::Pdfops.Op_q
|
|
||||||
::!ops
|
|
||||||
| Font (f, fontsize) ->
|
| Font (f, fontsize) ->
|
||||||
let name, objnum =
|
let name, objnum =
|
||||||
match List.assoc_opt f !fonts with
|
match List.assoc_opt f !fonts with
|
||||||
|
@ -139,6 +161,7 @@ let typeset lmargin rmargin tmargin bmargin papersize pdf i =
|
||||||
(n, num)
|
(n, num)
|
||||||
in
|
in
|
||||||
s.font <- Some f;
|
s.font <- Some f;
|
||||||
|
s.fontsize <- fontsize;
|
||||||
thispagefontnums := objnum :: !thispagefontnums;
|
thispagefontnums := objnum :: !thispagefontnums;
|
||||||
ops := Pdfops.Op_Tf (name, fontsize)::!ops
|
ops := Pdfops.Op_Tf (name, fontsize)::!ops
|
||||||
| HGlue {glen} ->
|
| HGlue {glen} ->
|
||||||
|
@ -146,7 +169,8 @@ let typeset lmargin rmargin tmargin bmargin papersize pdf i =
|
||||||
| VGlue {glen} ->
|
| VGlue {glen} ->
|
||||||
s.ypos <- s.ypos +. glen
|
s.ypos <- s.ypos +. glen
|
||||||
| NewLine ->
|
| NewLine ->
|
||||||
s.xpos <- 0.
|
s.xpos <- lmargin;
|
||||||
|
typeset_element (VGlue {glen = s.fontsize *. 1.3; gstretch = 0.})
|
||||||
| NewPage ->
|
| NewPage ->
|
||||||
write_page ();
|
write_page ();
|
||||||
thispagefontnums := [];
|
thispagefontnums := [];
|
||||||
|
|
|
@ -1,16 +1,14 @@
|
||||||
type text = int list
|
|
||||||
|
|
||||||
type glue =
|
type glue =
|
||||||
{glen : float;
|
{glen : float;
|
||||||
stretch : float}
|
gstretch : float}
|
||||||
|
|
||||||
type element =
|
type element =
|
||||||
Text of text
|
Text of string
|
||||||
| HGlue of glue
|
| HGlue of glue
|
||||||
| VGlue of glue
|
| VGlue of glue
|
||||||
| NewLine
|
| NewLine
|
||||||
| NewPage
|
| NewPage
|
||||||
| Font of Pdftext.font * float
|
| Font of (Pdftext.font * float)
|
||||||
|
|
||||||
type t = element list
|
type t = element list
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue