2021-11-18 20:09:09 +01:00
(* A typesetter for cpdf. A list of elements is manipulated zero or more times
to lay it out , paginate it , and so on . It is then typeset to produce a list
of pages * )
2021-11-20 00:21:37 +01:00
(* FIXME We need to make Pdfstandard14 width calculations much more efficient
by caching so that we are not making a table up for each character ! * )
(* FIXME We need to reintroduce kerning in Pdfstandard14. *)
2021-11-20 01:28:13 +01:00
(* FIXME Fix up charcode / text extractors to take fonts not fontdicts *)
2021-11-20 00:21:37 +01:00
open Pdfutil
2021-11-18 20:09:09 +01:00
(* Glue *)
type glue =
{ glen : float ;
2021-11-20 00:21:37 +01:00
gstretch : float }
2021-11-18 20:09:09 +01:00
(* Main type *)
2021-11-18 23:48:25 +01:00
type element =
2021-11-20 00:21:37 +01:00
Text of string (* WinAnsiEncoding *)
2021-11-18 20:09:09 +01:00
| HGlue of glue
| VGlue of glue
| NewLine
| NewPage
2021-11-20 00:21:37 +01:00
| Font of ( Pdftext . font * float )
2021-11-20 01:28:13 +01:00
| BeginDest of Pdfdest . t
| EndDest
2021-11-18 23:48:25 +01:00
2021-11-21 23:15:06 +01:00
let to_string_elt = function
2021-11-20 00:21:37 +01:00
| Text t -> t
2021-11-21 23:15:06 +01:00
| HGlue { glen } -> " HGLUE " ^ string_of_float glen
2021-11-18 23:48:25 +01:00
| VGlue _ -> " VGLUE "
| NewLine -> " NewLine "
| NewPage -> " NewPage "
| Font _ -> " Font "
2021-11-20 01:28:13 +01:00
| BeginDest _ -> " BeginDest "
| EndDest -> " EndDest "
2021-11-18 20:09:09 +01:00
2021-11-21 23:15:06 +01:00
let to_string es = fold_left ( fun a b -> a ^ " \n " ^ b ) " " ( map to_string_elt es )
2021-11-20 00:21:37 +01:00
let indent x = HGlue { glen = x ; gstretch = 0 . }
let newpara x = VGlue { glen = x ; gstretch = 0 . }
2021-11-18 20:09:09 +01:00
2021-11-18 23:48:25 +01:00
type t = element list
2021-11-18 20:09:09 +01:00
2021-11-20 00:21:37 +01:00
let of_utf8 ( f , fontsize ) t =
let pdf = Pdf . empty () in
let fontdict = Pdftext . write_font pdf f in
let extractor = Pdftext . charcode_extractor_of_font pdf ( Pdf . Indirect fontdict ) in
2021-11-20 01:28:13 +01:00
Pdftext . codepoints_of_utf8 t
| > option_map extractor
| > map char_of_int
| > implode
2021-11-20 00:21:37 +01:00
let times_roman_12 = ( Pdftext . StandardFont ( Pdftext . TimesRoman , Pdftext . WinAnsiEncoding ) , 12 . )
let times_italic_10 = ( Pdftext . StandardFont ( Pdftext . TimesItalic , Pdftext . WinAnsiEncoding ) , 10 . )
let times_bold_10 = ( Pdftext . StandardFont ( Pdftext . TimesBold , Pdftext . WinAnsiEncoding ) , 10 . )
2021-11-18 20:09:09 +01:00
let example =
2021-11-20 00:21:37 +01:00
[ Font times_roman_12 ;
newpara 12 . ; (* set up top of page correctly *)
Text ( of_utf8 times_roman_12 " Jackdaws love my Sphinx of Quartz. And this, this is the second sentence to provoke a line-break. We need rather more text than one might think in this diminutive font. " ) ;
2021-11-18 23:48:25 +01:00
NewLine ;
2021-11-20 00:21:37 +01:00
Text ( of_utf8 times_roman_12 " After the newline... " ) ;
newpara ( 12 . * . 1 . 3 ) ;
indent 32 . ;
Font times_italic_10 ;
Text ( of_utf8 times_italic_10 " The second paragraph " ) ;
2021-11-19 01:41:42 +01:00
NewPage ;
2021-11-20 00:21:37 +01:00
newpara 10 . ; (* set up top of page *)
Font times_bold_10 ;
2021-11-20 01:28:13 +01:00
BeginDest Pdfdest . NullDestination ;
2021-11-20 00:21:37 +01:00
Text ( of_utf8 times_bold_10 " A little too bold " ) ;
2021-11-20 01:28:13 +01:00
EndDest
2021-11-19 01:41:42 +01:00
]
2021-11-18 20:09:09 +01:00
type state =
2021-11-18 23:48:25 +01:00
{ mutable font : Pdftext . font option ;
2021-11-20 00:21:37 +01:00
mutable fontsize : float ;
mutable width_table : float array ; (* Widths for charcodes 0..255 *)
2021-11-18 20:09:09 +01:00
mutable xpos : float ;
2021-11-20 01:28:13 +01:00
mutable ypos : float ;
mutable dest : Pdfdest . t option }
2021-11-18 20:09:09 +01:00
let initial_state () =
2021-11-18 23:48:25 +01:00
{ font = None ;
2021-11-20 00:21:37 +01:00
fontsize = 0 . ;
width_table = [| |] ;
2021-11-18 20:09:09 +01:00
xpos = 0 . ;
2021-11-20 01:28:13 +01:00
ypos = 0 . ;
dest = None }
2021-11-18 20:09:09 +01:00
2021-11-20 00:21:37 +01:00
let font_widths f fontsize =
let w = fontsize * . ( 600 . /. 1000 . ) in
Array . make 256 w
2021-11-18 23:48:25 +01:00
2021-11-20 01:28:13 +01:00
(* For now, split each text element into words, and lay them out ragged right
on one long page . Words longer than a whole line just fall off the margin .
Turn text newlines into real newlines . * )
let width_of_string ws s =
2021-11-22 23:44:14 +01:00
fold_left ( + . ) 0 . ( map ( fun s -> ws . ( int_of_char s ) ) s ) (* FIXME efficiency *)
(* Split into words on spaces. Find how many words ( at least one, to make
progress ) fit into the available space . We set needs_newline if the next
word would overflow . Return ( text , needs_newline , remaining_text ) * )
(* FIXME efficiency *)
let split_text space_left widths t =
let chars = ref ( explode t ) in
let words = ref [] in
let space_left = ref space_left in
let return needs_newline =
( implode ( flatten ( rev ! words ) ) , needs_newline , implode ! chars )
in
try
while ! chars < > [] do
let word , rest = cleavewhile ( neq ' ' ) ! chars in
let w = width_of_string widths word in
if ! words = [] | | w < ! space_left
then ( words := ( word @ [ ' ' ] ) :: ! words ; space_left := ! space_left -. w -. width_of_string widths [ ' ' ] )
else raise Exit ;
chars := if rest = [] then [] else tl rest ;
done ;
return false
with
Exit -> return true
2021-11-20 01:28:13 +01:00
2021-11-18 20:09:09 +01:00
let layout lmargin rmargin papersize i =
2021-11-19 00:23:38 +01:00
let width =
Pdfunits . convert 72 . ( Pdfpaper . unit papersize ) Pdfunits . PdfPoint ( Pdfpaper . width papersize )
in
let o = ref [] in
2021-11-18 23:48:25 +01:00
let s = initial_state () in
2021-11-20 00:21:37 +01:00
let xpos_max = width -. lmargin in
2021-11-19 00:23:38 +01:00
s . xpos <- lmargin ;
2021-11-20 00:21:37 +01:00
let rec layout_element = function
| Font ( f , fontsize ) ->
s . width_table <- font_widths f fontsize ;
o := Font ( f , fontsize ) :: ! o
| Text text ->
2021-11-22 23:44:14 +01:00
if text = " " then () else
begin
let this_line , needs_newline , remaining_text =
split_text ( xpos_max -. s . xpos ) s . width_table text
in
o := Text this_line :: ! o ;
s . xpos <- s . xpos + . width_of_string s . width_table ( explode this_line ) ;
if needs_newline then layout_element NewLine ;
if remaining_text < > " " then layout_element ( Text remaining_text )
end
2021-11-20 00:21:37 +01:00
| HGlue { glen } as glue ->
s . xpos <- s . xpos + . glen ;
o := glue :: ! o ;
if s . xpos > = xpos_max then layout_element NewLine
2021-11-22 02:27:51 +01:00
| NewLine ->
s . xpos <- lmargin ;
o := NewLine :: ! o
| x ->
o := x :: ! o
2021-11-20 00:21:37 +01:00
in
iter layout_element i ;
2021-11-19 00:23:38 +01:00
rev ! o
2021-11-18 20:09:09 +01:00
(* Resolve all hglue stretches, insert NewPage as needed. *)
let paginate tmargin bmargin papersize i = i
2021-11-18 23:48:25 +01:00
let make_resources fontobjnums =
Pdf . Dictionary
[ ( " /Font " , Pdf . Dictionary ( map ( fun fo -> ( " /F " ^ string_of_int fo , Pdf . Indirect fo ) ) fontobjnums ) ) ]
(* At this stage, just Font and Text and HGlue 0. and VGlue 0. and Newline and
NewPage elements . Split on NewPages , typeset each page , add font
dictionaries . New page only
creates a page when that page has content . * )
let typeset lmargin rmargin tmargin bmargin papersize pdf i =
2021-11-22 23:44:14 +01:00
print_endline " ***input: \n \n " ;
print_endline ( to_string i ) ;
2021-11-18 23:48:25 +01:00
let i = layout lmargin rmargin papersize i in
2021-11-22 02:27:51 +01:00
print_endline " ***after layout: \n \n " ;
print_endline ( to_string i ) ;
2021-11-18 23:48:25 +01:00
let i = paginate tmargin bmargin papersize i in
2021-11-22 02:27:51 +01:00
print_endline " ***after pagination: \n \n " ;
print_endline ( to_string i ) ;
2021-11-19 00:23:38 +01:00
let height = Pdfunits . convert 72 . ( Pdfpaper . unit papersize ) Pdfunits . PdfPoint ( Pdfpaper . height papersize ) in
2021-11-18 23:48:25 +01:00
let s = initial_state () in
s . xpos <- lmargin ;
2021-11-19 00:23:38 +01:00
s . ypos <- tmargin ;
2021-11-18 23:48:25 +01:00
let ops = ref [] in
let fonts = ref [] in
let thispagefontnums = ref [] in
let pages = ref [] in
let write_page () =
if ! ops < > [] then
let page =
{ Pdfpage . content = [ Pdfops . stream_of_ops ( rev ! ops ) ] ;
Pdfpage . mediabox = Pdfpage . rectangle_of_paper papersize ;
Pdfpage . resources = make_resources ! thispagefontnums ;
Pdfpage . rotate = Pdfpage . Rotate0 ;
Pdfpage . rest = Pdf . Dictionary [] }
in
pages := page :: ! pages
in
2021-11-20 00:21:37 +01:00
let rec typeset_element = function
2021-11-18 23:48:25 +01:00
| Text cps ->
2021-11-20 00:21:37 +01:00
ops :=
Pdfops . Op_Q
:: Pdfops . Op_ET
:: Pdfops . Op_Tj cps
:: Pdfops . Op_BT
:: Pdfops . Op_cm ( Pdftransform . mktranslate s . xpos ( height -. s . ypos ) )
:: Pdfops . Op_q
:: ! ops
2021-11-20 01:28:13 +01:00
(* If a destination, add the rectangle to the pile of rectangles for this annotation. *)
2021-11-18 23:48:25 +01:00
| Font ( f , fontsize ) ->
let name , objnum =
match List . assoc_opt f ! fonts with
| Some objnum -> ( " /F " ^ string_of_int objnum , objnum )
| None ->
let num = Pdftext . write_font pdf f in
let n = " /F " ^ string_of_int num in
fonts := ( f , num ) :: ! fonts ;
( n , num )
in
s . font <- Some f ;
2021-11-20 00:21:37 +01:00
s . fontsize <- fontsize ;
2021-11-18 23:48:25 +01:00
thispagefontnums := objnum :: ! thispagefontnums ;
ops := Pdfops . Op_Tf ( name , fontsize ) :: ! ops
| HGlue { glen } ->
s . xpos <- s . xpos + . glen
| VGlue { glen } ->
2021-11-19 00:23:38 +01:00
s . ypos <- s . ypos + . glen
2021-11-18 23:48:25 +01:00
| NewLine ->
2021-11-20 00:21:37 +01:00
s . xpos <- lmargin ;
typeset_element ( VGlue { glen = s . fontsize * . 1 . 3 ; gstretch = 0 . } )
2021-11-18 23:48:25 +01:00
| NewPage ->
write_page () ;
2021-11-19 01:41:42 +01:00
thispagefontnums := [] ;
2021-11-18 23:48:25 +01:00
ops := [] ;
2021-11-19 01:41:42 +01:00
s . xpos <- lmargin ;
s . ypos <- tmargin
2021-11-20 01:28:13 +01:00
| BeginDest dest ->
s . dest <- Some dest
| EndDest ->
(* Collect together the rectangles for this annotation, merge? and output *)
s . dest <- None
2021-11-18 23:48:25 +01:00
in
iter typeset_element i ;
write_page () ;
rev ! pages
let example_pdf () =
let pdf = Pdf . empty () in
2021-11-19 00:23:38 +01:00
let pages = typeset 20 . 20 . 20 . 20 . Pdfpaper . a4 pdf example in
2021-11-18 23:48:25 +01:00
let pdf , pageroot = Pdfpage . add_pagetree pages pdf in
Pdfpage . add_root pageroot [] pdf
2021-11-18 20:09:09 +01:00
2021-11-21 23:15:06 +01:00
(* let _ =
Pdfwrite . pdf_to_file ( example_pdf () ) " out.pdf " * )