(* Superimpose text, page numbers etc. *)
open Pdfutil
open Cpdferror
| RGB of float * float * float
| CYMK of float * float * float * float
(* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever
is in the font (for existing fonts). *)
(* Process UTF8 text to charcodes, given a font. *)
let charcodes_of_utf8 font s =
let extractor = Pdftext.charcode_extractor_of_font_real ~debug:false font in
let codepoints = Pdftext.codepoints_of_utf8 s in
implode (map char_of_int charcodes)
(* Convert WinAnsi-encoded text back to UTF8, assuming it came from UTF8 to start with *)
let utf8_of_winansi s =
let text_extractor =
(Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding))
let codepoints = Pdftext.codepoints_of_text text_extractor s in
Pdftext.utf8_of_codepoints codepoints
(* Get the width of some text in the given font *)
let width_of_text font text =
match font with
_ -> None
let make_font embed fontname =
(* Name of a base encoding as a plain string, with no leading slash.
   Only the three encodings listed below are handled; anything else is
   reported through [error] with the message "unknown encoding".
   The function never calls itself, so it is not declared [rec]. *)
let string_of_encoding = function
  | Pdftext.StandardEncoding -> "StandardEncoding"
  | Pdftext.MacRomanEncoding -> "MacRomanEncoding"
  | Pdftext.WinAnsiEncoding -> "WinAnsiEncoding"
  | _ -> error "unknown encoding"
let make_font embed encoding fontname =
let font = unopt (Pdftext.standard_font_of_name ("/" ^ fontname)) in
let header, width_data, _, chars_and_widths = Pdfstandard14.afm_data font in
let widths = extract_widths (list_of_hashtbl chars_and_widths) in
("/CapHeight", capheight);
("/StemV", Pdf.Integer stemv)]
(* With -no-embed-font, we use the standard encoding, and just the
* minimal stuff. Without -no-embed-font, we switch to WinAnsiEncoding,
* and fill out everything except the font file instead *)
if embed then
[("/Type", Pdf.Name "/Font");
@ -290,7 +282,8 @@ let pagelabel pdf num =
(Pdfpagelabels.complete (Pdfpagelabels.read pdf))
let addtext
lines linewidth outline fast colour fontname embed bates batespad fontsize (font : Pdftext.font option)
lines linewidth outline fast colour fontname encoding embed bates batespad fontsize
(font : Pdftext.font option)
fontpdfobj underneath position hoffset voffset text pages orientation cropbox opacity
justification filename extract_text_font_size shift pdf
match font with
| Some (Pdftext.StandardFont (f, _)) ->
let rawwidth =
(if embed then Pdftext.WinAnsiEncoding else Pdftext.StandardEncoding)
Pdfstandard14.textwidth false encoding f text
(float rawwidth *. fontsize) /. 1000.
| Some font ->
@ -403,7 +392,7 @@ let addtext
Pdf.parse_rectangle pdf page.Pdfpage.mediabox
let x, y, rotate = Cpdfposition.calculate_position false textwidth mediabox orientation position in
let x, y, rotate = Cpdfposition.calculate_position false textwidth mediabox position in
let hoffset, voffset =
if position = Diagonal || position = ReverseDiagonal
then -. (cos ((pi /. 2.) -. rotate) *. voffset), sin ((pi /. 2.) -. rotate) *. voffset
@ -423,7 +412,7 @@ let addtext
match font with
| Some (Pdftext.StandardFont _) ->
let newfontdict =
Pdf.add_dict_entry fontdict unique_fontname (make_font embed fontname)
Pdf.add_dict_entry fontdict unique_fontname (make_font embed encoding fontname)
Pdf.add_dict_entry resources' "/Font" newfontdict
| Some f ->
let realfontname = ref fontname in
let fontpdfobj =
match font with
| Some (StandardFont (f, _)) ->
make_font embed (Pdftext.string_of_standard_font f)
| Some (StandardFont (f, encoding)) ->
make_font embed encoding (Pdftext.string_of_standard_font f)
| Some f ->
Pdf.Indirect (Pdftext.write_font pdf f)
| None ->
voffset := !voffset +. capheight
| _ ->
Printf.eprintf "Unable to find midline adjustment in this font\n"
(fun line ->
let voff, hoff =
if orientation = Cpdfposition.Vertical then 0., -.(!voffset) else !voffset, 0.
pdf :=
addtext lines linewidth outline fast colour !realfontname
embed bates batespad fontsize font fontpdfobj underneath position hoff voff line
pages orientation cropbox opacity justification filename
extract_text_font_size shift
voffset := !voffset +. (linespacing *. fontsize))
let encoding =
match font with
| Some (Pdftext.StandardFont (_, e)) -> e
| Some (Pdftext.SimpleFont {encoding}) -> encoding
| _ -> Pdftext.WinAnsiEncoding
(fun line ->
let voff, hoff = !voffset, 0. in
pdf :=
addtext lines linewidth outline fast colour !realfontname encoding
embed bates batespad fontsize font fontpdfobj underneath position hoff voff line
pages orientation cropbox opacity justification filename
extract_text_font_size shift
voffset := !voffset +. (linespacing *. fontsize))
let removetext range pdf =
(* Could fail on nesting, or on other marked content inside our marked content. *)
@ -652,7 +644,7 @@ let addrectangle
Pdf.parse_rectangle pdf page.Pdfpage.mediabox
let x, y, _ =
Cpdfposition.calculate_position false w mediabox Cpdfposition.Horizontal position
Cpdfposition.calculate_position false w mediabox position
let x, y =
match position with
bool -> (*underneath*)
string ->(*text*)
int list ->(*page range*)
Cpdfposition.orientation ->(*orientation*)
'a ->(*orientation*)
bool ->(*relative to cropbox?*)
float ->(*opacity*)
justification ->(*justification*)
mutable bates : int;
mutable batespad : int option;
mutable prerotate : bool;
mutable orientation : Cpdfposition.orientation;
mutable relative_to_cropbox : bool;
mutable keepversion : bool;
mutable bycolumns : bool;
@ -519,7 +518,6 @@ let args =
bates = 0;
batespad = None;
prerotate = false;
orientation = Cpdfposition.Horizontal;
relative_to_cropbox = false;
keepversion = false;
bycolumns = false;
args.bates <- 0;
args.batespad <- None;
args.prerotate <- false;
args.orientation <- Cpdfposition.Horizontal;
args.relative_to_cropbox <- false;
args.keepversion <- false;
args.bycolumns <- false;
@ -1472,12 +1469,6 @@ let setimpath p =
(* Store [path] in the [path_to_p2p] field of [args]. *)
let setp2ppath path = args.path_to_p2p <- path
(* Set the [orientation] field of [args] to [Cpdfposition.Vertical]. *)
let settextvertical () = args.orientation <- Cpdfposition.(Vertical)
(* Set the [orientation] field of [args] to [Cpdfposition.VerticalDown]. *)
let settextverticaldown () = args.orientation <- Cpdfposition.(VerticalDown)
let setfrombox s =
detect_duplicate_op CopyBox;
args.op <- Some CopyBox;
("-remove-unused-resources", Arg.Unit (setop RemoveUnusedResources), "");
("-stay-on-error", Arg.Unit setstayonerror, "");
("-extract-fontfile", Arg.Unit (setop ExtractFontFile), "");
("-text-vertical", Arg.Unit settextvertical, "");
("-text-vertical-down", Arg.Unit settextverticaldown, "");
("-flat-kids", Arg.Unit setflatkids, "");
("-debug", Arg.Unit setdebug, "");
("-debug-crypt", Arg.Unit setdebugcrypt, "");
args.linewidth args.outline args.fast args.fontname
font args.embedfonts args.bates args.batespad args.color args.position
args.linespacing args.fontsize args.underneath text range
args.orientation args.relative_to_cropbox args.opacity
() args.relative_to_cropbox args.opacity
args.justification args.midline args.topline filename
args.extract_text_font_size args.coord ~raw:(args.encoding = Raw) pdf)
| Some RemoveText ->
| Some r -> r
| None -> page.Pdfpage.mediabox)
let sx, sy, _ = Cpdfposition.calculate_position true 0. box Horizontal position in
let sx, sy, _ = Cpdfposition.calculate_position true 0. box position in
let tx, ty =
let open Cpdfposition in
match position with
| ReverseDiagonal -> "Reverse Diagonal"
| Centre -> "Centre"
(* How a string is oriented on the page. *)
type orientation = Horizontal | Vertical | VerticalDown
(* Given the mediabox, calculate an absolute position for the text. *)
let calculate_position ignore_d w (xmin, ymin, xmax, ymax) orientation pos =
let rot = if orientation = VerticalDown then rad_of_deg 270. else 0. in
let calculate_position ignore_d w (xmin, ymin, xmax, ymax) pos =
let rot = 0. in
match pos with
| Centre ->
(xmin +. xmax) /. 2. -. w /. 2.,
(** Produce a debug string of a [position] *)
val string_of_position : position -> string
(** How a string is oriented on the page. *)
type orientation =
  | Horizontal
  | Vertical
  | VerticalDown
(** [calculate_position ignore_d w (xmin, ymin, xmax, ymax) orientation pos] calculates
the absolute position of text given its width, bounding box, orientation and
position. If [ignore_d] is true, the distance from the position (e.g 10 in
bool ->
float ->
float * float * float * float ->
orientation -> position -> float * float * float
position -> float * float * float
