This commit is contained in:
John Whitington 2022-09-21 17:40:28 +01:00
parent 00ba9d4bee
commit 256a74bd0f
6 changed files with 42 additions and 69 deletions

View File

@ -1,3 +1,4 @@
(* Superimpose text, page numbers etc. *)
open Pdfutil
open Cpdferror
@ -6,10 +7,7 @@ type color =
| RGB of float * float * float
| CYMK of float * float * float * float
(* Superimpose text, page numbers etc. *)
(* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever
is in the font (for existing fonts). *)
(* Process UTF8 text to charcodes, given a font. *)
let charcodes_of_utf8 font s =
let extractor = Pdftext.charcode_extractor_of_font_real ~debug:false font in
let codepoints = Pdftext.codepoints_of_utf8 s in
@ -23,15 +21,6 @@ let charcodes_of_utf8 font s =
in
implode (map char_of_int charcodes)
(* Turn a string of character codes back into UTF8. The codes are read using
   a text extractor for Times Roman with WinAnsiEncoding, so the result is
   only meaningful if the string came from UTF8 to start with. *)
let utf8_of_winansi s =
  let winansi_times =
    Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding)
  in
  let extractor = Pdftext.text_extractor_of_font_real winansi_times in
  Pdftext.utf8_of_codepoints (Pdftext.codepoints_of_text extractor s)
(* Get the width of some text in the given font *)
let width_of_text font text =
match font with
@ -151,7 +140,13 @@ let cap_height fontname =
with
_ -> None
let make_font embed fontname =
(* Name of a base encoding, without a leading slash. Only StandardEncoding,
   MacRomanEncoding and WinAnsiEncoding are handled; any other encoding is
   passed to [error] with the message "unknown encoding". The function does
   not call itself, so it is not declared [rec]. *)
let string_of_encoding = function
  | Pdftext.StandardEncoding -> "StandardEncoding"
  | Pdftext.MacRomanEncoding -> "MacRomanEncoding"
  | Pdftext.WinAnsiEncoding -> "WinAnsiEncoding"
  | _ -> error "unknown encoding"
let make_font embed encoding fontname =
let font = unopt (Pdftext.standard_font_of_name ("/" ^ fontname)) in
let header, width_data, _, chars_and_widths = Pdfstandard14.afm_data font in
let widths = extract_widths (list_of_hashtbl chars_and_widths) in
@ -174,9 +169,6 @@ let make_font embed fontname =
("/CapHeight", capheight);
("/StemV", Pdf.Integer stemv)]
in
(* With -no-embed-font, we use the standard encoding, and just the
* minimal stuff. Without -no-embed-font, we switch to WinAnsiEncoding,
* and fill out everything except the font file instead *)
if embed then
Pdf.Dictionary
[("/Type", Pdf.Name "/Font");
@ -290,7 +282,8 @@ let pagelabel pdf num =
(Pdfpagelabels.complete (Pdfpagelabels.read pdf))
let addtext
lines linewidth outline fast colour fontname embed bates batespad fontsize (font : Pdftext.font option)
lines linewidth outline fast colour fontname encoding embed bates batespad fontsize
(font : Pdftext.font option)
fontpdfobj underneath position hoffset voffset text pages orientation cropbox opacity
justification filename extract_text_font_size shift pdf
=
@ -345,11 +338,7 @@ let addtext
match font with
| Some (Pdftext.StandardFont (f, _)) ->
let rawwidth =
Pdfstandard14.textwidth
false
(if embed then Pdftext.WinAnsiEncoding else Pdftext.StandardEncoding)
f
text
Pdfstandard14.textwidth false encoding f text
in
(float rawwidth *. fontsize) /. 1000.
| Some font ->
@ -403,7 +392,7 @@ let addtext
else
Pdf.parse_rectangle pdf page.Pdfpage.mediabox
in
let x, y, rotate = Cpdfposition.calculate_position false textwidth mediabox orientation position in
let x, y, rotate = Cpdfposition.calculate_position false textwidth mediabox position in
let hoffset, voffset =
if position = Diagonal || position = ReverseDiagonal
then -. (cos ((pi /. 2.) -. rotate) *. voffset), sin ((pi /. 2.) -. rotate) *. voffset
@ -423,7 +412,7 @@ let addtext
match font with
| Some (Pdftext.StandardFont _) ->
let newfontdict =
Pdf.add_dict_entry fontdict unique_fontname (make_font embed fontname)
Pdf.add_dict_entry fontdict unique_fontname (make_font embed encoding fontname)
in
Pdf.add_dict_entry resources' "/Font" newfontdict
| Some f ->
@ -510,8 +499,8 @@ let
let realfontname = ref fontname in
let fontpdfobj =
match font with
| Some (StandardFont (f, _)) ->
make_font embed (Pdftext.string_of_standard_font f)
| Some (StandardFont (f, encoding)) ->
make_font embed encoding (Pdftext.string_of_standard_font f)
| Some f ->
Pdf.Indirect (Pdftext.write_font pdf f)
| None ->
@ -577,22 +566,25 @@ let
voffset := !voffset +. capheight
| _ ->
Printf.eprintf "Unable to find midline adjustment in this font\n"
end
else
iter
(fun line ->
let voff, hoff =
if orientation = Cpdfposition.Vertical then 0., -.(!voffset) else !voffset, 0.
in
pdf :=
addtext lines linewidth outline fast colour !realfontname
embed bates batespad fontsize font fontpdfobj underneath position hoff voff line
pages orientation cropbox opacity justification filename
extract_text_font_size shift
!pdf;
voffset := !voffset +. (linespacing *. fontsize))
lines;
!pdf
end;
let encoding =
match font with
| Some (Pdftext.StandardFont (_, e)) -> e
| Some (Pdftext.SimpleFont {encoding}) -> encoding
| _ -> Pdftext.WinAnsiEncoding
in
iter
(fun line ->
let voff, hoff = !voffset, 0. in
pdf :=
addtext lines linewidth outline fast colour !realfontname encoding
embed bates batespad fontsize font fontpdfobj underneath position hoff voff line
pages orientation cropbox opacity justification filename
extract_text_font_size shift
!pdf;
voffset := !voffset +. (linespacing *. fontsize))
lines;
!pdf
let removetext range pdf =
(* Could fail on nesting, or other marked content inside our marked content.*)
@ -652,7 +644,7 @@ let addrectangle
Pdf.parse_rectangle pdf page.Pdfpage.mediabox
in
let x, y, _ =
Cpdfposition.calculate_position false w mediabox Cpdfposition.Horizontal position
Cpdfposition.calculate_position false w mediabox position
in
let x, y =
match position with

View File

@ -34,7 +34,7 @@ val addtexts :
bool -> (*underneath*)
string ->(*text*)
int list ->(*page range*)
Cpdfposition.orientation ->(*orientation*)
'a ->(*orientation*)
bool ->(*relative to cropbox?*)
float ->(*opacity*)
justification ->(*justification*)

View File

@ -396,7 +396,6 @@ type args =
mutable bates : int;
mutable batespad : int option;
mutable prerotate : bool;
mutable orientation : Cpdfposition.orientation;
mutable relative_to_cropbox : bool;
mutable keepversion : bool;
mutable bycolumns : bool;
@ -519,7 +518,6 @@ let args =
bates = 0;
batespad = None;
prerotate = false;
orientation = Cpdfposition.Horizontal;
relative_to_cropbox = false;
keepversion = false;
bycolumns = false;
@ -642,7 +640,6 @@ let reset_arguments () =
args.bates <- 0;
args.batespad <- None;
args.prerotate <- false;
args.orientation <- Cpdfposition.Horizontal;
args.relative_to_cropbox <- false;
args.keepversion <- false;
args.bycolumns <- false;
@ -1472,12 +1469,6 @@ let setimpath p =
(* Store [p] in the [path_to_p2p] field of the global [args] record. *)
let setp2ppath p = args.path_to_p2p <- p
(* Handler for -text-vertical: set the text orientation to Vertical. *)
let settextvertical () = args.orientation <- Cpdfposition.Vertical
(* Handler for -text-vertical-down: set the text orientation to
   VerticalDown. *)
let settextverticaldown () = args.orientation <- Cpdfposition.VerticalDown
let setfrombox s =
detect_duplicate_op CopyBox;
args.op <- Some CopyBox;
@ -2527,8 +2518,6 @@ and specs =
("-remove-unused-resources", Arg.Unit (setop RemoveUnusedResources), "");
("-stay-on-error", Arg.Unit setstayonerror, "");
("-extract-fontfile", Arg.Unit (setop ExtractFontFile), "");
("-text-vertical", Arg.Unit settextvertical, "");
("-text-vertical-down", Arg.Unit settextverticaldown, "");
("-flat-kids", Arg.Unit setflatkids, "");
("-debug", Arg.Unit setdebug, "");
("-debug-crypt", Arg.Unit setdebugcrypt, "");
@ -3736,7 +3725,7 @@ let go () =
args.linewidth args.outline args.fast args.fontname
font args.embedfonts args.bates args.batespad args.color args.position
args.linespacing args.fontsize args.underneath text range
args.orientation args.relative_to_cropbox args.opacity
() args.relative_to_cropbox args.opacity
args.justification args.midline args.topline filename
args.extract_text_font_size args.coord ~raw:(args.encoding = Raw) pdf)
| Some RemoveText ->

View File

@ -142,7 +142,7 @@ let scale_page_contents ?(fast=false) scale position pdf pnum page =
| Some r -> r
| None -> page.Pdfpage.mediabox)
in
let sx, sy, _ = Cpdfposition.calculate_position true 0. box Horizontal position in
let sx, sy, _ = Cpdfposition.calculate_position true 0. box position in
let tx, ty =
let open Cpdfposition in
match position with

View File

@ -32,14 +32,9 @@ let string_of_position = function
| ReverseDiagonal -> "Reverse Diagonal"
| Centre -> "Centre"
(* Orientation of the text on the page. [calculate_position] uses a rotation
   of 270 degrees for [VerticalDown] and 0 for the other two. *)
type orientation =
| Horizontal
| Vertical
| VerticalDown
(* Given the mediabox, calculate an absolute position for the text. *)
let calculate_position ignore_d w (xmin, ymin, xmax, ymax) orientation pos =
let rot = if orientation = VerticalDown then rad_of_deg 270. else 0. in
let calculate_position ignore_d w (xmin, ymin, xmax, ymax) pos =
let rot = 0. in
match pos with
| Centre ->
(xmin +. xmax) /. 2. -. w /. 2.,

View File

@ -20,9 +20,6 @@ type position =
(** Produce a debug string of a [position] *)
val string_of_position : position -> string
(** Orientation of the string on the page *)
type orientation = Horizontal | Vertical | VerticalDown
(** [calculate_position ignore_d w (xmin, ymin, xmax, ymax) pos] calculates
the absolute position of text given its width, bounding box and
position. If [ignore_d] is true, the distance from the position (e.g. 10 in
@ -31,4 +28,4 @@ val calculate_position :
bool ->
float ->
float * float * float * float ->
orientation -> position -> float * float * float
position -> float * float * float