This commit is contained in:
John Whitington 2022-09-21 17:40:28 +01:00
parent 00ba9d4bee
commit 256a74bd0f
6 changed files with 42 additions and 69 deletions

View File

@ -1,3 +1,4 @@
(* Superimpose text, page numbers etc. *)
open Pdfutil open Pdfutil
open Cpdferror open Cpdferror
@ -6,10 +7,7 @@ type color =
| RGB of float * float * float | RGB of float * float * float
| CYMK of float * float * float * float | CYMK of float * float * float * float
(* Superimpose text, page numbers etc. *) (* Process UTF8 text to charcodes, given a font. *)
(* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever
is in the font (for existing fonts). *)
let charcodes_of_utf8 font s = let charcodes_of_utf8 font s =
let extractor = Pdftext.charcode_extractor_of_font_real ~debug:false font in let extractor = Pdftext.charcode_extractor_of_font_real ~debug:false font in
let codepoints = Pdftext.codepoints_of_utf8 s in let codepoints = Pdftext.codepoints_of_utf8 s in
@ -23,15 +21,6 @@ let charcodes_of_utf8 font s =
in in
implode (map char_of_int charcodes) implode (map char_of_int charcodes)
(* Convert the string [s] to UTF8: extract its codepoints with a text
   extractor built for the standard Times-Roman font under WinAnsiEncoding,
   then encode those codepoints as UTF8. *)
let utf8_of_winansi s =
  let winansi_font =
    Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding)
  in
  let extractor = Pdftext.text_extractor_of_font_real winansi_font in
  Pdftext.utf8_of_codepoints (Pdftext.codepoints_of_text extractor s)
(* Get the width of some text in the given font *) (* Get the width of some text in the given font *)
let width_of_text font text = let width_of_text font text =
match font with match font with
@ -151,7 +140,13 @@ let cap_height fontname =
with with
_ -> None _ -> None
let make_font embed fontname = let rec string_of_encoding = function
| Pdftext.StandardEncoding -> "StandardEncoding"
| Pdftext.MacRomanEncoding -> "MacRomanEncoding"
| Pdftext.WinAnsiEncoding -> "WinAnsiEncoding"
| _ -> error "unknown encoding"
let make_font embed encoding fontname =
let font = unopt (Pdftext.standard_font_of_name ("/" ^ fontname)) in let font = unopt (Pdftext.standard_font_of_name ("/" ^ fontname)) in
let header, width_data, _, chars_and_widths = Pdfstandard14.afm_data font in let header, width_data, _, chars_and_widths = Pdfstandard14.afm_data font in
let widths = extract_widths (list_of_hashtbl chars_and_widths) in let widths = extract_widths (list_of_hashtbl chars_and_widths) in
@ -174,9 +169,6 @@ let make_font embed fontname =
("/CapHeight", capheight); ("/CapHeight", capheight);
("/StemV", Pdf.Integer stemv)] ("/StemV", Pdf.Integer stemv)]
in in
(* With -no-embed-font, we use the standard encoding, and just the
* minimal stuff. Without -no-embed-font, we switch to WinAnsiEncoding,
* and fill out everything except the font file instead *)
if embed then if embed then
Pdf.Dictionary Pdf.Dictionary
[("/Type", Pdf.Name "/Font"); [("/Type", Pdf.Name "/Font");
@ -290,7 +282,8 @@ let pagelabel pdf num =
(Pdfpagelabels.complete (Pdfpagelabels.read pdf)) (Pdfpagelabels.complete (Pdfpagelabels.read pdf))
let addtext let addtext
lines linewidth outline fast colour fontname embed bates batespad fontsize (font : Pdftext.font option) lines linewidth outline fast colour fontname encoding embed bates batespad fontsize
(font : Pdftext.font option)
fontpdfobj underneath position hoffset voffset text pages orientation cropbox opacity fontpdfobj underneath position hoffset voffset text pages orientation cropbox opacity
justification filename extract_text_font_size shift pdf justification filename extract_text_font_size shift pdf
= =
@ -345,11 +338,7 @@ let addtext
match font with match font with
| Some (Pdftext.StandardFont (f, _)) -> | Some (Pdftext.StandardFont (f, _)) ->
let rawwidth = let rawwidth =
Pdfstandard14.textwidth Pdfstandard14.textwidth false encoding f text
false
(if embed then Pdftext.WinAnsiEncoding else Pdftext.StandardEncoding)
f
text
in in
(float rawwidth *. fontsize) /. 1000. (float rawwidth *. fontsize) /. 1000.
| Some font -> | Some font ->
@ -403,7 +392,7 @@ let addtext
else else
Pdf.parse_rectangle pdf page.Pdfpage.mediabox Pdf.parse_rectangle pdf page.Pdfpage.mediabox
in in
let x, y, rotate = Cpdfposition.calculate_position false textwidth mediabox orientation position in let x, y, rotate = Cpdfposition.calculate_position false textwidth mediabox position in
let hoffset, voffset = let hoffset, voffset =
if position = Diagonal || position = ReverseDiagonal if position = Diagonal || position = ReverseDiagonal
then -. (cos ((pi /. 2.) -. rotate) *. voffset), sin ((pi /. 2.) -. rotate) *. voffset then -. (cos ((pi /. 2.) -. rotate) *. voffset), sin ((pi /. 2.) -. rotate) *. voffset
@ -423,7 +412,7 @@ let addtext
match font with match font with
| Some (Pdftext.StandardFont _) -> | Some (Pdftext.StandardFont _) ->
let newfontdict = let newfontdict =
Pdf.add_dict_entry fontdict unique_fontname (make_font embed fontname) Pdf.add_dict_entry fontdict unique_fontname (make_font embed encoding fontname)
in in
Pdf.add_dict_entry resources' "/Font" newfontdict Pdf.add_dict_entry resources' "/Font" newfontdict
| Some f -> | Some f ->
@ -510,8 +499,8 @@ let
let realfontname = ref fontname in let realfontname = ref fontname in
let fontpdfobj = let fontpdfobj =
match font with match font with
| Some (StandardFont (f, _)) -> | Some (StandardFont (f, encoding)) ->
make_font embed (Pdftext.string_of_standard_font f) make_font embed encoding (Pdftext.string_of_standard_font f)
| Some f -> | Some f ->
Pdf.Indirect (Pdftext.write_font pdf f) Pdf.Indirect (Pdftext.write_font pdf f)
| None -> | None ->
@ -577,22 +566,25 @@ let
voffset := !voffset +. capheight voffset := !voffset +. capheight
| _ -> | _ ->
Printf.eprintf "Unable to find midline adjustment in this font\n" Printf.eprintf "Unable to find midline adjustment in this font\n"
end end;
else let encoding =
iter match font with
(fun line -> | Some (Pdftext.StandardFont (_, e)) -> e
let voff, hoff = | Some (Pdftext.SimpleFont {encoding}) -> encoding
if orientation = Cpdfposition.Vertical then 0., -.(!voffset) else !voffset, 0. | _ -> Pdftext.WinAnsiEncoding
in in
pdf := iter
addtext lines linewidth outline fast colour !realfontname (fun line ->
embed bates batespad fontsize font fontpdfobj underneath position hoff voff line let voff, hoff = !voffset, 0. in
pages orientation cropbox opacity justification filename pdf :=
extract_text_font_size shift addtext lines linewidth outline fast colour !realfontname encoding
!pdf; embed bates batespad fontsize font fontpdfobj underneath position hoff voff line
voffset := !voffset +. (linespacing *. fontsize)) pages orientation cropbox opacity justification filename
lines; extract_text_font_size shift
!pdf !pdf;
voffset := !voffset +. (linespacing *. fontsize))
lines;
!pdf
let removetext range pdf = let removetext range pdf =
(* Could fail on nesting, or other marked content inside our marked content.*) (* Could fail on nesting, or other marked content inside our marked content.*)
@ -652,7 +644,7 @@ let addrectangle
Pdf.parse_rectangle pdf page.Pdfpage.mediabox Pdf.parse_rectangle pdf page.Pdfpage.mediabox
in in
let x, y, _ = let x, y, _ =
Cpdfposition.calculate_position false w mediabox Cpdfposition.Horizontal position Cpdfposition.calculate_position false w mediabox position
in in
let x, y = let x, y =
match position with match position with

View File

@ -34,7 +34,7 @@ val addtexts :
bool -> (*underneath*) bool -> (*underneath*)
string ->(*text*) string ->(*text*)
int list ->(*page range*) int list ->(*page range*)
Cpdfposition.orientation ->(*orientation*) 'a ->(*orientation*)
bool ->(*relative to cropbox?*) bool ->(*relative to cropbox?*)
float ->(*opacity*) float ->(*opacity*)
justification ->(*justification*) justification ->(*justification*)

View File

@ -396,7 +396,6 @@ type args =
mutable bates : int; mutable bates : int;
mutable batespad : int option; mutable batespad : int option;
mutable prerotate : bool; mutable prerotate : bool;
mutable orientation : Cpdfposition.orientation;
mutable relative_to_cropbox : bool; mutable relative_to_cropbox : bool;
mutable keepversion : bool; mutable keepversion : bool;
mutable bycolumns : bool; mutable bycolumns : bool;
@ -519,7 +518,6 @@ let args =
bates = 0; bates = 0;
batespad = None; batespad = None;
prerotate = false; prerotate = false;
orientation = Cpdfposition.Horizontal;
relative_to_cropbox = false; relative_to_cropbox = false;
keepversion = false; keepversion = false;
bycolumns = false; bycolumns = false;
@ -642,7 +640,6 @@ let reset_arguments () =
args.bates <- 0; args.bates <- 0;
args.batespad <- None; args.batespad <- None;
args.prerotate <- false; args.prerotate <- false;
args.orientation <- Cpdfposition.Horizontal;
args.relative_to_cropbox <- false; args.relative_to_cropbox <- false;
args.keepversion <- false; args.keepversion <- false;
args.bycolumns <- false; args.bycolumns <- false;
@ -1472,12 +1469,6 @@ let setimpath p =
let setp2ppath p = let setp2ppath p =
args.path_to_p2p <- p args.path_to_p2p <- p
(* Handler for -text-vertical: record the Vertical orientation in [args]. *)
let settextvertical () = args.orientation <- Cpdfposition.Vertical
(* Handler for -text-vertical-down: record the VerticalDown orientation in
   [args]. *)
let settextverticaldown () = args.orientation <- Cpdfposition.VerticalDown
let setfrombox s = let setfrombox s =
detect_duplicate_op CopyBox; detect_duplicate_op CopyBox;
args.op <- Some CopyBox; args.op <- Some CopyBox;
@ -2527,8 +2518,6 @@ and specs =
("-remove-unused-resources", Arg.Unit (setop RemoveUnusedResources), ""); ("-remove-unused-resources", Arg.Unit (setop RemoveUnusedResources), "");
("-stay-on-error", Arg.Unit setstayonerror, ""); ("-stay-on-error", Arg.Unit setstayonerror, "");
("-extract-fontfile", Arg.Unit (setop ExtractFontFile), ""); ("-extract-fontfile", Arg.Unit (setop ExtractFontFile), "");
("-text-vertical", Arg.Unit settextvertical, "");
("-text-vertical-down", Arg.Unit settextverticaldown, "");
("-flat-kids", Arg.Unit setflatkids, ""); ("-flat-kids", Arg.Unit setflatkids, "");
("-debug", Arg.Unit setdebug, ""); ("-debug", Arg.Unit setdebug, "");
("-debug-crypt", Arg.Unit setdebugcrypt, ""); ("-debug-crypt", Arg.Unit setdebugcrypt, "");
@ -3736,7 +3725,7 @@ let go () =
args.linewidth args.outline args.fast args.fontname args.linewidth args.outline args.fast args.fontname
font args.embedfonts args.bates args.batespad args.color args.position font args.embedfonts args.bates args.batespad args.color args.position
args.linespacing args.fontsize args.underneath text range args.linespacing args.fontsize args.underneath text range
args.orientation args.relative_to_cropbox args.opacity () args.relative_to_cropbox args.opacity
args.justification args.midline args.topline filename args.justification args.midline args.topline filename
args.extract_text_font_size args.coord ~raw:(args.encoding = Raw) pdf) args.extract_text_font_size args.coord ~raw:(args.encoding = Raw) pdf)
| Some RemoveText -> | Some RemoveText ->

View File

@ -142,7 +142,7 @@ let scale_page_contents ?(fast=false) scale position pdf pnum page =
| Some r -> r | Some r -> r
| None -> page.Pdfpage.mediabox) | None -> page.Pdfpage.mediabox)
in in
let sx, sy, _ = Cpdfposition.calculate_position true 0. box Horizontal position in let sx, sy, _ = Cpdfposition.calculate_position true 0. box position in
let tx, ty = let tx, ty =
let open Cpdfposition in let open Cpdfposition in
match position with match position with

View File

@ -32,14 +32,9 @@ let string_of_position = function
| ReverseDiagonal -> "Reverse Diagonal" | ReverseDiagonal -> "Reverse Diagonal"
| Centre -> "Centre" | Centre -> "Centre"
(* Orientation of the text on the page. *)
type orientation =
| Horizontal
(* NOTE(review): Vertical appears to be handled by the caller rather than by
   calculate_position - confirm. *)
| Vertical
(* calculate_position uses a rotation of 270 degrees for this case, and 0
   for the others. *)
| VerticalDown
(* Given the mediabox, calculate an absolute position for the text. *) (* Given the mediabox, calculate an absolute position for the text. *)
let calculate_position ignore_d w (xmin, ymin, xmax, ymax) orientation pos = let calculate_position ignore_d w (xmin, ymin, xmax, ymax) pos =
let rot = if orientation = VerticalDown then rad_of_deg 270. else 0. in let rot = 0. in
match pos with match pos with
| Centre -> | Centre ->
(xmin +. xmax) /. 2. -. w /. 2., (xmin +. xmax) /. 2. -. w /. 2.,

View File

@ -20,9 +20,6 @@ type position =
(** Produce a debug string of a [position] *) (** Produce a debug string of a [position] *)
val string_of_position : position -> string val string_of_position : position -> string
(** Orientation of the string on the page *)
type orientation = Horizontal | Vertical | VerticalDown
(** [calculate_position ignore_d w (xmin, ymin, xmax, ymax) orientation pos] calculates (** [calculate_position ignore_d w (xmin, ymin, xmax, ymax) pos] calculates
the absolute position of text given its width, bounding box, orientation and the absolute position of text given its width, bounding box and
position. If [ignore_d] is true, the distance from the position (e.g 10 in position. If [ignore_d] is true, the distance from the position (e.g 10 in
@ -31,4 +28,4 @@ val calculate_position :
bool -> bool ->
float -> float ->
float * float * float * float -> float * float * float * float ->
orientation -> position -> float * float * float position -> float * float * float