From 256a74bd0f00bf63fe3a88989f7346553432f16a Mon Sep 17 00:00:00 2001 From: John Whitington Date: Wed, 21 Sep 2022 17:40:28 +0100 Subject: [PATCH] more --- cpdfaddtext.ml | 80 ++++++++++++++++++++++-------------------------- cpdfaddtext.mli | 2 +- cpdfcommand.ml | 13 +------- cpdfpage.ml | 2 +- cpdfposition.ml | 9 ++---- cpdfposition.mli | 5 +-- 6 files changed, 42 insertions(+), 69 deletions(-) diff --git a/cpdfaddtext.ml b/cpdfaddtext.ml index c077ed0..e1735e2 100644 --- a/cpdfaddtext.ml +++ b/cpdfaddtext.ml @@ -1,3 +1,4 @@ +(* Superimpose text, page numbers etc. *) open Pdfutil open Cpdferror @@ -6,10 +7,7 @@ type color = | RGB of float * float * float | CYMK of float * float * float * float -(* Superimpose text, page numbers etc. *) - -(* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever - is in the font (for existing fonts). *) +(* Process UTF8 text to charcodes, given a font. *) let charcodes_of_utf8 font s = let extractor = Pdftext.charcode_extractor_of_font_real ~debug:false font in let codepoints = Pdftext.codepoints_of_utf8 s in @@ -23,15 +21,6 @@ let charcodes_of_utf8 font s = in implode (map char_of_int charcodes) -(* Process codepoints back to UTF8, assuming it came from UTF8 to start with *) -let utf8_of_winansi s = - let text_extractor = - Pdftext.text_extractor_of_font_real - (Pdftext.StandardFont (Pdftext.TimesRoman, Pdftext.WinAnsiEncoding)) - in - let codepoints = Pdftext.codepoints_of_text text_extractor s in - Pdftext.utf8_of_codepoints codepoints - (* Get the width of some text in the given font *) let width_of_text font text = match font with @@ -151,7 +140,13 @@ let cap_height fontname = with _ -> None -let make_font embed fontname = +let rec string_of_encoding = function + | Pdftext.StandardEncoding -> "StandardEncoding" + | Pdftext.MacRomanEncoding -> "MacRomanEncoding" + | Pdftext.WinAnsiEncoding -> "WinAnsiEncoding" + | _ -> error "unknown encoding" + +let make_font embed encoding fontname = let font = unopt (Pdftext.standard_font_of_name ("/" ^ fontname)) in let header, width_data, _, chars_and_widths = Pdfstandard14.afm_data font in let widths = extract_widths (list_of_hashtbl chars_and_widths) in @@ -174,9 +169,6 @@ let make_font embed fontname = ("/CapHeight", capheight); ("/StemV", Pdf.Integer stemv)] in - (* With -no-embed-font, we use the standard encoding, and just the - * minimal stuff. Without -no-embed-font, we switch to WinAnsiEncoding, - * and fill out everything except the font file instead *) if embed then Pdf.Dictionary [("/Type", Pdf.Name "/Font"); @@ -290,7 +282,8 @@ let pagelabel pdf num = (Pdfpagelabels.complete (Pdfpagelabels.read pdf)) let addtext - lines linewidth outline fast colour fontname embed bates batespad fontsize (font : Pdftext.font option) + lines linewidth outline fast colour fontname encoding embed bates batespad fontsize + (font : Pdftext.font option) fontpdfobj underneath position hoffset voffset text pages orientation cropbox opacity justification filename extract_text_font_size shift pdf = @@ -345,11 +338,7 @@ let addtext match font with | Some (Pdftext.StandardFont (f, _)) -> let rawwidth = - Pdfstandard14.textwidth - false - (if embed then Pdftext.WinAnsiEncoding else Pdftext.StandardEncoding) - f - text + Pdfstandard14.textwidth false encoding f text in (float rawwidth *. fontsize) /. 1000. | Some font -> @@ -403,7 +392,7 @@ let addtext else Pdf.parse_rectangle pdf page.Pdfpage.mediabox in - let x, y, rotate = Cpdfposition.calculate_position false textwidth mediabox orientation position in + let x, y, rotate = Cpdfposition.calculate_position false textwidth mediabox position in let hoffset, voffset = if position = Diagonal || position = ReverseDiagonal then -. (cos ((pi /. 2.) -. rotate) *. voffset), sin ((pi /. 2.) -. rotate) *. voffset @@ -423,7 +412,7 @@ let addtext match font with | Some (Pdftext.StandardFont _) -> let newfontdict = - Pdf.add_dict_entry fontdict unique_fontname (make_font embed fontname) + Pdf.add_dict_entry fontdict unique_fontname (make_font embed encoding fontname) in Pdf.add_dict_entry resources' "/Font" newfontdict | Some f -> @@ -510,8 +499,8 @@ let let realfontname = ref fontname in let fontpdfobj = match font with - | Some (StandardFont (f, _)) -> - make_font embed (Pdftext.string_of_standard_font f) + | Some (StandardFont (f, encoding)) -> + make_font embed encoding (Pdftext.string_of_standard_font f) | Some f -> Pdf.Indirect (Pdftext.write_font pdf f) | None -> @@ -577,22 +566,25 @@ let voffset := !voffset +. capheight | _ -> Printf.eprintf "Unable to find midline adjustment in this font\n" - end - else - iter - (fun line -> - let voff, hoff = - if orientation = Cpdfposition.Vertical then 0., -.(!voffset) else !voffset, 0. - in - pdf := - addtext lines linewidth outline fast colour !realfontname - embed bates batespad fontsize font fontpdfobj underneath position hoff voff line - pages orientation cropbox opacity justification filename - extract_text_font_size shift - !pdf; - voffset := !voffset +. (linespacing *. fontsize)) - lines; - !pdf + end; + let encoding = + match font with + | Some (Pdftext.StandardFont (_, e)) -> e + | Some (Pdftext.SimpleFont {encoding}) -> encoding + | _ -> Pdftext.WinAnsiEncoding + in + iter + (fun line -> + let voff, hoff = !voffset, 0. in + pdf := + addtext lines linewidth outline fast colour !realfontname encoding + embed bates batespad fontsize font fontpdfobj underneath position hoff voff line + pages orientation cropbox opacity justification filename + extract_text_font_size shift + !pdf; + voffset := !voffset +. (linespacing *. fontsize)) + lines; + !pdf let removetext range pdf = (* Could fail on nesting, or other marked content inside our marked content.*) @@ -652,7 +644,7 @@ let addrectangle Pdf.parse_rectangle pdf page.Pdfpage.mediabox in let x, y, _ = - Cpdfposition.calculate_position false w mediabox Cpdfposition.Horizontal position + Cpdfposition.calculate_position false w mediabox position in let x, y = match position with diff --git a/cpdfaddtext.mli b/cpdfaddtext.mli index 827dca5..908788c 100644 --- a/cpdfaddtext.mli +++ b/cpdfaddtext.mli @@ -34,7 +34,7 @@ val addtexts : bool -> (*underneath*) string ->(*text*) int list ->(*page range*) - Cpdfposition.orientation ->(*orientation*) + 'a ->(*orientation*) bool ->(*relative to cropbox?*) float ->(*opacity*) justification ->(*justification*) diff --git a/cpdfcommand.ml b/cpdfcommand.ml index bfee92b..f7676a0 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -396,7 +396,6 @@ type args = mutable bates : int; mutable batespad : int option; mutable prerotate : bool; - mutable orientation : Cpdfposition.orientation; mutable relative_to_cropbox : bool; mutable keepversion : bool; mutable bycolumns : bool; @@ -519,7 +518,6 @@ let args = bates = 0; batespad = None; prerotate = false; - orientation = Cpdfposition.Horizontal; relative_to_cropbox = false; keepversion = false; bycolumns = false; @@ -642,7 +640,6 @@ let reset_arguments () = args.bates <- 0; args.batespad <- None; args.prerotate <- false; - args.orientation <- Cpdfposition.Horizontal; args.relative_to_cropbox <- false; args.keepversion <- false; args.bycolumns <- false; @@ -1472,12 +1469,6 @@ let setimpath p = let setp2ppath p = args.path_to_p2p <- p -let settextvertical () = - args.orientation <- Cpdfposition.Vertical - -let settextverticaldown () = - args.orientation <- Cpdfposition.VerticalDown - let setfrombox s = detect_duplicate_op CopyBox; args.op <- Some CopyBox; @@ -2527,8 +2518,6 @@ and specs = ("-remove-unused-resources", Arg.Unit (setop RemoveUnusedResources), ""); ("-stay-on-error", Arg.Unit setstayonerror, ""); ("-extract-fontfile", Arg.Unit (setop ExtractFontFile), ""); - ("-text-vertical", Arg.Unit settextvertical, ""); - ("-text-vertical-down", Arg.Unit settextverticaldown, ""); ("-flat-kids", Arg.Unit setflatkids, ""); ("-debug", Arg.Unit setdebug, ""); ("-debug-crypt", Arg.Unit setdebugcrypt, ""); @@ -3736,7 +3725,7 @@ let go () = args.linewidth args.outline args.fast args.fontname font args.embedfonts args.bates args.batespad args.color args.position args.linespacing args.fontsize args.underneath text range - args.orientation args.relative_to_cropbox args.opacity + () args.relative_to_cropbox args.opacity args.justification args.midline args.topline filename args.extract_text_font_size args.coord ~raw:(args.encoding = Raw) pdf) | Some RemoveText -> diff --git a/cpdfpage.ml b/cpdfpage.ml index 4e1a64b..49bda11 100644 --- a/cpdfpage.ml +++ b/cpdfpage.ml @@ -142,7 +142,7 @@ let scale_page_contents ?(fast=false) scale position pdf pnum page = | Some r -> r | None -> page.Pdfpage.mediabox) in - let sx, sy, _ = Cpdfposition.calculate_position true 0. box Horizontal position in + let sx, sy, _ = Cpdfposition.calculate_position true 0. box position in let tx, ty = let open Cpdfposition in match position with diff --git a/cpdfposition.ml b/cpdfposition.ml index b139603..a81ba3a 100644 --- a/cpdfposition.ml +++ b/cpdfposition.ml @@ -32,14 +32,9 @@ let string_of_position = function | ReverseDiagonal -> "Reverse Diagonal" | Centre -> "Centre" -type orientation = - | Horizontal - | Vertical - | VerticalDown - (* Given the mediabox, calculate an absolute position for the text. *) -let calculate_position ignore_d w (xmin, ymin, xmax, ymax) orientation pos = - let rot = if orientation = VerticalDown then rad_of_deg 270. else 0. in +let calculate_position ignore_d w (xmin, ymin, xmax, ymax) pos = + let rot = 0. in match pos with | Centre -> (xmin +. xmax) /. 2. -. w /. 2., diff --git a/cpdfposition.mli b/cpdfposition.mli index a38ce3a..bf715cc 100644 --- a/cpdfposition.mli +++ b/cpdfposition.mli @@ -20,9 +20,6 @@ type position = (** Produce a debug string of a [position] *) val string_of_position : position -> string -(** Orientation of the string on the page *) -type orientation = Horizontal | Vertical | VerticalDown - (** [calculate_position ignore_d w (xmin, ymin, xmax, ymax) orientation pos] calculates the absolute position of text given its width, bounding box, orientation and position. If [ignore_d] is true, the distance from the position (e.g 10 in @@ -31,4 +28,4 @@ val calculate_position : bool -> float -> float * float * float * float -> - orientation -> position -> float * float * float + position -> float * float * float