This commit is contained in:
John Whitington 2022-09-27 16:28:34 +01:00
parent ec1ef96d21
commit 4d666417e1
3 changed files with 19 additions and 29 deletions

View File

@ -23,7 +23,9 @@ let charcodes_of_utf8 font s =
(fun codepoint -> (fun codepoint ->
match extractor codepoint with match extractor codepoint with
| Some cc -> Some cc | Some cc -> Some cc
| None -> Printf.eprintf "Warning: character not found in font for unicode codepoint 0x%X\n" codepoint; None) | None ->
Printf.eprintf "Warning: character not found in font for unicode codepoint 0x%X\n" codepoint;
None)
codepoints codepoints
in in
implode (map char_of_int charcodes) implode (map char_of_int charcodes)
@ -150,8 +152,8 @@ let extract_widths chars_and_widths =
(ilist 0 255) (ilist 0 255)
(* For finding the height for URL links, we try to find the Cap Height for the (* For finding the height for URL links, we try to find the Cap Height for the
font. For now, this will only work for built-in fonts. We fall back to using font. We fall back to using the font size alone if we cannot get the cap
the font size alone if we cannot get the cap height. *) height. *)
let cap_height font fontname = let cap_height font fontname =
match font with match font with
| Some (Pdftext.SimpleFont {fontdescriptor = Some {capheight}}) -> | Some (Pdftext.SimpleFont {fontdescriptor = Some {capheight}}) ->
@ -165,15 +167,6 @@ let cap_height font fontname =
with with
_ -> None _ -> None
(* [string_of_encoding e] gives the textual name of the encoding [e]. Only
   the three encodings matched below have a name here; any other value is
   handed to [error] (defined elsewhere in this file) with the message
   "unknown encoding". The function is not recursive, so it is bound with a
   plain [let] rather than [let rec]. *)
let string_of_encoding = function
  | Pdftext.StandardEncoding -> "StandardEncoding"
  | Pdftext.MacRomanEncoding -> "MacRomanEncoding"
  | Pdftext.WinAnsiEncoding -> "WinAnsiEncoding"
  (* Catch-all kept deliberately: the remaining constructors of the encoding
     type are not visible from this part of the file. *)
  | _ -> error "unknown encoding"
(* [make_font embed pdf font] writes [font] into [pdf] by calling
   [Pdftext.write_font] and wraps whatever that call returns in
   [Pdf.Indirect]. The [embed] argument is accepted but plays no part in the
   result. *)
let make_font embed pdf font =
  let written = Pdftext.write_font pdf font in
  Pdf.Indirect written
let extract_page_text only_fontsize pdf _ page = let extract_page_text only_fontsize pdf _ page =
let text_extractor = ref None in let text_extractor = ref None in
let right_font_size = ref false in let right_font_size = ref false in
@ -270,10 +263,9 @@ let pagelabel pdf num =
(Pdfpagelabels.complete (Pdfpagelabels.read pdf)) (Pdfpagelabels.complete (Pdfpagelabels.read pdf))
let addtext let addtext
time lines linewidth outline fast colour fontname encoding embed bates batespad fontsize time lines linewidth outline fast colour fontname encoding bates batespad
(font : Pdftext.font option) fontsize font fontpdfobj underneath position hoffset voffset text pages
fontpdfobj underneath position hoffset voffset text pages orientation cropbox opacity cropbox opacity justification filename extract_text_font_size shift pdf
justification filename extract_text_font_size shift pdf
= =
let endpage = Pdfpage.endpage pdf in let endpage = Pdfpage.endpage pdf in
let replace_pairs pdf filename bates batespad num page = let replace_pairs pdf filename bates batespad num page =
@ -399,7 +391,7 @@ let addtext
match font with match font with
| Some (Pdftext.StandardFont _ as font) -> | Some (Pdftext.StandardFont _ as font) ->
let newfontdict = let newfontdict =
Pdf.add_dict_entry fontdict unique_fontname (make_font embed pdf font) Pdf.add_dict_entry fontdict unique_fontname (Pdf.Indirect (Pdftext.write_font pdf font))
in in
Pdf.add_dict_entry resources' "/Font" newfontdict Pdf.add_dict_entry resources' "/Font" newfontdict
| Some f -> | Some f ->
@ -478,17 +470,18 @@ let unescape_string s =
implode (unescape_chars [] (explode s)) implode (unescape_chars [] (explode s))
let let
addtexts ?embedinfo linewidth outline fast fontname (font : Pdftext.font option) embed bates batespad colour position linespacing addtexts ?embedinfo linewidth outline fast fontname font bates batespad
fontsize underneath text pages orientation cropbox opacity justification colour position linespacing fontsize underneath text pages cropbox opacity
midline topline filename extract_text_font_size shift ?(raw=false) pdf justification midline topline filename extract_text_font_size shift
?(raw=false) pdf
= =
let time = Cpdfstrftime.current_time () in let time = Cpdfstrftime.current_time () in
if pages = [] then error "addtexts: empty page range" else if pages = [] then error "addtexts: empty page range" else
let realfontname = ref fontname in let realfontname = ref fontname in
let fontpdfobj = let fontpdfobj =
match font with match font with
| Some (StandardFont _ as font) -> | Some (Pdftext.StandardFont _ as font) ->
make_font embed pdf font Pdf.Indirect (Pdftext.write_font pdf font)
| Some f -> | Some f ->
begin match Hashtbl.find glob_pdfobjnum fontname with begin match Hashtbl.find glob_pdfobjnum fontname with
| exception Not_found -> | exception Not_found ->
@ -572,9 +565,8 @@ let
let voff, hoff = !voffset, 0. in let voff, hoff = !voffset, 0. in
pdf := pdf :=
addtext time lines linewidth outline fast colour !realfontname encoding addtext time lines linewidth outline fast colour !realfontname encoding
embed bates batespad fontsize font fontpdfobj underneath position hoff voff line bates batespad fontsize font fontpdfobj underneath position hoff voff line
pages orientation cropbox opacity justification filename pages cropbox opacity justification filename extract_text_font_size shift
extract_text_font_size shift
!pdf; !pdf;
voffset := !voffset +. (linespacing *. fontsize)) voffset := !voffset +. (linespacing *. fontsize))
lines; lines;

View File

@ -25,7 +25,6 @@ val addtexts :
bool -> (*fast*) bool -> (*fast*)
string -> (*fontname*) string -> (*fontname*)
Pdftext.font option -> (*font*) Pdftext.font option -> (*font*)
bool -> (* embed font *)
int -> (* bates number *) int -> (* bates number *)
int option -> (* bates padding width *) int option -> (* bates padding width *)
color -> (*colour*) color -> (*colour*)
@ -35,7 +34,6 @@ val addtexts :
bool -> (*underneath*) bool -> (*underneath*)
string ->(*text*) string ->(*text*)
int list ->(*page range*) int list ->(*page range*)
'a ->(*orientation*)
bool ->(*relative to cropbox?*) bool ->(*relative to cropbox?*)
float ->(*opacity*) float ->(*opacity*)
justification ->(*justification*) justification ->(*justification*)

View File

@ -3721,9 +3721,9 @@ let go () =
write_pdf false write_pdf false
(Cpdfaddtext.addtexts (Cpdfaddtext.addtexts
?embedinfo args.linewidth args.outline args.fast args.fontname ?embedinfo args.linewidth args.outline args.fast args.fontname
font false (*args.embedstd14*) args.bates args.batespad args.color args.position font args.bates args.batespad args.color args.position
args.linespacing args.fontsize args.underneath text range args.linespacing args.fontsize args.underneath text range
() args.relative_to_cropbox args.opacity args.relative_to_cropbox args.opacity
args.justification args.midline args.topline filename args.justification args.midline args.topline filename
args.extract_text_font_size args.coord ~raw:(args.encoding = Raw) pdf) args.extract_text_font_size args.coord ~raw:(args.encoding = Raw) pdf)
| Some RemoveText -> | Some RemoveText ->