raw mode for charcodes

This commit is contained in:
John Whitington 2021-11-03 12:15:15 +00:00
parent 2c6c357bdc
commit cd68d4852d
5 changed files with 7 additions and 5 deletions

View File

@ -1,6 +1,6 @@
2.5 (Upcoming 2022)
o Text added in existing fonts is now encoding-aware
o Text added in existing fonts is now encoding-aware (plus new raw mode)
o New operation -print-font-encoding
o New -print-dict-entry operation prints values for a given key
o Extend -remove-dict-entry to allow search

View File

@ -1122,7 +1122,7 @@ let print_fonts pdf =
(* Convert UTF8 text to a string of character codes: /WinAnsiEncoding (for the
standard 14 fonts) or whatever encoding is in the font (for existing fonts).

[s] is decoded to codepoints with Pdftext.codepoints_of_utf8, the charcode
extractor built from [font] is applied to them, and the resulting codes are
converted to chars with char_of_int.
NOTE(review): option_map presumably drops codepoints for which the extractor
returns None, and implode presumably builds a string from the char list -
confirm against their definitions. char_of_int raises Invalid_argument for
codes outside 0..255.

NOTE(review): the two [extractor] bindings below look like the before and
after sides of this change; read literally, the second shadows the first and
turns on ~debug:true - confirm the debug setting is meant to stay enabled. *)
let charcodes_of_utf8 pdf font s =
let extractor = Pdftext.charcode_extractor_of_font pdf font in
let extractor = Pdftext.charcode_extractor_of_font ~debug:true pdf font in
let codepoints = Pdftext.codepoints_of_utf8 s in
implode (map char_of_int (option_map extractor codepoints))
@ -1517,7 +1517,7 @@ let unescape_string s =
let
addtexts metrics linewidth outline fast fontname (font : Pdftext.standard_font option) embed bates batespad colour position linespacing
fontsize underneath text pages orientation cropbox opacity justification
midline topline filename extract_text_font_size shift pdf
midline topline filename extract_text_font_size shift ?(raw=false) pdf
=
if pages = [] then error "addtexts: empty page range" else
(*flprint "addtexts:\n";
@ -1561,7 +1561,7 @@ let
end
| _ -> failwith "addtext: font not found B"
in
let text = charcodes_of_utf8 pdf fontpdfobj text in
let text = if raw then text else charcodes_of_utf8 pdf fontpdfobj text in
let lines = map unescape_string (split_at_newline text) in
let pdf = ref pdf in
let voffset =

View File

@ -194,6 +194,7 @@ val addtexts :
string ->(*filename*)
float option -> (*extract_text_font_size*)
string -> (* shift *)
?raw:bool -> (* raw *)
Pdf.t ->(*pdf*)
Pdf.t

View File

@ -4010,7 +4010,7 @@ let go () =
args.linespacing args.fontsize args.underneath text range
args.orientation args.relative_to_cropbox args.opacity
args.justification args.midline args.topline filename
args.extract_text_font_size args.coord pdf)
args.extract_text_font_size args.coord ~raw:(args.encoding = Raw) pdf)
| Some RemoveText ->
let pdf = get_single_pdf args.op false in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in

View File

@ -5,6 +5,7 @@
%Document -bookmarks-json including mentioning UTF8
%Document -list-annotations-json
%Document -replace-dict-entry and search extension to -remove-dict-entry, and -print-dict-entry
%Document new text lookup for -add-text and new -raw mode
\documentclass{book}
% Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc.
\usepackage{comment}\excludecomment{cpdflib}\excludecomment{pycpdflib}