raw mode for charcodes

This commit is contained in:
John Whitington 2021-11-03 12:15:15 +00:00
parent 2c6c357bdc
commit cd68d4852d
5 changed files with 7 additions and 5 deletions

View File

@ -1,6 +1,6 @@
2.5 (Upcoming 2022) 2.5 (Upcoming 2022)
o Text added in existing fonts is now encoding-aware o Text added in existing fonts is now encoding-aware (plus new raw mode)
o New operation -print-font-encoding o New operation -print-font-encoding
o New -print-dict-entry operation prints values for a given key o New -print-dict-entry operation prints values for a given key
o Extend -remove-dict-entry to allow search o Extend -remove-dict-entry to allow search

View File

@ -1122,7 +1122,7 @@ let print_fonts pdf =
(* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever (* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever
is in the font (for existing fonts). *) is in the font (for existing fonts). *)
let charcodes_of_utf8 pdf font s = let charcodes_of_utf8 pdf font s =
let extractor = Pdftext.charcode_extractor_of_font pdf font in let extractor = Pdftext.charcode_extractor_of_font ~debug:true pdf font in
let codepoints = Pdftext.codepoints_of_utf8 s in let codepoints = Pdftext.codepoints_of_utf8 s in
implode (map char_of_int (option_map extractor codepoints)) implode (map char_of_int (option_map extractor codepoints))
@ -1517,7 +1517,7 @@ let unescape_string s =
let let
addtexts metrics linewidth outline fast fontname (font : Pdftext.standard_font option) embed bates batespad colour position linespacing addtexts metrics linewidth outline fast fontname (font : Pdftext.standard_font option) embed bates batespad colour position linespacing
fontsize underneath text pages orientation cropbox opacity justification fontsize underneath text pages orientation cropbox opacity justification
midline topline filename extract_text_font_size shift pdf midline topline filename extract_text_font_size shift ?(raw=false) pdf
= =
if pages = [] then error "addtexts: empty page range" else if pages = [] then error "addtexts: empty page range" else
(*flprint "addtexts:\n"; (*flprint "addtexts:\n";
@ -1561,7 +1561,7 @@ let
end end
| _ -> failwith "addtext: font not found B" | _ -> failwith "addtext: font not found B"
in in
let text = charcodes_of_utf8 pdf fontpdfobj text in let text = if raw then text else charcodes_of_utf8 pdf fontpdfobj text in
let lines = map unescape_string (split_at_newline text) in let lines = map unescape_string (split_at_newline text) in
let pdf = ref pdf in let pdf = ref pdf in
let voffset = let voffset =

View File

@ -194,6 +194,7 @@ val addtexts :
string ->(*filename*) string ->(*filename*)
float option -> (*extract_text_font_size*) float option -> (*extract_text_font_size*)
string -> (* shift *) string -> (* shift *)
?raw:bool -> (* raw *)
Pdf.t ->(*pdf*) Pdf.t ->(*pdf*)
Pdf.t Pdf.t

View File

@ -4010,7 +4010,7 @@ let go () =
args.linespacing args.fontsize args.underneath text range args.linespacing args.fontsize args.underneath text range
args.orientation args.relative_to_cropbox args.opacity args.orientation args.relative_to_cropbox args.opacity
args.justification args.midline args.topline filename args.justification args.midline args.topline filename
args.extract_text_font_size args.coord pdf) args.extract_text_font_size args.coord ~raw:(args.encoding = Raw) pdf)
| Some RemoveText -> | Some RemoveText ->
let pdf = get_single_pdf args.op false in let pdf = get_single_pdf args.op false in
let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in

View File

@ -5,6 +5,7 @@
%Document -bookmarks-json including mentioning UTF8 %Document -bookmarks-json including mentioning UTF8
%Document -list-annotations-json %Document -list-annotations-json
%Document -replace-dict-entry and search extension to -remove-dict-entry, and -print-dict-entry %Document -replace-dict-entry and search extension to -remove-dict-entry, and -print-dict-entry
%Document new text lookup for -add-text and new -raw mode
\documentclass{book} \documentclass{book}
% Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc. % Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc.
\usepackage{comment}\excludecomment{cpdflib}\excludecomment{pycpdflib} \usepackage{comment}\excludecomment{cpdflib}\excludecomment{pycpdflib}