From cd68d4852de89de53f9edff6410b5acf722f5b1b Mon Sep 17 00:00:00 2001 From: John Whitington Date: Wed, 3 Nov 2021 12:15:15 +0000 Subject: [PATCH] raw mode for charcodes --- Changes | 2 +- cpdf.ml | 6 +++--- cpdf.mli | 1 + cpdfcommand.ml | 2 +- cpdfmanual.tex | 1 + 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Changes b/Changes index 05defbb..b59edf4 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,6 @@ 2.5 (Upcoming 2022) -o Text added in existing fonts is now encoding-aware +o Text added in existing fonts is now encoding-aware (plus new raw mode) o New operation -print-font-encoding o New -print-dict-entry operation prints values for a given key o Extend -remove-dict-entry to allow search diff --git a/cpdf.ml b/cpdf.ml index aedfac4..af3e888 100644 --- a/cpdf.ml +++ b/cpdf.ml @@ -1122,7 +1122,7 @@ let print_fonts pdf = (* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever is in the font (for existing fonts). *) let charcodes_of_utf8 pdf font s = - let extractor = Pdftext.charcode_extractor_of_font pdf font in + let extractor = Pdftext.charcode_extractor_of_font ~debug:true pdf font in let codepoints = Pdftext.codepoints_of_utf8 s in implode (map char_of_int (option_map extractor codepoints)) @@ -1517,7 +1517,7 @@ let unescape_string s = let addtexts metrics linewidth outline fast fontname (font : Pdftext.standard_font option) embed bates batespad colour position linespacing fontsize underneath text pages orientation cropbox opacity justification - midline topline filename extract_text_font_size shift pdf + midline topline filename extract_text_font_size shift ?(raw=false) pdf = if pages = [] then error "addtexts: empty page range" else (*flprint "addtexts:\n"; @@ -1561,7 +1561,7 @@ let end | _ -> failwith "addtext: font not found B" in - let text = charcodes_of_utf8 pdf fontpdfobj text in + let text = if raw then text else charcodes_of_utf8 pdf fontpdfobj text in let lines = map unescape_string (split_at_newline text) in let pdf = ref pdf in let voffset = diff --git a/cpdf.mli b/cpdf.mli index ab554c8..71797d1 100644 --- a/cpdf.mli +++ b/cpdf.mli @@ -194,6 +194,7 @@ val addtexts : string ->(*filename*) float option -> (*extract_text_font_size*) string -> (* shift *) + ?raw:bool -> (* raw *) Pdf.t ->(*pdf*) Pdf.t diff --git a/cpdfcommand.ml b/cpdfcommand.ml index ebd2c03..9c930b2 100644 --- a/cpdfcommand.ml +++ b/cpdfcommand.ml @@ -4010,7 +4010,7 @@ let go () = args.linespacing args.fontsize args.underneath text range args.orientation args.relative_to_cropbox args.opacity args.justification args.midline args.topline filename - args.extract_text_font_size args.coord pdf) + args.extract_text_font_size args.coord ~raw:(args.encoding = Raw) pdf) | Some RemoveText -> let pdf = get_single_pdf args.op false in let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in diff --git a/cpdfmanual.tex b/cpdfmanual.tex index e65fe6b..961a279 100644 --- a/cpdfmanual.tex +++ b/cpdfmanual.tex @@ -5,6 +5,7 @@ %Document -bookmarks-json including mentioning UTF8 %Document -list-annotations-json %Document -replace-dict-entry and search extension to -remove-dict-entry, and -print-dict-entry +%Document new text lookup for -add-text and new -raw mode \documentclass{book} % Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc. \usepackage{comment}\excludecomment{cpdflib}\excludecomment{pycpdflib}