raw mode for charcodes

2021-11-03 12:15:15 +00:00 · 2021-11-03 12:15:15 +00:00 · cd68d4852d
parent 2c6c357bdc
commit cd68d4852d
5 changed files with 7 additions and 5 deletions
--- a/2
+++ b/2
@@ -1,6 +1,6 @@
 2.5 (Upcoming 2022)

-o Text added in existing fonts is now encoding-aware
+o Text added in existing fonts is now encoding-aware (plus new raw mode)
 o New operation -print-font-encoding
 o New -print-dict-entry operation prints values for a given key
 o Extend -remove-dict-entry to allow search
--- a/cpdf.ml
+++ b/cpdf.ml
@@ -1122,7 +1122,7 @@ let print_fonts pdf =
 (* Process UTF8 text to /WinAnsiEncoding string (for standard 14) or whatever
   is in the font (for existing fonts). *)
 let charcodes_of_utf8 pdf font s =
-  let extractor = Pdftext.charcode_extractor_of_font pdf font in
+  let extractor = Pdftext.charcode_extractor_of_font ~debug:true pdf font in
  let codepoints = Pdftext.codepoints_of_utf8 s in
    implode (map char_of_int (option_map extractor codepoints))

@@ -1517,7 +1517,7 @@ let unescape_string s =
 let
  addtexts metrics linewidth outline fast fontname (font : Pdftext.standard_font option) embed bates batespad colour position linespacing
  fontsize underneath text pages orientation cropbox opacity justification
-  midline topline filename extract_text_font_size shift pdf
+  midline topline filename extract_text_font_size shift ?(raw=false) pdf
 =
  if pages = [] then error "addtexts: empty page range" else
  (*flprint "addtexts:\n";
@@ -1561,7 +1561,7 @@ let
            end
        | _ -> failwith "addtext: font not found B"
  in
-  let text = charcodes_of_utf8 pdf fontpdfobj text in
+  let text = if raw then text else charcodes_of_utf8 pdf fontpdfobj text in
    let lines = map unescape_string (split_at_newline text) in
      let pdf = ref pdf in
        let voffset =
--- a/cpdf.mli
+++ b/cpdf.mli
@@ -194,6 +194,7 @@ val addtexts :
    string ->(*filename*)
    float option -> (*extract_text_font_size*)
    string -> (* shift *)
+    ?raw:bool -> (* raw *)
    Pdf.t ->(*pdf*)
    Pdf.t

--- a/cpdfcommand.ml
+++ b/cpdfcommand.ml
@@ -4010,7 +4010,7 @@ let go () =
                   args.linespacing args.fontsize args.underneath text range
                   args.orientation args.relative_to_cropbox args.opacity
                   args.justification args.midline args.topline filename
-                   args.extract_text_font_size args.coord pdf)
+                   args.extract_text_font_size args.coord ~raw:(args.encoding = Raw) pdf)
  | Some RemoveText ->
      let pdf = get_single_pdf args.op false in
        let range = parse_pagespec_allow_empty pdf (get_pagespec ()) in
--- a/cpdfmanual.tex
+++ b/cpdfmanual.tex
@@ -5,6 +5,7 @@
 %Document -bookmarks-json including mentioning UTF8
 %Document -list-annotations-json
 %Document -replace-dict-entry and search extension to -remove-dict-entry, and -print-dict-entry
+%Document new text lookup for -add-text and new -raw mode
 \documentclass{book}
 % Edit here to produce cpdfmanual.pdf, cpdflibmanual.pdf, pycpdfmanual.pdf etc.
 \usepackage{comment}\excludecomment{cpdflib}\excludecomment{pycpdflib}