Fix tokenization and whitespace issues with llama-derived models

Work around the 'soft' prefix space behavior of sentencepiece.
Override encode to restore the deleted HF support for decode_with_prefix_space.
Override decode to skip the soft space and return the true decoded text.
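
A minimal sketch of the decode workaround, assuming a Hugging Face LlamaTokenizer; the subclass name and the anchor-token trick are illustrative, not this commit's exact code:

from transformers import LlamaTokenizer

class PrefixSpaceTokenizer(LlamaTokenizer):
    # Hypothetical subclass sketching the workaround described above.
    def decode(self, token_ids, **kwargs):
        # sentencepiece drops the soft leading space of the first token it
        # decodes. Decode behind an anchor token so the drop lands on the
        # anchor, then slice the anchor's text off to recover the true string.
        anchor = self.encode("x", add_special_tokens=False)
        anchor_text = super().decode(anchor, **kwargs)
        full = super().decode(anchor + list(token_ids), **kwargs)
        return full[len(anchor_text):]
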
Allow submitting chat messages with embedded newlines.
Split sentences between punctuation and whitespace, rather than after whitespace.
Also include trailing quotes and brackets after sentence stoppers.
This avoids splitting ." and .) across two sentences, for instance.
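
A sketch of the splitting rule, using an illustrative pattern rather than the commit's exact regex:

import re

# A sentence keeps its leading whitespace, runs to one or more stoppers,
# and absorbs any closing quotes/brackets so ." and .) stay attached.
SENTENCE = re.compile(r'\s*.*?[.!?]+["\'\)\]]*(?=\s|$)')

def split_sentences(text):
    return SENTENCE.findall(text)

# split_sentences('He said "Go." Then (they left.) Done.')
# -> ['He said "Go."', ' Then (they left.)', ' Done.']
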
Insert whitespace at the beginning of the author's note, since sentences are
split with leading whitespace.
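The corresponding author's-note adjustment could look like this hypothetical snippet (authors_note is an assumed variable name):

# Give the author's note a leading space so it joins cleanly with the
# whitespace-prefixed sentences around it.
if authors_note and not authors_note[0].isspace():
    authors_note = " " + authors_note
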
Remove spurious newlines at the end of chat responses.
Llama
2023-05-03 01:27:11 -07:00
parent 507da6fcf7
commit 3768848548
4 changed files with 94 additions and 11 deletions


@@ -144,21 +144,17 @@ def singlelineprocessing(txt, koboldai_vars):
     return txt
 
 def chatmodeprocessing(txt, koboldai_vars):
-    chatregex = re.compile(r'%s:[.|\n|\W|\w]*'%koboldai_vars.chatname)
+    chatregex = re.compile(r'\s+%s:[.|\n|\W|\w]*'%koboldai_vars.chatname)
     txt = chatregex.sub('', txt)
     if(len(koboldai_vars.actions) > 0):
         if(len(koboldai_vars.actions[-1]) > 0):
             action = koboldai_vars.actions[-1]
-            lastchar = action[-1] if len(action) else ""
         else:
             # Last action is blank, this should never happen, but
             # since it did let's bail out.
             return txt
     else:
         action = koboldai_vars.prompt
-        lastchar = action[-1] if len(action) else ""
-    if(lastchar != "\n"):
-        txt = txt + "\n"
     return txt
 
 #==================================================================#
@@ -745,4 +741,4 @@ def applyoutputformatting(txt, no_sentence_trimming=False, no_single_line=False):
             txt = txt.replace(sub["trueTarget"], sub["substitution"])
             txt = txt.replace(sub["target"], sub["substitution"])
 
-        return txt
+    return txt