mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-06-05 21:59:24 +02:00
Fix tokenization and whitespace issues with llama-derived models
Work around the 'soft' prefix space behavior of sentencepiece. Override `encode` to restore the deleted HF support for `decode_with_prefix_space`. Override `decode` to skip the soft space and return the true decoded tokens. Allow submitting chat messages with embedded newlines. Split sentences between punctuation and whitespace, rather than after whitespace. Also include trailing quotes and brackets after sentence stoppers; this avoids splitting `."` and `.)` into two tokens, for instance. Insert whitespace at the beginning of the author's note, since sentences are now split with leading whitespace. Remove spurious newlines at the end of chat responses.
This commit is contained in:
8
utils.py
8
utils.py
@@ -144,21 +144,17 @@ def singlelineprocessing(txt, koboldai_vars):
|
||||
return txt
|
||||
|
||||
def chatmodeprocessing(txt, koboldai_vars):
|
||||
chatregex = re.compile(r'%s:[.|\n|\W|\w]*'%koboldai_vars.chatname)
|
||||
chatregex = re.compile(r'\s+%s:[.|\n|\W|\w]*'%koboldai_vars.chatname)
|
||||
txt = chatregex.sub('', txt)
|
||||
if(len(koboldai_vars.actions) > 0):
|
||||
if(len(koboldai_vars.actions[-1]) > 0):
|
||||
action = koboldai_vars.actions[-1]
|
||||
lastchar = action[-1] if len(action) else ""
|
||||
else:
|
||||
# Last action is blank, this should never happen, but
|
||||
# since it did let's bail out.
|
||||
return txt
|
||||
else:
|
||||
action = koboldai_vars.prompt
|
||||
lastchar = action[-1] if len(action) else ""
|
||||
if(lastchar != "\n"):
|
||||
txt = txt + "\n"
|
||||
return txt
|
||||
|
||||
#==================================================================#
|
||||
@@ -745,4 +741,4 @@ def applyoutputformatting(txt, no_sentence_trimming=False, no_single_line=False)
|
||||
txt = txt.replace(sub["trueTarget"], sub["substitution"])
|
||||
txt = txt.replace(sub["target"], sub["substitution"])
|
||||
|
||||
return txt
|
||||
return txt
|
||||
|
Reference in New Issue
Block a user