From 137d056cb3011a16b55c44d840ee2b60a728d4fe Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 1 May 2023 10:48:45 -0400 Subject: [PATCH 1/6] Fix for pasting text in the middle of an action --- static/koboldai.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/static/koboldai.js b/static/koboldai.js index 7dfc4a5e..2d62103d 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -3081,6 +3081,7 @@ function gametextwatcher(records) { //Here we want to take care of two possible events //User deleted an action. For this we'll restore the action and set it's text to "" and mark it as dirty //User changes text. For this we simply mark it as dirty + console.log(records); var game_text = document.getElementById("Selected Text"); for (const record of records) { if ((record.type === "childList") && (record.removedNodes.length > 0)) { @@ -3129,7 +3130,7 @@ function gametextwatcher(records) { } } } - //console.log(dirty_chunks); + console.log(dirty_chunks); } function fix_dirty_game_text() { @@ -3165,6 +3166,11 @@ function fix_dirty_game_text() { if (!dirty_chunks.includes(node.previousElementSibling.getAttribute("chunk"))) { dirty_chunks.push(node.previousElementSibling.getAttribute("chunk")); } + //Looks like sometimes it splits the parent. Let's look for that and fix it too + if (node.nextElementSibling && (node.nextElementSibling.getAttribute("chunk") == node.previousElementSibling.getAttribute("chunk"))) { + node.previousElementSibling.innerText = node.previousElementSibling.innerText + node.nextElementSibling.innerText; + node.nextElementSibling.remove(); + } node.remove(); } } From 5a32159e58a205cb5e6ea94f0ce1a0c6029e218d Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 1 May 2023 10:53:02 -0400 Subject: [PATCH 2/6] Remove debug prints --- aiserver.py | 2 +- static/koboldai.js | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/aiserver.py b/aiserver.py index 82b14969..d08e21c1 100644 --- a/aiserver.py +++ b/aiserver.py @@ -8671,7 +8671,7 @@ def UI_2_download_story(): @logger.catch def UI_2_Set_Selected_Text(data): if not koboldai_vars.quiet: - print("Updating Selected Text: {}".format(data)) + logger.info("Updating Selected Text: {}".format(data)) action_id = int(data["id"]) if not koboldai_vars.actions.actions[action_id].get("Original Text"): diff --git a/static/koboldai.js b/static/koboldai.js index 2d62103d..7cc7a7e5 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -3081,7 +3081,6 @@ function gametextwatcher(records) { //Here we want to take care of two possible events //User deleted an action. For this we'll restore the action and set it's text to "" and mark it as dirty //User changes text. For this we simply mark it as dirty - console.log(records); var game_text = document.getElementById("Selected Text"); for (const record of records) { if ((record.type === "childList") && (record.removedNodes.length > 0)) { @@ -3130,7 +3129,6 @@ function gametextwatcher(records) { } } } - console.log(dirty_chunks); } function fix_dirty_game_text() { @@ -3155,8 +3153,6 @@ function fix_dirty_game_text() { //Fixing text outside of chunks for (node of game_text.childNodes) { if ((!(node instanceof HTMLElement) || !node.hasAttribute("chunk")) && (node.textContent.trim() != "")) { - console.log("Found Node that needs to be combined"); - console.log(node); //We have a text only node. 
It should be moved into the previous chunk if (node instanceof HTMLElement) { node.previousElementSibling.innerText = node.previousElementSibling.innerText + node.innerText; From 376884854869528e169ccc7444b308cb41d1ae29 Mon Sep 17 00:00:00 2001 From: Llama <34464159+pi6am@users.noreply.github.com> Date: Wed, 3 May 2023 01:27:11 -0700 Subject: [PATCH 3/6] Fix tokenization and whitespace issues with llama-derived models Work around the 'soft' prefix space behavior of sentencepiece. Override encode to restore the deleted HF support for decode_with_prefix_space. Override decode to skip the soft space and return true decoded tokens. Allow submitting chat messages with embedded newlines. Split sentences between punctuation and whitespace, rather than after whitespace. Also include trailing quotes and brackets after sentence stoppers. This avoids splitting ." and .) into two tokens, for instance. Insert whitespace at the beginning of the author's note, since sentences are split with leading whitespace. Remove spurious newlines at the end of chat responses. --- aiserver.py | 2 +- koboldai_settings.py | 10 ++-- modeling/inference_models/hf.py | 85 ++++++++++++++++++++++++++++++++- utils.py | 8 +--- 4 files changed, 94 insertions(+), 11 deletions(-) diff --git a/aiserver.py b/aiserver.py index 3c54beda..e0ca3eb3 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3509,7 +3509,7 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, botname = (koboldai_vars.botname + ":") else: botname = "" - data = re.sub(r'\n+', ' ', data) + data = re.sub(r'\n+\Z', '', data) if(len(data)): data = f"\n{koboldai_vars.chatname}: {data}\n{botname}" diff --git a/koboldai_settings.py b/koboldai_settings.py index b2cc720e..dfccd4ef 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -397,8 +397,8 @@ class koboldai_vars(object): ######################################### Setup Author's Note Data ######################################################## authors_note_text = self.authornotetemplate.replace("<|>", self.authornote) - if len(authors_note_text) > 0 and authors_note_text[-1] not in [" ", "\n"]: - authors_note_text += " " + if len(authors_note_text) > 0 and authors_note_text[0] not in [" ", "\n"]: + authors_note_text = " " + authors_note_text authors_note_data = [[x, self.tokenizer.decode(x)] for x in self.tokenizer.encode(authors_note_text)] if used_tokens + len(authors_note_data) <= token_budget: used_tokens += len(authors_note_data) @@ -1384,7 +1384,11 @@ class KoboldStoryRegister(object): self.action_count = -1 # The id of the last submission action, or 0 if the last append was not a submission self.submission_id = 0 - self.sentence_re = re.compile(r"[^.!?]*[.!?]+\"?\s*", re.S) + # A regular expression used to break the story into sentences so that the author's + # note can be inserted with minimal disruption. Avoid ending a sentance with + # whitespace because most tokenizers deal better with whitespace at the beginning of text. + # Search for sentence end delimeters (i.e. .!?) and also capture closing parenthesis and quotes. 
+ self.sentence_re = re.compile(r".*?[.!?]+(?=[.!?\]\)}>'\"›»\s])[.!?\]\)}>'\"›»]*", re.S) self.story_settings = story_settings self.tts_model = None self.tortoise = None diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 013590ef..3f98f381 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -1,6 +1,7 @@ import os from typing import Optional from transformers import AutoConfig +import torch import utils import koboldai_settings @@ -20,8 +21,90 @@ class HFInferenceModel(InferenceModel): def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults if utils.koboldai_vars.model_type == "llama": - self.tokenizer.decode_with_prefix_space = True + # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer + self.tokenizer.decode_with_prefix_space = True # Note, not supported anymore, hence the workaround below. self.tokenizer.add_bos_token = False + + # HF transformers no longer supports decode_with_prefix_space + # We work around this by wrapping decode, encode, and __call__ + # with versions that work around the 'prefix space' misfeature + # of sentencepiece. + vocab = self.tokenizer.convert_ids_to_tokens(range(self.tokenizer.vocab_size)) + has_prefix_space = {i for i, tok in enumerate(vocab) if tok.startswith("▁")} + + # Wrap 'decode' with a method that always returns text starting with a space + # when the head token starts with a space. This is what 'decode_with_prefix_space' + # used to do, and we implement it using the same technique (building a cache of + # tokens that should have a prefix space, and then prepending a space if the first + # token is in this set.) We also work around a bizarre behavior in which decoding + # a single token 13 behaves differently than decoding a squence containing only [13]. + original_decode = type(self.tokenizer.tokenizer).decode + def decode_wrapper(self, token_ids, *args, **kwargs): + first = None + dim0 = False + if isinstance(token_ids, int): + first = token_ids + dim0 = True + elif isinstance(token_ids, torch.Tensor): + # Tensors don't support the Python standard of 'empty is False' + # and the special case of dimension 0 tensors also needs to be handled separately. + if token_ids.dim() == 0: + first = int(token_ids.item()) + dim0 = True + elif len(token_ids) > 0: + first = int(token_ids[0]) + elif token_ids: + first = token_ids[0] + result = original_decode(self, token_ids, *args, **kwargs) + if first is not None and first in has_prefix_space: + result = " " + result + if dim0: + # Work around this wonky behavior: + # >>> t.decode(13) + # '<0x0A>' + # >>> t.decode([13]) + # '\n' + # Not doing this causes token streaming to receive <0x0A> characters instead of newlines. + result = result.replace('<0x0A>', '\n') + return result + # GenericTokenizer overrides __setattr__ so we need to use object.__setattr__ to bypass it + object.__setattr__(self.tokenizer, 'decode', decode_wrapper.__get__(self.tokenizer)) + + # Wrap encode and __call__ to work around the 'prefix space' misfeature also. + # The problem is that "Bob" at the start of text is encoded as if it is + # " Bob". This creates a problem because it means you can't split text, encode + # the pieces, concatenate the tokens, decode them, and get the original text back. + # The workaround is to prepend a known token that (1) starts with a space; and + # (2) is not the prefix of any other token. 
After searching through the vocab + # " ," (space comma) is the only token containing only printable ascii characters + # that fits this bill. By prepending ',' to the text, the original encode + # method always returns [1919, ...], where the tail of the sequence is the + # actual encoded result we want without the prefix space behavior. + original_encode = type(self.tokenizer.tokenizer).encode + def encode_wrapper(self, text, *args, **kwargs): + if type(text) is str: + text = ',' + text + result = original_encode(self, text, *args, **kwargs) + result = result[1:] + else: + result = original_encode(self, text, *args, **kwargs) + return result + object.__setattr__(self.tokenizer, 'encode', encode_wrapper.__get__(self.tokenizer)) + + # Since 'encode' is documented as being deprecated, also override __call__. + # This doesn't appear to currently be used by KoboldAI, but doing so + # in case someone uses it in the future. + original_call = type(self.tokenizer.tokenizer).__call__ + def call_wrapper(self, text, *args, **kwargs): + if type(text) is str: + text = ',' + text + result = original_call(self, text, *args, **kwargs) + result = result[1:] + else: + result = original_call(self, text, *args, **kwargs) + return result + object.__setattr__(self.tokenizer, '__call__', call_wrapper.__get__(self.tokenizer)) + elif utils.koboldai_vars.model_type == "opt": self.tokenizer._koboldai_header = self.tokenizer.encode("") self.tokenizer.add_bos_token = False diff --git a/utils.py b/utils.py index 3a9a884c..13ebb6a3 100644 --- a/utils.py +++ b/utils.py @@ -144,21 +144,17 @@ def singlelineprocessing(txt, koboldai_vars): return txt def chatmodeprocessing(txt, koboldai_vars): - chatregex = re.compile(r'%s:[.|\n|\W|\w]*'%koboldai_vars.chatname) + chatregex = re.compile(r'\s+%s:[.|\n|\W|\w]*'%koboldai_vars.chatname) txt = chatregex.sub('', txt) if(len(koboldai_vars.actions) > 0): if(len(koboldai_vars.actions[-1]) > 0): action = koboldai_vars.actions[-1] - lastchar = action[-1] if len(action) else "" else: # Last action is blank, this should never happen, but # since it did let's bail out. return txt else: action = koboldai_vars.prompt - lastchar = action[-1] if len(action) else "" - if(lastchar != "\n"): - txt = txt + "\n" return txt #==================================================================# @@ -745,4 +741,4 @@ def applyoutputformatting(txt, no_sentence_trimming=False, no_single_line=False) txt = txt.replace(sub["trueTarget"], sub["substitution"]) txt = txt.replace(sub["target"], sub["substitution"]) - return txt \ No newline at end of file + return txt From 0c9537e91019516bae1438e1007a6e28c4798353 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 3 May 2023 12:04:05 -0400 Subject: [PATCH 4/6] Potential fix for putting pasted text in wrong action --- static/koboldai.js | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/static/koboldai.js b/static/koboldai.js index 7cc7a7e5..cfc32d21 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -3153,15 +3153,22 @@ function fix_dirty_game_text() { //Fixing text outside of chunks for (node of game_text.childNodes) { if ((!(node instanceof HTMLElement) || !node.hasAttribute("chunk")) && (node.textContent.trim() != "")) { - //We have a text only node. It should be moved into the previous chunk + //We have a text only node. 
It should be moved into the previous chunk if it is marked as dirty, next node if not and it's dirty, or the previous if neither is dirty + var node_text = "" if (node instanceof HTMLElement) { - node.previousElementSibling.innerText = node.previousElementSibling.innerText + node.innerText; + node_text = node.innerText; } else { - node.previousElementSibling.innerText = node.previousElementSibling.innerText + node.data; + node_text = node.data; } - if (!dirty_chunks.includes(node.previousElementSibling.getAttribute("chunk"))) { - dirty_chunks.push(node.previousElementSibling.getAttribute("chunk")); + if (!(node.nextElementSibling) || !(dirty_chunks.includes(node.nextElementSibling.getAttribute("chunk"))) || dirty_chunks.includes(node.previousElementSibling.getAttribute("chunk"))) { + node.previousElementSibling.innerText = node.previousElementSibling.innerText + node_text; + if (!dirty_chunks.includes(node.previousElementSibling.getAttribute("chunk"))) { + dirty_chunks.push(node.previousElementSibling.getAttribute("chunk")); + } + } else { + node.nextElementSibling.innerText = node.nextElementSibling.innerText + node_text; } + //Looks like sometimes it splits the parent. Let's look for that and fix it too if (node.nextElementSibling && (node.nextElementSibling.getAttribute("chunk") == node.previousElementSibling.getAttribute("chunk"))) { node.previousElementSibling.innerText = node.previousElementSibling.innerText + node.nextElementSibling.innerText; From 35d344b9518b3c51efbb447784e626c280b55a9e Mon Sep 17 00:00:00 2001 From: Llama <34464159+pi6am@users.noreply.github.com> Date: Wed, 3 May 2023 09:48:16 -0700 Subject: [PATCH 5/6] Remove torch dependency and more generic dim0 workaround Remove torch dependency from hf.py Make workaround for dimension zero values of token_ids more generic to handle every token, not just newlines. --- modeling/inference_models/hf.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 3f98f381..61f030b1 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -1,7 +1,6 @@ import os from typing import Optional from transformers import AutoConfig -import torch import utils import koboldai_settings @@ -41,16 +40,24 @@ class HFInferenceModel(InferenceModel): original_decode = type(self.tokenizer.tokenizer).decode def decode_wrapper(self, token_ids, *args, **kwargs): first = None - dim0 = False + # Note, the code below that wraps single-value token_ids in a list + # is to work around this wonky behavior: + # >>> t.decode(13) + # '<0x0A>' + # >>> t.decode([13]) + # '\n' + # Not doing this causes token streaming to receive <0x0A> characters + # instead of newlines. if isinstance(token_ids, int): first = token_ids - dim0 = True - elif isinstance(token_ids, torch.Tensor): + token_ids = [first] + elif hasattr(token_ids, 'dim'): # Check for e.g. torch.Tensor # Tensors don't support the Python standard of 'empty is False' - # and the special case of dimension 0 tensors also needs to be handled separately. + # and the special case of dimension 0 tensors also needs to be + # handled separately. 
if token_ids.dim() == 0: first = int(token_ids.item()) - dim0 = True + token_ids = [first] elif len(token_ids) > 0: first = int(token_ids[0]) elif token_ids: @@ -58,14 +65,6 @@ class HFInferenceModel(InferenceModel): result = original_decode(self, token_ids, *args, **kwargs) if first is not None and first in has_prefix_space: result = " " + result - if dim0: - # Work around this wonky behavior: - # >>> t.decode(13) - # '<0x0A>' - # >>> t.decode([13]) - # '\n' - # Not doing this causes token streaming to receive <0x0A> characters instead of newlines. - result = result.replace('<0x0A>', '\n') return result # GenericTokenizer overrides __setattr__ so we need to use object.__setattr__ to bypass it object.__setattr__(self.tokenizer, 'decode', decode_wrapper.__get__(self.tokenizer)) From a87d5d6f2360d192e23d9d7f76b9bd03ee02bcfd Mon Sep 17 00:00:00 2001 From: Henk Date: Wed, 3 May 2023 20:18:40 +0200 Subject: [PATCH 6/6] Remove HF's llama workaround --- modeling/inference_models/hf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 61f030b1..cd609fed 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -21,7 +21,6 @@ class HFInferenceModel(InferenceModel): # These are model specific tokenizer overrides if a model has bad defaults if utils.koboldai_vars.model_type == "llama": # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer - self.tokenizer.decode_with_prefix_space = True # Note, not supported anymore, hence the workaround below. self.tokenizer.add_bos_token = False # HF transformers no longer supports decode_with_prefix_space
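
For reference, the two behaviors that [PATCH 3/6] builds on can be tried in isolation. First, the sentence_re added to koboldai_settings.py splits the story so that whitespace stays at the start of each sentence and trailing quotes/brackets stay attached to the sentence they close. A minimal sketch (the sample text is invented; how KoboldStoryRegister iterates the matches is not shown here):

    import re

    # Same pattern as the sentence_re introduced in koboldai_settings.py.
    sentence_re = re.compile(r".*?[.!?]+(?=[.!?\]\)}>'\"›»\s])[.!?\]\)}>'\"›»]*", re.S)

    text = 'He left. "Why?" she asked (quietly.) Nobody knew.\n'
    print(sentence_re.findall(text))
    # ['He left.', ' "Why?"', ' she asked (quietly.)', ' Nobody knew.']

Second, a rough sketch of the sentinel-comma idea behind the encode wrapper in modeling/inference_models/hf.py. The checkpoint path is a placeholder, the printed ids depend on the tokenizer build, and it assumes (as the patch argues) that ',' always encodes to the single leading token " ,":

    from transformers import LlamaTokenizer

    tok = LlamaTokenizer.from_pretrained("path/to/llama-checkpoint")  # placeholder path

    def encode_without_prefix_space(text):
        # Prepend a sentinel comma so sentencepiece attaches its "soft" prefix
        # space to the sentinel, then drop that first token from the result.
        return tok.encode("," + text, add_special_tokens=False)[1:]

    pieces = ["The quick", " brown fox"]
    naive = [t for p in pieces for t in tok.encode(p, add_special_tokens=False)]
    fixed = [t for p in pieces for t in encode_without_prefix_space(p)]

    # Splitting, encoding, concatenating, and decoding can gain or drop
    # whitespace at the piece boundary with the naive encoding; the sentinel
    # version is meant to reproduce "".join(pieces) exactly.
    print(repr(tok.decode(naive)))
    print(repr(tok.decode(fixed)))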