From 73c06bf0a51bc73ab51f060213995abfe872f136 Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu, 15 Jun 2023 16:02:20 -0500 Subject: [PATCH 001/107] add adventuremode stopper adds a stopper token for adventure mode when it detects the bot generating impersonating text after " > You" --- modeling/stoppers.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/modeling/stoppers.py b/modeling/stoppers.py index 94c09e85..0fc6ce10 100644 --- a/modeling/stoppers.py +++ b/modeling/stoppers.py @@ -116,6 +116,28 @@ class Stoppers: return True return False + def adventure_mode_stopper( + model: InferenceModel, + input_ids: torch.LongTensor, + ) -> bool: + if not utils.koboldai_vars.adventure: + return False + + data = [model.tokenizer.decode(x) for x in input_ids] + # null_character = model.tokenizer.encode(chr(0))[0] + if "completed" not in model.gen_state: + model.gen_state["completed"] = [False] * len(input_ids) + + for i in range(len(input_ids)): + if (data[i][-6:] == " > You"): + model.gen_state["completed"][i] = True + + if all(model.gen_state["completed"]): + utils.koboldai_vars.generated_tkns = utils.koboldai_vars.genamt + del model.gen_state["completed"] + return True + return False + @staticmethod def stop_sequence_stopper( model: InferenceModel, From 877028ec7f2125cd6d550d5a913ca6b602ddad69 Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu, 15 Jun 2023 16:07:54 -0500 Subject: [PATCH 002/107] Update hf_torch.py with adv mode stopper --- modeling/inference_models/hf_torch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 2f575e73..10fd3cb6 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -69,6 +69,7 @@ class HFTorchInferenceModel(HFInferenceModel): Stoppers.dynamic_wi_scanner, Stoppers.singleline_stopper, Stoppers.chat_mode_stopper, + Stoppers.adventure_mode_stopper, Stoppers.stop_sequence_stopper, ] From 83493dff2e7d76fad008dad3cfe8b6c400134d34 Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu, 15 Jun 2023 17:15:33 -0500 Subject: [PATCH 003/107] modify adv stopper --- modeling/stoppers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeling/stoppers.py b/modeling/stoppers.py index 0fc6ce10..8fe5c229 100644 --- a/modeling/stoppers.py +++ b/modeling/stoppers.py @@ -129,7 +129,7 @@ class Stoppers: model.gen_state["completed"] = [False] * len(input_ids) for i in range(len(input_ids)): - if (data[i][-6:] == " > You"): + if (data[i][-6:] == "> You " or data[i][-4:] == "You:"): model.gen_state["completed"][i] = True if all(model.gen_state["completed"]): From 8b742b2bd4e346e6ec45d4642a6af374402697ad Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu, 15 Jun 2023 17:20:38 -0500 Subject: [PATCH 004/107] add missing @staticmethod --- modeling/stoppers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modeling/stoppers.py b/modeling/stoppers.py index 8fe5c229..d9f212dd 100644 --- a/modeling/stoppers.py +++ b/modeling/stoppers.py @@ -116,6 +116,7 @@ class Stoppers: return True return False + @staticmethod def adventure_mode_stopper( model: InferenceModel, input_ids: torch.LongTensor, From 91d543bf5a41fda8dfd5d166b47f36bf5e420cb1 Mon Sep 17 00:00:00 2001 From: YellowRoseCx 
<80486540+YellowRoseCx@users.noreply.github.com> Date: Thu, 29 Jun 2023 02:34:08 -0500 Subject: [PATCH 005/107] Update stoppers.py --- modeling/stoppers.py | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/modeling/stoppers.py b/modeling/stoppers.py index d9f212dd..e36eb522 100644 --- a/modeling/stoppers.py +++ b/modeling/stoppers.py @@ -116,28 +116,6 @@ class Stoppers: return True return False - @staticmethod - def adventure_mode_stopper( - model: InferenceModel, - input_ids: torch.LongTensor, - ) -> bool: - if not utils.koboldai_vars.adventure: - return False - - data = [model.tokenizer.decode(x) for x in input_ids] - # null_character = model.tokenizer.encode(chr(0))[0] - if "completed" not in model.gen_state: - model.gen_state["completed"] = [False] * len(input_ids) - - for i in range(len(input_ids)): - if (data[i][-6:] == "> You " or data[i][-4:] == "You:"): - model.gen_state["completed"][i] = True - - if all(model.gen_state["completed"]): - utils.koboldai_vars.generated_tkns = utils.koboldai_vars.genamt - del model.gen_state["completed"] - return True - return False @staticmethod def stop_sequence_stopper( @@ -149,7 +127,12 @@ class Stoppers: # null_character = model.tokenizer.encode(chr(0))[0] if "completed" not in model.gen_state: model.gen_state["completed"] = [False] * len(input_ids) - + if utils.koboldai_vars.adventure: + extra_options = ["> You", "You:", "\n\n You", "\n\nYou", ". You"] + for option in extra_options: + if option not in utils.koboldai_vars.stop_sequence: + utils.koboldai_vars.stop_sequence.append(option) + #one issue is that the stop sequence may not actual align with the end of token #if its a subsection of a longer token for stopper in utils.koboldai_vars.stop_sequence: @@ -163,6 +146,10 @@ class Stoppers: if all(model.gen_state["completed"]): utils.koboldai_vars.generated_tkns = utils.koboldai_vars.genamt del model.gen_state["completed"] + if utils.koboldai_vars.adventure: # Remove added adventure mode stop sequences + for option in extra_options: + if option in utils.koboldai_vars.stop_sequence: + utils.koboldai_vars.stop_sequence.remove(option) return True return False From 6e6c4ee5d4346e874265d63db9d154ddda9843bb Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu, 29 Jun 2023 02:34:57 -0500 Subject: [PATCH 006/107] Update hf_torch.py --- modeling/inference_models/hf_torch.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 10fd3cb6..2f575e73 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -69,7 +69,6 @@ class HFTorchInferenceModel(HFInferenceModel): Stoppers.dynamic_wi_scanner, Stoppers.singleline_stopper, Stoppers.chat_mode_stopper, - Stoppers.adventure_mode_stopper, Stoppers.stop_sequence_stopper, ] From 13405d836e5cd092cdb2db8aec03e3b577d3d969 Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu, 29 Jun 2023 02:39:50 -0500 Subject: [PATCH 007/107] Update stoppers.py --- modeling/stoppers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modeling/stoppers.py b/modeling/stoppers.py index e36eb522..3f277f48 100644 --- a/modeling/stoppers.py +++ b/modeling/stoppers.py @@ -116,7 +116,6 @@ class Stoppers: return True return False - @staticmethod def stop_sequence_stopper( model: InferenceModel, From fef42a6273064a1103715458505fe41f5e747b32 Mon Sep 17 00:00:00 2001 From: 
somebody Date: Wed, 19 Jul 2023 11:52:39 -0500 Subject: [PATCH 008/107] API: Fix loading --- aiserver.py | 18 ++++++++++++++---- .../inference_models/generic_hf_torch/class.py | 5 ++++- modeling/inference_models/hf.py | 11 ++++++++++- modeling/inference_models/hf_torch.py | 3 +++ 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/aiserver.py b/aiserver.py index 0aa9bd4c..e76bf2c7 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1747,9 +1747,6 @@ def load_model(model_backend, initial_load=False): koboldai_vars.aibusy = True koboldai_vars.horde_share = False - if initial_load: - use_breakmodel_args = True - koboldai_vars.reset_model() koboldai_vars.noai = False @@ -8235,6 +8232,7 @@ class WorldInfoUIDsSchema(WorldInfoEntriesUIDsSchema): class ModelSelectionSchema(KoboldSchema): model: str = fields.String(required=True, validate=validate.Regexp(r"^(?!\s*NeoCustom)(?!\s*GPT2Custom)(?!\s*TPUMeshTransformerGPTJ)(?!\s*TPUMeshTransformerGPTNeoX)(?!\s*GooseAI)(?!\s*OAI)(?!\s*InferKit)(?!\s*Colab)(?!\s*API).*$"), metadata={"description": 'Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model'}) + backend: Optional[str] = fields.String(required=False, validate=validate.OneOf(model_backends.keys())) def _generate_text(body: GenerationInputSchema): if koboldai_vars.aibusy or koboldai_vars.genseqs: @@ -8492,6 +8490,7 @@ def put_model(body: ModelSelectionSchema): summary: Load a model description: |-2 Loads a model given its Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model. + Optionally, a backend parameter can be passed in to dictate which backend loads the model. tags: - model requestBody: @@ -8501,6 +8500,7 @@ def put_model(body: ModelSelectionSchema): schema: ModelSelectionSchema example: model: ReadOnly + backend: Read Only responses: 200: description: Successful request @@ -8518,8 +8518,18 @@ def put_model(body: ModelSelectionSchema): set_aibusy(1) old_model = koboldai_vars.model koboldai_vars.model = body.model.strip() + + backend = getattr(body, "backend", None) + if not backend: + # Backend is optional for backwards compatibility; it should probably be + # required on the next major API version. 
+ if body.model == "ReadOnly": + backend = "Read Only" + else: + backend = "Huggingface" + try: - load_model(use_breakmodel_args=True, breakmodel_args_default_to_cpu=True) + load_model(backend) except Exception as e: koboldai_vars.model = old_model raise e diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py index 93def5a6..541c3891 100644 --- a/modeling/inference_models/generic_hf_torch/class.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -21,7 +21,10 @@ model_backend_name = "Huggingface" model_backend_type = "Huggingface" #This should be a generic name in case multiple model backends are compatible (think Hugging Face Custom and Basic Hugging Face) class model_backend(HFTorchInferenceModel): - + def __init__(self) -> None: + super().__init__() + self.use_4_bit = False + def _initialize_model(self): return diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index e407f5b4..27425a46 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -15,8 +15,12 @@ class HFInferenceModel(InferenceModel): def __init__(self) -> None: super().__init__() self.model_config = None - #self.model_name = model_name + # TODO: model_name should probably be an instantiation parameter all the + # way down the inheritance chain. + self.model_name = None + + self.path = None self.hf_torch = False self.model = None self.tokenizer = None @@ -213,6 +217,11 @@ class HFInferenceModel(InferenceModel): torch.cuda.empty_cache() except: pass + + def _pre_load(self) -> None: + # HACK: Make model instantiation work without UI parameters + self.model_name = self.model_name or utils.koboldai_vars.model + return super()._pre_load() def _post_load(self) -> None: self.badwordsids = koboldai_settings.badwordsids_default diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 1b411c95..140acedc 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -89,7 +89,10 @@ class HFTorchInferenceModel(HFInferenceModel): self.hf_torch = True self.lazy_load = True self.low_mem = False + + # TODO: Mayyyybe only keep one of these variables self.nobreakmodel = False + self.breakmodel = False self.post_token_hooks = [ PostTokenHooks.stream_tokens, From 813e210127c2d990d43a8ae7cd90f7124ea64d61 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 11:52:49 -0500 Subject: [PATCH 009/107] Bump tiny API version As we're adding a new (though optional) parameter to load endpoint --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index e76bf2c7..95234644 100644 --- a/aiserver.py +++ b/aiserver.py @@ -892,7 +892,7 @@ tags = [ api_version = None # This gets set automatically so don't change this value api_v1 = KoboldAPISpec( - version="1.2.2", + version="1.2.3", prefixes=["/api/v1", "/api/latest"], tags=tags, ) From b9b3cd3aba4d076cf9c703b5cb29a2ecc4b6e431 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 12:02:53 -0500 Subject: [PATCH 010/107] API: Fix /story --- aiserver.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index 95234644..2278015c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -8817,8 +8817,14 @@ def get_story(): chunks = [] if koboldai_vars.gamestarted: chunks.append({"num": 0, "text": koboldai_vars.prompt}) - for num, action in koboldai_vars.actions.items(): - chunks.append({"num": num + 1, 
"text": action}) + + last_action_num = list(koboldai_vars.actions.actions.keys())[-1] + for num, action in koboldai_vars.actions.actions.items(): + text = action["Selected Text"] + # The last action seems to always be empty + if not text and num == last_action_num: + continue + chunks.append({"num": num + 1, "text": text}) return {"results": chunks} From 6da7a9629ad9c5ae2b25415e12174addc6b3b545 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 13:01:07 -0500 Subject: [PATCH 011/107] API: Fix /story/load --- aiserver.py | 10 +++++++--- koboldai_settings.py | 15 +++++++++------ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/aiserver.py b/aiserver.py index 2278015c..153e6d07 100644 --- a/aiserver.py +++ b/aiserver.py @@ -5130,9 +5130,13 @@ def load_story_v1(js, from_file=None): def load_story_v2(js, from_file=None): logger.debug("Loading V2 Story") logger.debug("Called from {}".format(inspect.stack()[1].function)) - leave_room(session['story']) - session['story'] = js['story_name'] - join_room(session['story']) + + new_story = js["story_name"] + # In socket context + if hasattr(request, "sid"): + leave_room(session['story']) + join_room(new_story) + session['story'] = new_story koboldai_vars.load_story(session['story'], js) diff --git a/koboldai_settings.py b/koboldai_settings.py index ebd8c019..3bc0eb86 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -6,7 +6,7 @@ import os, re, time, threading, json, pickle, base64, copy, tqdm, datetime, sys import shutil from typing import List, Union from io import BytesIO -from flask import has_request_context, session +from flask import has_request_context, session, request from flask_socketio import join_room, leave_room from collections import OrderedDict import multiprocessing @@ -130,11 +130,14 @@ class koboldai_vars(object): original_story_name = story_name if not multi_story: story_name = 'default' - #Leave the old room and join the new one - logger.debug("Leaving room {}".format(session['story'])) - leave_room(session['story']) - logger.debug("Joining room {}".format(story_name)) - join_room(story_name) + + # Leave the old room and join the new one if in socket context + if hasattr(request, "sid"): + logger.debug("Leaving room {}".format(session['story'])) + leave_room(session['story']) + logger.debug("Joining room {}".format(story_name)) + join_room(story_name) + session['story'] = story_name logger.debug("Sending story reset") self._story_settings[story_name]._socketio.emit("reset_story", {}, broadcast=True, room=story_name) From 9726d12ede27bd3b214c412f48cc8dc9c4f66f1c Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 13:05:35 -0500 Subject: [PATCH 012/107] API: Fix /story/end (POST) --- aiserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index 153e6d07..6e55b943 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3265,9 +3265,9 @@ def check_for_backend_compilation(): def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, disable_recentrng=False, no_generate=False, ignore_aibusy=False): # Ignore new submissions if the AI is currently busy - if(koboldai_vars.aibusy): + if koboldai_vars.aibusy and not ignore_aibusy: return - + while(True): set_aibusy(1) koboldai_vars.actions.clear_unused_options() From 2d80f2ebb5902f4a2400227ab390f68f5b837e94 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 13:08:57 -0500 Subject: [PATCH 013/107] API: Fix getstorynums --- aiserver.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 6e55b943..c90d862c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -8852,7 +8852,7 @@ def get_story_nums(): chunks = [] if koboldai_vars.gamestarted: chunks.append(0) - for num in koboldai_vars.actions.keys(): + for num in koboldai_vars.actions.actions.keys(): chunks.append(num + 1) return {"results": chunks} From 4335d1f46a9968dc6589daf4c147408835a6c5e9 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 13:18:45 -0500 Subject: [PATCH 014/107] API: Fix /world_info --- aiserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index c90d862c..604dba5f 100644 --- a/aiserver.py +++ b/aiserver.py @@ -4424,7 +4424,7 @@ def requestwi(): # and items in different folders are sorted based on the order of the folders #==================================================================# def stablesortwi(): - mapping = {uid: index for index, uid in enumerate(koboldai_vars.wifolders_l)} + mapping = {str(uid): index for index, uid in enumerate(koboldai_vars.wifolders_l)} koboldai_vars.worldinfo.sort(key=lambda x: mapping[str(x["folder"])] if x["folder"] is not None else float("inf")) last_folder = ... last_wi = None @@ -9213,7 +9213,7 @@ def get_world_info(): if wi["folder"] != last_folder: folder = [] if wi["folder"] is not None: - folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[wi["folder"]]["name"], "entries": folder}) + folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[str(wi["folder"])]["name"], "entries": folder}) last_folder = wi["folder"] (folder if wi["folder"] is not None else entries).append({k: v for k, v in wi.items() if k not in ("init", "folder", "num") and (wi["selective"] or k != "keysecondary")}) return {"folders": folders, "entries": entries} From 34a98d2962678b468b833ec3c506c690e4c50e8e Mon Sep 17 00:00:00 2001 From: onesome Date: Fri, 21 Jul 2023 00:48:02 -0500 Subject: [PATCH 015/107] Context Menu: Small visual fixes woohooooo back to css - fixes margins to look better - moves contents of context menu items 1px down - fixes context menus near edge wrapping their inner text (ew) --- static/koboldai.css | 10 ++++++++-- static/koboldai.js | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/static/koboldai.css b/static/koboldai.css index 3252c21a..3ad643d2 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -2705,13 +2705,14 @@ body { #context-menu > hr { /* Division Color*/ border-top: 2px solid var(--context_menu_division); - margin: 5px 5px; + margin: 3px 5px; } .context-menu-item { padding: 5px; padding-right: 25px; min-width: 100px; + white-space: nowrap; } .context-menu-item:hover { @@ -2722,11 +2723,16 @@ body { .context-menu-item > .material-icons-outlined { position: relative; - top: 2px; + top: 3px; font-size: 15px; margin-right: 5px; } +.context-menu-item > .context-menu-label { + position: relative; + top: 1px; +} + /* Substitutions */ #Substitutions { margin-left: 10px; diff --git a/static/koboldai.js b/static/koboldai.js index 8b70dd6a..e8053f23 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -6071,7 +6071,7 @@ process_cookies(); context_menu_cache.push({shouldShow: action.shouldShow}); let icon = $e("span", item, {classes: ["material-icons-outlined"], innerText: action.icon}); - item.append(action.label); + $e("span", item, {classes: ["context-menu-label"], innerText: action.label}); item.addEventListener("mousedown", e => e.preventDefault()); // Expose the 
"summonEvent" to enable access to original context menu target. From 4921040fb462dac00ba8a028a8b22e9524f9f740 Mon Sep 17 00:00:00 2001 From: onesome Date: Fri, 21 Jul 2023 00:52:12 -0500 Subject: [PATCH 016/107] Context Menu: Make things a little less bloaty 5px was a bit excessive TODO: studied the context menu in my browser for a bit and noticed that if it was going to be too close to the bottom, the browser changes the vertical direction the context menu goes. sounds neat! --- static/koboldai.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/static/koboldai.css b/static/koboldai.css index 3ad643d2..b83384c4 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -2709,7 +2709,7 @@ body { } .context-menu-item { - padding: 5px; + padding: 4px; padding-right: 25px; min-width: 100px; white-space: nowrap; From 46c377b0c362d715e1c37f423ab763f367c32299 Mon Sep 17 00:00:00 2001 From: onesome Date: Fri, 21 Jul 2023 00:53:48 -0500 Subject: [PATCH 017/107] Context Menu: Add stubs for new temporary stoppingcriteria idea I think this would be cool! Ideas: - disable/grey when model doesnt support stopping criteria - shortcuts (maybe, this would def be a power user thing) - option to generate until EOS token - option to generate forever until user manually stops - (not super related but pixels away) make retry while generation is ongoing cancel generation and retry. same with undo. --- static/koboldai.js | 6 ++++++ templates/index_new.html | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/static/koboldai.js b/static/koboldai.js index e8053f23..64da7146 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -147,6 +147,12 @@ const context_menu_actions = { "wi-img-upload-button": [ {label: "Upload Image", icon: "file_upload", enabledOn: "ALWAYS", click: wiImageReplace}, {label: "Use Generated Image", icon: "image", enabledOn: "GENERATED-IMAGE", click: wiImageUseGeneratedImage}, + ], + "submit-button": [ + {label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: function(){}}, + null, + {label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, + {label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, ] }; diff --git a/templates/index_new.html b/templates/index_new.html index 99b8c941..53bcffd5 100644 --- a/templates/index_new.html +++ b/templates/index_new.html @@ -110,7 +110,7 @@ - + From 6cf63f781a3c17ab6b41c5f12cd05f824be9ba04 Mon Sep 17 00:00:00 2001 From: onesome Date: Fri, 21 Jul 2023 01:58:57 -0500 Subject: [PATCH 018/107] YEAAAAAAAAAA --- static/koboldai.js | 11 +++++++++++ templates/index_new.html | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/static/koboldai.js b/static/koboldai.js index 64da7146..75563df2 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -151,8 +151,19 @@ const context_menu_actions = { "submit-button": [ {label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: function(){}}, null, + {label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, + {label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, + null, {label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, {label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, + ], + "undo-button": [ + {label: "Undo", icon: "undo", enabledOn: "ALWAYS", click: function(){}}, + null, + {label: "Prune Actions", icon: "cut", enabledOn: "ALWAYS", click: function(){}}, + 
{label: "Shred", icon: "local_fire_department", enabledOn: "ALWAYS", click: function(){}}, + null, + {label: "Trim Last Sentence", icon: "carpenter", enabledOn: "ALWAYS", click: function(){}}, ] }; diff --git a/templates/index_new.html b/templates/index_new.html index 53bcffd5..2b1c0ddf 100644 --- a/templates/index_new.html +++ b/templates/index_new.html @@ -112,7 +112,7 @@ - + From fa0a09994386f704e99ef7bebeb6903469aea044 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 10:38:17 -0500 Subject: [PATCH 019/107] Update comment --- modeling/inference_models/hf_torch.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 140acedc..c4e82e6f 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -90,7 +90,8 @@ class HFTorchInferenceModel(HFInferenceModel): self.lazy_load = True self.low_mem = False - # TODO: Mayyyybe only keep one of these variables + # `nobreakmodel` indicates that breakmodel cannot be used, while `breakmodel` + # indicates whether breakmodel is currently being used self.nobreakmodel = False self.breakmodel = False From 3a43b254b86733a637a2286bf8a3c9421674771a Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 13:27:30 -0500 Subject: [PATCH 020/107] Add basic support for some of the quick stoppers --- aiserver.py | 64 +++++++++++++++++++++++++++---------- modeling/inference_model.py | 32 +++++++++++++++++++ modeling/stoppers.py | 52 ++++++++++++++++++++++++------ static/koboldai.js | 23 ++++++++----- 4 files changed, 137 insertions(+), 34 deletions(-) diff --git a/aiserver.py b/aiserver.py index 0aa9bd4c..1cb9146e 100644 --- a/aiserver.py +++ b/aiserver.py @@ -12,6 +12,8 @@ import random import shutil import eventlet +from modeling.inference_model import GenerationMode + eventlet.monkey_patch(all=True, thread=False, os=False) import os, inspect, contextlib, pickle os.system("") @@ -3266,7 +3268,16 @@ def check_for_backend_compilation(): break koboldai_vars.checking = False -def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, disable_recentrng=False, no_generate=False, ignore_aibusy=False): +def actionsubmit( + data, + actionmode=0, + force_submit=False, + force_prompt_gen=False, + disable_recentrng=False, + no_generate=False, + ignore_aibusy=False, + gen_mode=GenerationMode.STANDARD +): # Ignore new submissions if the AI is currently busy if(koboldai_vars.aibusy): return @@ -3424,7 +3435,7 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, if(not no_generate and not koboldai_vars.noai and koboldai_vars.lua_koboldbridge.generating): # Off to the tokenizer! - calcsubmit("") + calcsubmit("", gen_mode=gen_mode) if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0): data = "" force_submit = True @@ -3779,7 +3790,7 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None, #==================================================================# # Take submitted text and build the text to be given to generator #==================================================================# -def calcsubmit(txt): +def calcsubmit(txt, gen_mode=GenerationMode.STANDARD): anotetxt = "" # Placeholder for Author's Note text forceanote = False # In case we don't have enough actions to hit A.N. depth anoteadded = False # In case our budget runs out before we hit A.N. 
depth @@ -3821,7 +3832,7 @@ def calcsubmit(txt): logger.debug("Submit: experimental_features time {}s".format(time.time()-start_time)) start_time = time.time() - generate(subtxt, min, max, found_entries) + generate(subtxt, min, max, found_entries, gen_mode=gen_mode) logger.debug("Submit: generate time {}s".format(time.time()-start_time)) attention_bias.attention_bias = None @@ -3889,7 +3900,7 @@ class HordeException(Exception): # Send text to generator and deal with output #==================================================================# -def generate(txt, minimum, maximum, found_entries=None): +def generate(txt, minimum, maximum, found_entries=None, gen_mode=GenerationMode.STANDARD): koboldai_vars.generated_tkns = 0 if(found_entries is None): @@ -3911,7 +3922,7 @@ def generate(txt, minimum, maximum, found_entries=None): # Submit input text to generator try: start_time = time.time() - genout, already_generated = tpool.execute(model.core_generate, txt, found_entries) + genout, already_generated = tpool.execute(model.core_generate, txt, found_entries, gen_mode=gen_mode) logger.debug("Generate: core_generate time {}s".format(time.time()-start_time)) except Exception as e: if(issubclass(type(e), lupa.LuaError)): @@ -6168,22 +6179,43 @@ def UI_2_delete_option(data): @socketio.on('submit') @logger.catch def UI_2_submit(data): - if not koboldai_vars.noai and data['theme'] != "": + if not koboldai_vars.noai and data['theme']: + # Random prompt generation logger.debug("doing random prompt") memory = koboldai_vars.memory koboldai_vars.memory = "{}\n\nYou generate the following {} story concept :".format(koboldai_vars.memory, data['theme']) koboldai_vars.lua_koboldbridge.feedback = None actionsubmit("", force_submit=True, force_prompt_gen=True) koboldai_vars.memory = memory - else: - logger.debug("doing normal input") - koboldai_vars.actions.clear_unused_options() - koboldai_vars.lua_koboldbridge.feedback = None - koboldai_vars.recentrng = koboldai_vars.recentrngm = None - if koboldai_vars.actions.action_count == -1: - actionsubmit(data['data'], actionmode=koboldai_vars.actionmode) - else: - actionsubmit(data['data'], actionmode=koboldai_vars.actionmode) + return + + logger.debug("doing normal input") + koboldai_vars.actions.clear_unused_options() + koboldai_vars.lua_koboldbridge.feedback = None + koboldai_vars.recentrng = koboldai_vars.recentrngm = None + + gen_mode_name = data.get("gen_mode", None) + gen_mode = { + # If we don't have a gen mode, or it's None (the default), just do a + # normal submission. + None: GenerationMode.STANDARD, + + # NOTE: forever should be a no-op on models that don't support + # interrupting generation. This should be conveyed to the user by + # graying out the option in the context menu. 
+ "forever": GenerationMode.FOREVER, + + # The following gen modes require stopping criteria to be respected by + # the backend: + "until_eos": GenerationMode.UNTIL_EOS, + "until_newline": GenerationMode.UNTIL_NEWLINE, + "until_sentence_end": GenerationMode.UNTIL_SENTENCE_END, + }.get(gen_mode_name, None) + + if not gen_mode: + raise RuntimeError(f"Unknown gen_mode '{gen_mode_name}'") + + actionsubmit(data['data'], actionmode=koboldai_vars.actionmode, gen_mode=gen_mode) #==================================================================# # Event triggered when user clicks the submit button diff --git a/modeling/inference_model.py b/modeling/inference_model.py index a2d4fa63..1d285576 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -3,6 +3,8 @@ from __future__ import annotations from dataclasses import dataclass import time from typing import List, Optional, Union + +from enum import Enum from logger import logger import torch @@ -12,6 +14,7 @@ from transformers import ( GPT2Tokenizer, AutoTokenizer, ) +from modeling.stoppers import Stoppers from modeling.tokenizer import GenericTokenizer from modeling import logits_processors @@ -154,6 +157,12 @@ class ModelCapabilities: # Some models need to warm up the TPU before use uses_tpu: bool = False +class GenerationMode(Enum): + STANDARD = 0 + FOREVER = 1 + UNTIL_EOS = 2 + UNTIL_NEWLINE = 3 + UNTIL_SENTENCE_END = 4 class InferenceModel: """Root class for all models.""" @@ -256,6 +265,7 @@ class InferenceModel: self, text: list, found_entries: set, + gen_mode: GenerationMode = GenerationMode.STANDARD, ): """Generate story text. Heavily tied to story-specific parameters; if you are making a new generation-based feature, consider `generate_raw()`. @@ -263,6 +273,7 @@ class InferenceModel: Args: text (list): Encoded input tokens found_entries (set): Entries found for Dynamic WI + gen_mode (GenerationMode): The GenerationMode to pass to raw_generate. Defaults to GenerationMode.STANDARD Raises: RuntimeError: if inconsistancies are detected with the internal state and Lua state -- sanity check @@ -358,6 +369,7 @@ class InferenceModel: seed=utils.koboldai_vars.seed if utils.koboldai_vars.full_determinism else None, + gen_mode=gen_mode ) logger.debug( "core_generate: run raw_generate pass {} {}s".format( @@ -532,6 +544,7 @@ class InferenceModel: found_entries: set = (), tpu_dynamic_inference: bool = False, seed: Optional[int] = None, + gen_mode: GenerationMode = GenerationMode.STANDARD, **kwargs, ) -> GenerationResult: """A wrapper around `_raw_generate()` that handles gen_state and other stuff. Use this to generate text outside of the story. @@ -547,6 +560,7 @@ class InferenceModel: is_core (bool, optional): Whether this generation is a core story generation. Defaults to False. single_line (bool, optional): Generate one line only.. Defaults to False. found_entries (set, optional): Entries found for Dynamic WI. Defaults to (). + gen_mode (GenerationMode): Special generation mode. Defaults to GenerationMode.STANDARD. 
Raises: ValueError: If prompt type is weird @@ -568,6 +582,21 @@ class InferenceModel: "wi_scanner_excluded_keys", set() ) + temp_stoppers = [] + + if gen_mode == GenerationMode.FOREVER: + raise NotImplementedError() + elif gen_mode == GenerationMode.UNTIL_EOS: + # Still need to unban + raise NotImplementedError() + elif gen_mode == GenerationMode.UNTIL_NEWLINE: + # TODO: Look into replacing `single_line` with `generation_mode` + temp_stoppers.append(Stoppers.newline_stopper) + elif gen_mode == GenerationMode.UNTIL_SENTENCE_END: + temp_stoppers.append(Stoppers.sentence_end_stopper) + + self.stopper_hooks += temp_stoppers + utils.koboldai_vars.inference_config.do_core = is_core gen_settings = GenerationSettings(*(generation_settings or {})) @@ -604,6 +633,9 @@ class InferenceModel: f"Generated {len(result.encoded[0])} tokens in {time_end} seconds, for an average rate of {tokens_per_second} tokens per second." ) + for stopper in temp_stoppers: + self.stopper_hooks.remove(stopper) + return result def generate( diff --git a/modeling/stoppers.py b/modeling/stoppers.py index 94c09e85..02c1ce48 100644 --- a/modeling/stoppers.py +++ b/modeling/stoppers.py @@ -3,15 +3,12 @@ from __future__ import annotations import torch import utils -from modeling.inference_model import ( - InferenceModel, -) - +from modeling import inference_model class Stoppers: @staticmethod def core_stopper( - model: InferenceModel, + model: inference_model.InferenceModel, input_ids: torch.LongTensor, ) -> bool: if not utils.koboldai_vars.inference_config.do_core: @@ -62,7 +59,7 @@ class Stoppers: @staticmethod def dynamic_wi_scanner( - model: InferenceModel, + model: inference_model.InferenceModel, input_ids: torch.LongTensor, ) -> bool: if not utils.koboldai_vars.inference_config.do_dynamic_wi: @@ -93,7 +90,7 @@ class Stoppers: @staticmethod def chat_mode_stopper( - model: InferenceModel, + model: inference_model.InferenceModel, input_ids: torch.LongTensor, ) -> bool: if not utils.koboldai_vars.chatmode: @@ -118,7 +115,7 @@ class Stoppers: @staticmethod def stop_sequence_stopper( - model: InferenceModel, + model: inference_model.InferenceModel, input_ids: torch.LongTensor, ) -> bool: @@ -145,14 +142,22 @@ class Stoppers: @staticmethod def singleline_stopper( - model: InferenceModel, + model: inference_model.InferenceModel, input_ids: torch.LongTensor, ) -> bool: - """If singleline mode is enabled, it's pointless to generate output beyond the first newline.""" + """Stop on occurances of newlines **if singleline is enabled**.""" + # It might be better just to do this further up the line if not utils.koboldai_vars.singleline: return False + return Stoppers.newline_stopper(model, input_ids) + @staticmethod + def newline_stopper( + model: inference_model.InferenceModel, + input_ids: torch.LongTensor, + ) -> bool: + """Stop on occurances of newlines.""" # Keep track of presence of newlines in each sequence; we cannot stop a # batch member individually, so we must wait for all of them to contain # a newline. @@ -167,3 +172,30 @@ class Stoppers: del model.gen_state["newline_in_sequence"] return True return False + + @staticmethod + def sentence_end_stopper( + model: inference_model.InferenceModel, + input_ids: torch.LongTensor, + ) -> bool: + """Stops at the end of sentences.""" + + # TODO: Make this more robust + SENTENCE_ENDS = [".", "?", "!"] + + # We need to keep track of stopping for each batch, since we can't stop + # one individually. 
+ if "sentence_end_in_sequence" not in model.gen_state: + model.gen_state["sentence_end_sequence"] = [False] * len(input_ids) + + for sequence_idx, batch_sequence in enumerate(input_ids): + decoded = model.tokenizer.decode(batch_sequence[-1]) + for end in SENTENCE_ENDS: + if end in decoded: + model.gen_state["sentence_end_sequence"][sequence_idx] = True + break + + if all(model.gen_state["sentence_end_sequence"]): + del model.gen_state["sentence_end_sequence"] + return True + return False \ No newline at end of file diff --git a/static/koboldai.js b/static/koboldai.js index 75563df2..320ec927 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -149,13 +149,13 @@ const context_menu_actions = { {label: "Use Generated Image", icon: "image", enabledOn: "GENERATED-IMAGE", click: wiImageUseGeneratedImage}, ], "submit-button": [ - {label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: function(){}}, + {label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: () => storySubmit()}, null, - {label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, - {label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, + {label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("forever")}, + {label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_eos")}, null, - {label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, - {label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, + {label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_newline")}, + {label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_sentence_end")}, ], "undo-button": [ {label: "Undo", icon: "undo", enabledOn: "ALWAYS", click: function(){}}, @@ -256,10 +256,17 @@ function disconnect() { document.getElementById("disconnect_message").classList.remove("hidden"); } -function storySubmit() { +function storySubmit(genMode=null) { + const textInput = document.getElementById("input_text"); + const themeInput = document.getElementById("themetext"); disruptStoryState(); - socket.emit('submit', {'data': document.getElementById('input_text').value, 'theme': document.getElementById('themetext').value}); - document.getElementById('input_text').value = ''; + socket.emit('submit', { + data: textInput.value, + theme: themeInput.value, + gen_mode: genMode, + }); + + textInput.value = ''; document.getElementById('themetext').value = ''; } From 1c4157a41b753b8e3dd4246770c3cfa889485306 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 13:33:38 -0500 Subject: [PATCH 021/107] Maybe another time too many ideas at once --- static/koboldai.js | 8 -------- 1 file changed, 8 deletions(-) diff --git a/static/koboldai.js b/static/koboldai.js index 320ec927..8ccac9dc 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -157,14 +157,6 @@ const context_menu_actions = { {label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_newline")}, {label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_sentence_end")}, ], - "undo-button": [ - {label: "Undo", icon: "undo", enabledOn: "ALWAYS", click: function(){}}, - null, - {label: "Prune Actions", icon: "cut", enabledOn: "ALWAYS", click: function(){}}, - {label: "Shred", icon: "local_fire_department", enabledOn: "ALWAYS", click: function(){}}, 
- null, - {label: "Trim Last Sentence", icon: "carpenter", enabledOn: "ALWAYS", click: function(){}}, - ] }; let context_menu_cache = []; From b8671cce09e83c4c64239951edf0150ef8b8d190 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 13:48:23 -0500 Subject: [PATCH 022/107] Context Menu: Change positioning algorithm for y-axis --- static/koboldai.js | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/static/koboldai.js b/static/koboldai.js index 8ccac9dc..cdb7bc79 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -5818,8 +5818,21 @@ function position_context_menu(contextMenu, x, y) { right: x + width, }; + // Slide over if running against the window bounds. if (farMenuBounds.right > bounds.right) x -= farMenuBounds.right - bounds.right; - if (farMenuBounds.bottom > bounds.bottom) y -= farMenuBounds.bottom - bounds.bottom; + + if (farMenuBounds.bottom > bounds.bottom) { + // We've hit the bottom. + + // The old algorithm pushed the menu against the wall, similar to what's + // done on the x-axis: + // y -= farMenuBounds.bottom - bounds.bottom; + // But now, we make the box change its emission direction from the cursor: + y -= (height + 5); + // The main advantage of this approach is that the cursor is never directly + // placed above a context menu item immediately after activating the context + // menu. (Thus the 5px offset also added) + } contextMenu.style.left = `${x}px`; contextMenu.style.top = `${y}px`; From 8d5ae38b4568e1dbc893c8adfb7d3ac4a8bd57c2 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 14:29:41 -0500 Subject: [PATCH 023/107] Context Menu: Show if gen mode is supported - adds callback support to `enabledOn` in context menu items - adds `supported_gen_modes` variable for frontend to check if a gen mode is supported - adds `get_supported_gen_modes` to `InferenceModel` to get supported gen modes - takes advantage of cool enum features for less enum-handling code --- aiserver.py | 29 ++++++------------- koboldai_settings.py | 1 + modeling/inference_model.py | 37 +++++++++++++++++++++---- static/koboldai.js | 55 +++++++++++++++++++++++++++++-------- 4 files changed, 84 insertions(+), 38 deletions(-) diff --git a/aiserver.py b/aiserver.py index 1cb9146e..ba224b3c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1790,7 +1790,9 @@ def load_model(model_backend, initial_load=False): with use_custom_unpickler(RestrictedUnpickler): model = model_backends[model_backend] + koboldai_vars.supported_gen_modes = [x.value for x in model.get_supported_gen_modes()] model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) + koboldai_vars.model = model.model_name if "model_name" in vars(model) else model.id #Should have model_name, but it could be set to id depending on how it's setup if koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"): koboldai_vars.model = os.path.basename(os.path.normpath(model.path)) @@ -6194,26 +6196,13 @@ def UI_2_submit(data): koboldai_vars.lua_koboldbridge.feedback = None koboldai_vars.recentrng = koboldai_vars.recentrngm = None - gen_mode_name = data.get("gen_mode", None) - gen_mode = { - # If we don't have a gen mode, or it's None (the default), just do a - # normal submission. - None: GenerationMode.STANDARD, - - # NOTE: forever should be a no-op on models that don't support - # interrupting generation. This should be conveyed to the user by - # graying out the option in the context menu. 
- "forever": GenerationMode.FOREVER, - - # The following gen modes require stopping criteria to be respected by - # the backend: - "until_eos": GenerationMode.UNTIL_EOS, - "until_newline": GenerationMode.UNTIL_NEWLINE, - "until_sentence_end": GenerationMode.UNTIL_SENTENCE_END, - }.get(gen_mode_name, None) - - if not gen_mode: - raise RuntimeError(f"Unknown gen_mode '{gen_mode_name}'") + gen_mode_name = data.get("gen_mode", None) or "standard" + try: + gen_mode = GenerationMode(gen_mode_name) + except ValueError: + # Invalid enum lookup! + gen_mode = GenerationMode.STANDARD + logger.warning(f"Unknown gen_mode '{gen_mode_name}', using STANDARD! Report this!") actionsubmit(data['data'], actionmode=koboldai_vars.actionmode, gen_mode=gen_mode) diff --git a/koboldai_settings.py b/koboldai_settings.py index ebd8c019..f061beb1 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -685,6 +685,7 @@ class model_settings(settings): self._koboldai_vars = koboldai_vars self.alt_multi_gen = False self.bit_8_available = None + self.supported_gen_modes = [] def reset_for_model_load(self): self.simple_randomness = 0 #Set first as this affects other outputs diff --git a/modeling/inference_model.py b/modeling/inference_model.py index 1d285576..e09249c3 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -147,7 +147,10 @@ class GenerationSettings: class ModelCapabilities: embedding_manipulation: bool = False post_token_hooks: bool = False + + # Used to gauge if manual stopping is possible stopper_hooks: bool = False + # TODO: Support non-live probabilities from APIs post_token_probs: bool = False @@ -158,11 +161,11 @@ class ModelCapabilities: uses_tpu: bool = False class GenerationMode(Enum): - STANDARD = 0 - FOREVER = 1 - UNTIL_EOS = 2 - UNTIL_NEWLINE = 3 - UNTIL_SENTENCE_END = 4 + STANDARD = "standard" + FOREVER = "forever" + UNTIL_EOS = "until_eos" + UNTIL_NEWLINE = "until_newline" + UNTIL_SENTENCE_END = "until_sentence_end" class InferenceModel: """Root class for all models.""" @@ -585,7 +588,13 @@ class InferenceModel: temp_stoppers = [] if gen_mode == GenerationMode.FOREVER: - raise NotImplementedError() + if self.capabilties.stopper_hooks: + self.gen_state["stop_at_genamt"] = False + max_new = 1e7 + else: + logger.warning( + "User requested infinite generation on model that doesn't support stop hooks. Recipe for disaster!" + ) elif gen_mode == GenerationMode.UNTIL_EOS: # Still need to unban raise NotImplementedError() @@ -652,3 +661,19 @@ class InferenceModel: def _post_token_gen(self, input_ids: torch.LongTensor) -> None: for hook in self.post_token_hooks: hook(self, input_ids) + + def get_supported_gen_modes(self) -> List[GenerationMode]: + """Returns a list of compatible `GenerationMode`s for the current model. + + Returns: + List[GenerationMode]: A list of compatible `GenerationMode`s. 
+ """ + ret = [] + if self.capabilties.stopper_hooks: + ret += [ + GenerationMode.FOREVER, + GenerationMode.UNTIL_EOS, + GenerationMode.UNTIL_NEWLINE, + GenerationMode.UNTIL_SENTENCE_END, + ] + return ret \ No newline at end of file diff --git a/static/koboldai.js b/static/koboldai.js index cdb7bc79..d7560f54 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -83,6 +83,7 @@ let story_id = -1; var dirty_chunks = []; var initial_socketio_connection_occured = false; var selected_model_data; +var supported_gen_modes = []; // Each entry into this array should be an object that looks like: // {class: "class", key: "key", func: callback} @@ -151,11 +152,31 @@ const context_menu_actions = { "submit-button": [ {label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: () => storySubmit()}, null, - {label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("forever")}, - {label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_eos")}, + { + label: "Generate Forever", + icon: "edit_off", + enabledOn: () => supported_gen_modes.includes("forever"), + click: () => storySubmit("forever") + }, + { + label: "Generate Until EOS", + icon: "edit_off", + enabledOn: () => supported_gen_modes.includes("until_eos"), + click: () => storySubmit("until_eos") + }, null, - {label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_newline")}, - {label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_sentence_end")}, + { + label: "Finish Line", + icon: "edit_off", + enabledOn: () => supported_gen_modes.includes("until_newline"), + click: () => storySubmit("until_newline") + }, + { + label: "Finish Sentence", + icon: "edit_off", + enabledOn: () => supported_gen_modes.includes("until_sentence_end"), + click: () => storySubmit("until_sentence_end") + }, ], }; @@ -941,6 +962,9 @@ function var_changed(data) { //special case for welcome text since we want to allow HTML } else if (data.classname == 'model' && data.name == 'welcome') { document.getElementById('welcome_text').innerHTML = data.value; + //Special case for permitted generation modes + } else if (data.classname == 'model' && data.name == 'supported_gen_modes') { + supported_gen_modes = data.value; //Basic Data Syncing } else { var elements_to_change = document.getElementsByClassName("var_sync_"+data.classname.replace(" ", "_")+"_"+data.name.replace(" ", "_")); @@ -6090,21 +6114,23 @@ process_cookies(); continue; } + const enableCriteriaIsFunction = typeof action.enabledOn === "function" - let item = $e("div", contextMenu, { + const itemEl = $e("div", contextMenu, { classes: ["context-menu-item", "noselect", `context-menu-${key}`], - "enabled-on": action.enabledOn, + "enabled-on": enableCriteriaIsFunction ? 
"CALLBACK" : action.enabledOn, "cache-index": context_menu_cache.length }); + itemEl.enabledOnCallback = action.enabledOn; context_menu_cache.push({shouldShow: action.shouldShow}); - let icon = $e("span", item, {classes: ["material-icons-outlined"], innerText: action.icon}); - $e("span", item, {classes: ["context-menu-label"], innerText: action.label}); + const icon = $e("span", itemEl, {classes: ["material-icons-outlined"], innerText: action.icon}); + $e("span", itemEl, {classes: ["context-menu-label"], innerText: action.label}); - item.addEventListener("mousedown", e => e.preventDefault()); + itemEl.addEventListener("mousedown", e => e.preventDefault()); // Expose the "summonEvent" to enable access to original context menu target. - item.addEventListener("click", () => action.click(summonEvent)); + itemEl.addEventListener("click", () => action.click(summonEvent)); } } @@ -6154,10 +6180,10 @@ process_cookies(); // Disable non-applicable items $(".context-menu-item").addClass("disabled"); - + // A selection is made if (getSelectionText()) $(".context-menu-item[enabled-on=SELECTION]").removeClass("disabled"); - + // The caret is placed if (get_caret_position(target) !== null) $(".context-menu-item[enabled-on=CARET]").removeClass("disabled"); @@ -6166,6 +6192,11 @@ process_cookies(); $(".context-menu-item[enabled-on=ALWAYS]").removeClass("disabled"); + for (const contextMenuItem of document.querySelectorAll(".context-menu-item[enabled-on=CALLBACK]")) { + if (!contextMenuItem.enabledOnCallback()) continue; + contextMenuItem.classList.remove("disabled"); + } + // Make sure hr isn't first or last visible element let visibles = []; for (const item of contextMenu.children) { From c78401bd124be939134c1f50ac831c434697b681 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 15:22:14 -0500 Subject: [PATCH 024/107] Fix gen mode on first generation --- aiserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index ba224b3c..0bfaca22 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3371,7 +3371,7 @@ def actionsubmit( koboldai_vars.prompt = data # Clear the startup text from game screen emit('from_server', {'cmd': 'updatescreen', 'gamestarted': False, 'data': 'Please wait, generating story...'}, broadcast=True, room="UI_1") - calcsubmit("") # Run the first action through the generator + calcsubmit("", gen_mode=gen_mode) # Run the first action through the generator if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0): data = "" force_submit = True @@ -6205,7 +6205,7 @@ def UI_2_submit(data): logger.warning(f"Unknown gen_mode '{gen_mode_name}', using STANDARD! Report this!") actionsubmit(data['data'], actionmode=koboldai_vars.actionmode, gen_mode=gen_mode) - + #==================================================================# # Event triggered when user clicks the submit button #==================================================================# From e5d0a597a1806815ca7463a6536d6719ceb8d165 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 15:36:32 -0500 Subject: [PATCH 025/107] Generation Mode: UNTIL_EOS This mode enables the EOS token and will generate infinitely until hitting it. 
--- modeling/inference_model.py | 24 +++++++++++++----------- modeling/inference_models/hf_torch.py | 13 ++++++++++++- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/modeling/inference_model.py b/modeling/inference_model.py index e09249c3..8b7f0e3e 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -585,19 +585,21 @@ class InferenceModel: "wi_scanner_excluded_keys", set() ) + self.gen_state["allow_eos"] = False + temp_stoppers = [] + if gen_mode not in self.get_supported_gen_modes(): + gen_mode = GenerationMode.STANDARD + logger.warning(f"User requested unsupported GenerationMode '{gen_mode}'!") + if gen_mode == GenerationMode.FOREVER: - if self.capabilties.stopper_hooks: - self.gen_state["stop_at_genamt"] = False - max_new = 1e7 - else: - logger.warning( - "User requested infinite generation on model that doesn't support stop hooks. Recipe for disaster!" - ) + self.gen_state["stop_at_genamt"] = False + max_new = 1e7 elif gen_mode == GenerationMode.UNTIL_EOS: - # Still need to unban - raise NotImplementedError() + self.gen_state["allow_eos"] = True + self.gen_state["stop_at_genamt"] = False + max_new = 1e7 elif gen_mode == GenerationMode.UNTIL_NEWLINE: # TODO: Look into replacing `single_line` with `generation_mode` temp_stoppers.append(Stoppers.newline_stopper) @@ -668,11 +670,11 @@ class InferenceModel: Returns: List[GenerationMode]: A list of compatible `GenerationMode`s. """ - ret = [] + ret = [GenerationMode.STANDARD] + if self.capabilties.stopper_hooks: ret += [ GenerationMode.FOREVER, - GenerationMode.UNTIL_EOS, GenerationMode.UNTIL_NEWLINE, GenerationMode.UNTIL_SENTENCE_END, ] diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 1b411c95..b4909f60 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -31,6 +31,7 @@ from modeling.stoppers import Stoppers from modeling.post_token_hooks import PostTokenHooks from modeling.inference_models.hf import HFInferenceModel from modeling.inference_model import ( + GenerationMode, GenerationResult, GenerationSettings, ModelCapabilities, @@ -254,7 +255,11 @@ class HFTorchInferenceModel(HFInferenceModel): kwargs["logits_warper"] = new_get_logits_warper( beams=1, ) - if utils.koboldai_vars.newlinemode in ["s", "ns"]: + + if ( + utils.koboldai_vars.newlinemode in ["s", "ns"] + and not m_self.gen_state["allow_eos"] + ): kwargs["eos_token_id"] = -1 kwargs.setdefault("pad_token_id", 2) return new_sample.old_sample(self, *args, **kwargs) @@ -605,3 +610,9 @@ class HFTorchInferenceModel(HFInferenceModel): self.breakmodel = False self.usegpu = False return + + def get_supported_gen_modes(self) -> List[GenerationMode]: + # This changes a torch patch to disallow eos as a bad word. + return super().get_supported_gen_modes() + [ + GenerationMode.UNTIL_EOS + ] \ No newline at end of file From 6e7b0794ea80c9eae1a6bc4f89590e3d657febea Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 15:40:07 -0500 Subject: [PATCH 026/107] Context Menu: Fix for elements with a context-menu attribute but... ...without an entry in `context_menu_items`. 
--- static/koboldai.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/static/koboldai.js b/static/koboldai.js index d7560f54..b25bef31 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -6153,6 +6153,10 @@ process_cookies(); // Show only applicable actions in the context menu let contextMenuType = target.getAttribute("context-menu"); + + // If context menu is not present, return + if (!context_menu_actions[contextMenuType]) return; + for (const contextMenuItem of contextMenu.childNodes) { let shouldShow = contextMenuItem.classList.contains(`context-menu-${contextMenuType}`); From 560fb3bd2d2c054695765c5ae1826cd93f83519c Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 18:08:21 -0500 Subject: [PATCH 027/107] Fix occasional action highlight issue --- static/koboldai.js | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/static/koboldai.js b/static/koboldai.js index 8b70dd6a..0fde7169 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -597,13 +597,11 @@ function do_story_text_updates(action) { story_area.append(item); } } - - - if (action.action['Selected Text'].charAt(0) == ">") { - item.classList.add("action_mode_input"); - } else { - item.classList.remove("action_mode_input"); - } + + item.classList.toggle( + "action_mode_input", + action.action['Selected Text'].replaceAll("\n", "")[0] === ">" + ); if ('wi_highlighted_text' in action.action) { for (chunk of action.action['wi_highlighted_text']) { From 418f3415608de9c375e12d6713427967d13f9d52 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 18:13:57 -0500 Subject: [PATCH 028/107] Fix a/n depth being visually apart from a/n --- templates/story flyout.html | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/templates/story flyout.html b/templates/story flyout.html index 514edbb9..d08bf8b8 100644 --- a/templates/story flyout.html +++ b/templates/story flyout.html @@ -50,6 +50,14 @@

+
+ {% with menu='author_notes' %}
+ {% with sub_path='' %}
+ {% include 'settings item.html' %}
+ {% endwith %}
+ {% endwith %}
+
+

Genre

Styles the AI will attempt to imitate. Effectiveness depends on model.
@@ -75,14 +83,6 @@ } - -
- {% with menu='author_notes' %}
- {% with sub_path='' %}
- {% include 'settings item.html' %}
- {% endwith %}
- {% endwith %}
-
-
From dd8e5f5d0580c6320557d79ed536375c1ca669c4 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 23 Jul 2023 21:40:08 +0800 Subject: [PATCH 045/107] updated lite to v50 --- static/klite.html | 127 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 103 insertions(+), 24 deletions(-) diff --git a/static/klite.html b/static/klite.html index 4b62dc2f..57c877cc 100644 --- a/static/klite.html +++ b/static/klite.html @@ -3,7 +3,7 @@
From 9cc6972c1c7ac6012181f7c6e43e6e7abb92a827 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 11:30:33 -0500 Subject: [PATCH 064/107] Shh! --- static/koboldai.js | 1 - 1 file changed, 1 deletion(-) diff --git a/static/koboldai.js b/static/koboldai.js index 4902908f..1544ee93 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -776,7 +776,6 @@ function update_status_bar(data) { } function do_ai_busy(data) { - console.log("AIBUSY", data.value) ai_busy = data.value; // Don't allow editing while Mr. Kobold is thinking document.getElementById("Selected Text").contentEditable = !ai_busy; From a6aafb252534b26bbdf034788895c3317b4cdd53 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 13:07:30 -0500 Subject: [PATCH 065/107] GPTQ: Patch QuantLinear to not use CPU RAM --- .../inference_models/gptq_hf_torch/class.py | 23 ++++++++++++++++++- modeling/lazy_loader.py | 3 +-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index 9a1b872e..d942a539 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -7,7 +7,7 @@ import torch import re import shutil import sys -from typing import Union +from typing import Dict, Union import utils import modeling.lazy_loader as lazy_loader @@ -167,6 +167,25 @@ class model_backend(HFTorchInferenceModel): self.model.kai_model = self utils.koboldai_vars.modeldim = self.get_hidden_size() + def _patch_quant(self) -> None: + # QuantLinear loads on the CPU by default, using a lot of RAM! If we + # load it to the same device that the weights are gonna be on, it + # mysteriously uses no additional VRAM + + from gptq import quant_v3 + from gptq import quant_v2 + from gptq import quant_v1 + + def _ql_init_(self, *args, **kwargs): + ret = type(self)._unpatched_init(self, *args, **kwargs) + self.to("cuda:0") + return ret + + for quant_module in [quant_v3, quant_v2, quant_v1]: + quant_module.QuantLinear._unpatched_init = quant_module.QuantLinear.__init__ + quant_module.QuantLinear.__init__ = _ql_init_ + + def _get_model(self, location: str, tf_kwargs: Dict): import gptq from gptq.gptj import load_quant as gptj_load_quant @@ -177,6 +196,8 @@ class model_backend(HFTorchInferenceModel): from gptq.mpt import load_quant as mpt_load_quant from gptq.offload import load_quant_offload + self._patch_quant() + gptq_model, gptq_bits, gptq_groupsize, gptq_file, gptq_version = load_model_gptq_settings(location) v2_bias = False diff --git a/modeling/lazy_loader.py b/modeling/lazy_loader.py index 8fff59d3..a5e7c58f 100644 --- a/modeling/lazy_loader.py +++ b/modeling/lazy_loader.py @@ -358,7 +358,6 @@ def safetensors_load_tensor_independently( ) -> torch.Tensor: """A hacky way to load a tensor by itself and not mmap every single tensor or whatever is causing that big memory spike""" - print("[ld]", tensor_key) with safetensors.safe_open(checkpoint_file, framework="pt", device=device) as f: return f.get_tensor(tensor_key) @@ -379,7 +378,7 @@ def patch_safetensors(callback): # (70 tensors/s -> 65 tensor/s). The memory savings probably # shouldn't be the happening, maybe there's a memory leak # somewhere in our pipeline with CPU tensors. 
- intermediary_device = "cuda" + intermediary_device = "cuda:0" else: intermediary_device = "cpu" From 4a6cccb00227561454e395b796aada44a60b05cf Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 13:09:15 -0500 Subject: [PATCH 066/107] Import fix --- modeling/inference_models/gptq_hf_torch/class.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index d942a539..499b2682 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -90,7 +90,6 @@ class model_backend(HFTorchInferenceModel): def _load(self, save_model: bool, initial_load: bool) -> None: try: - import hf_bleeding_edge from hf_bleeding_edge import AutoModelForCausalLM except ImportError: from transformers import AutoModelForCausalLM From 929917efe9bb51aa4fe2147f6813205908efb3f6 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 13:09:43 -0500 Subject: [PATCH 067/107] Remove shrieking --- modeling/inference_models/gptq_hf_torch/class.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index 499b2682..74f11e18 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -208,19 +208,15 @@ class model_backend(HFTorchInferenceModel): logger.info(f"Using GPTQ file: {gptq_file}, {gptq_bits}-bit model, type {model_type}, version {gptq_version}{' (with bias)' if v2_bias else ''}, groupsize {gptq_groupsize}") - with lazy_loader.use_lazy_load( enable=self.lazy_load, dematerialized_modules=False, ): - print(self.lazy_load) if model_type == "gptj": model = load_quant_offload(gptj_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) elif model_type == "gpt_neox": model = load_quant_offload(gptneox_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) elif model_type == "llama": - print("LLLLLAAAMMMAA") - print(torch.load) model = load_quant_offload(llama_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) elif model_type == "opt": model = load_quant_offload(opt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) From 43a4abaf6320cc86e244cf103cc93b520339550e Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 13:10:33 -0500 Subject: [PATCH 068/107] Remove even more debug --- modeling/lazy_loader.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/modeling/lazy_loader.py b/modeling/lazy_loader.py index a5e7c58f..74770a1c 100644 --- a/modeling/lazy_loader.py +++ b/modeling/lazy_loader.py @@ -176,9 +176,6 @@ class TorchLazyTensor(LazyTensor): CheckpointChunkCache.key = self.key ziproot = checkpoint.namelist()[0].split("/")[0] CheckpointChunkCache.handle = checkpoint.open(f"{ziproot}/data/{self.key}", "r") - - - else: # Cache hit. Hip hip hooray! :^) # print(".", end="", flush=True) @@ -318,7 +315,6 @@ class _LazyUnpickler(RestrictedUnpickler): lazy_loaded_storages: Dict[str, LazyTensor] def __init__(self, *args, **kwargs): - # print(args, kwargs) self.lazy_loaded_storages = {} return super().__init__(*args, **kwargs) @@ -364,12 +360,10 @@ def safetensors_load_tensor_independently( def patch_safetensors(callback): - print("Hi! 
We are patching safetensors") # Safetensors load patch import transformers def safetensors_load(checkpoint_file: str) -> dict: - print("LOAD NOW", safetensors_load) # Monkeypatch applied to safetensors.torch.load_file if utils.koboldai_vars.hascuda: @@ -523,7 +517,6 @@ def use_lazy_load( old_torch_load = torch.load def torch_load(f, map_location=None, pickle_module=pickle, **pickle_load_args): - print("TORCHLOAD", f) model_dict = old_torch_load( f=f, map_location=map_location, From 34aa333c44a16c38ce586efc0fb2118da0c20b0e Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 13:11:06 -0500 Subject: [PATCH 069/107] Last debug --- modeling/patches.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modeling/patches.py b/modeling/patches.py index f5b6bd06..5664ec07 100644 --- a/modeling/patches.py +++ b/modeling/patches.py @@ -144,7 +144,6 @@ class LazyloadPatches: LazyloadPatches._load_state_dict_into_meta_model ) torch.nn.Module._load_from_state_dict = LazyloadPatches._torch_load_from_state_dict - # torch.nn.Module._load_from_state_dict = _agn def __exit__(exc_type, exc_value, exc_traceback) -> None: transformers.modeling_utils._load_state_dict_into_meta_model = LazyloadPatches.old_load_state_dict From a73420c49c1371c49b59816d3122d6e6d4f3b676 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 17:15:59 -0500 Subject: [PATCH 070/107] really really really sketchy breakmodel implementation im gonna go lie down for an extended period of time --- .../inference_models/gptq_hf_torch/class.py | 175 +++++++++++++++--- 1 file changed, 153 insertions(+), 22 deletions(-) diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index 74f11e18..45d18f7b 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -82,6 +82,79 @@ def get_gptq_version(fpath): logger.warning(f"GPTQ model identified as v0, but v1={v1} and v2={v2}") return 0, False +def load_quant_offload_device_map( + load_quant_func, model, checkpoint, wbits, groupsize, device_map, offload_type=0, force_bias=False, +): + from gptq.offload import ( + find_layers, + llama_offload_forward, + gptneox_offload_forward, + gptj_offload_forward, + opt_offload_forward, + bigcode_offload_forward + ) + from transformers.models.llama.modeling_llama import LlamaModel + from transformers.models.opt.modeling_opt import OPTModel + from transformers.models.gpt_neox.modeling_gpt_neox import GPTNeoXModel + from transformers.models.gptj.modeling_gptj import GPTJModel + from transformers.models.gpt_bigcode.modeling_gpt_bigcode import GPTBigCodeModel + model = load_quant_func(model, checkpoint, wbits, groupsize, force_bias=force_bias) + + print(device_map) + + m, layers, remaining = find_layers(model) + + type(m).non_offload_forward = type(m).forward + + # Hook offload_forward into found model + if type(m) == LlamaModel: + type(m).forward = llama_offload_forward + elif type(m) == GPTNeoXModel: + type(m).forward = gptneox_offload_forward + elif type(m) == GPTJModel: + type(m).forward = gptj_offload_forward + elif type(m) == OPTModel: + type(m).forward = opt_offload_forward + elif type(m) == GPTBigCodeModel: + type(m).forward = bigcode_offload_forward + else: + raise RuntimeError(f"Model type {type(m)} not supported by CPU offloader") + + layers_done = len([1 for v in device_map.values() if v != "cpu"]) + print("LDone", layers_done) + + m.cpu_device = torch.device("cpu") + m.fast_offload = layers_done > len(layers) // 2 + 
m.layer_count = len(layers) + m.cpu_layers = len(layers) - layers_done + m.gpu_layers = layers_done + m.offload_type = offload_type + # HACK + m.primary_gpu = list(device_map.values())[0] + + if "layers" not in dir(m): + m.layers = layers + + print(len(layers)) + print(len(device_map)) + + print(m.primary_gpu) + for i in range(len(layers)): + dev = None + for key, device in device_map.items(): + key = int(*[x for x in key.split(".") if x.isdecimal()]) + if key == i: + dev = device + break + if dev is None: + raise ValueError + layers[key].to(dev, torch.float16, False) + + for module in remaining: + module.to(m.primary_gpu) + + return model + class model_backend(HFTorchInferenceModel): def is_valid(self, model_name, model_path, menu_path): @@ -166,7 +239,7 @@ class model_backend(HFTorchInferenceModel): self.model.kai_model = self utils.koboldai_vars.modeldim = self.get_hidden_size() - def _patch_quant(self) -> None: + def _patch_quant(self, device_map) -> None: # QuantLinear loads on the CPU by default, using a lot of RAM! If we # load it to the same device that the weights are gonna be on, it # mysteriously uses no additional VRAM @@ -175,14 +248,54 @@ class model_backend(HFTorchInferenceModel): from gptq import quant_v2 from gptq import quant_v1 - def _ql_init_(self, *args, **kwargs): - ret = type(self)._unpatched_init(self, *args, **kwargs) - self.to("cuda:0") - return ret + def make_quant(module, names, bits, groupsize, name='', force_bias=False): + if isinstance(module, quant_v3.QuantLinear): + return - for quant_module in [quant_v3, quant_v2, quant_v1]: - quant_module.QuantLinear._unpatched_init = quant_module.QuantLinear.__init__ - quant_module.QuantLinear.__init__ = _ql_init_ + for attr in dir(module): + tmp = getattr(module, attr) + name1 = name + '.' + attr if name != '' else attr + if name1 in names: + parts = name1.split(".") + device = None + for i in reversed(range(len(parts))): + maybe_key = ".".join(parts[:i]) + if maybe_key in device_map: + device = device_map[maybe_key] + break + + if device is None: + print(name1) + print(device_map) + raise ValueError + + print("[ql]", name1, device) + delattr(module, attr) + + ql = quant_v3.QuantLinear( + bits, + groupsize, + tmp.in_features, + tmp.out_features, + force_bias or tmp.bias is not None + ) + ql = ql.to(device) + + setattr(module, attr, ql) + + for name1, child in module.named_children(): + make_quant(child, names, bits, groupsize, name + '.' 
+ name1 if name != '' else name1, force_bias=force_bias) + + quant_v3.make_quant = make_quant + + # def _ql_init_(self, *args, **kwargs): + # ret = type(self)._unpatched_init(self, *args, **kwargs) + # self.to("cuda:0") + # return ret + + # for quant_module in [quant_v3, quant_v2, quant_v1]: + # quant_module.QuantLinear._unpatched_init = quant_module.QuantLinear.__init__ + # quant_module.QuantLinear.__init__ = _ql_init_ def _get_model(self, location: str, tf_kwargs: Dict): @@ -193,9 +306,12 @@ class model_backend(HFTorchInferenceModel): from gptq.opt import load_quant as opt_load_quant from gptq.bigcode import load_quant as bigcode_load_quant from gptq.mpt import load_quant as mpt_load_quant - from gptq.offload import load_quant_offload - self._patch_quant() + try: + import hf_bleeding_edge + from hf_bleeding_edge import AutoModelForCausalLM + except ImportError: + from transformers import AutoModelForCausalLM gptq_model, gptq_bits, gptq_groupsize, gptq_file, gptq_version = load_model_gptq_settings(location) v2_bias = False @@ -208,22 +324,43 @@ class model_backend(HFTorchInferenceModel): logger.info(f"Using GPTQ file: {gptq_file}, {gptq_bits}-bit model, type {model_type}, version {gptq_version}{' (with bias)' if v2_bias else ''}, groupsize {gptq_groupsize}") + device_map = {} + + if self.lazy_load: + with lazy_loader.use_lazy_load(dematerialized_modules=True): + metamodel = AutoModelForCausalLM.from_config(self.model_config) + if utils.args.cpu: + device_map = {name: "cpu" for name in utils.layers_module_names} + for name in utils.get_missing_module_names( + metamodel, list(device_map.keys()) + ): + device_map[name] = "cpu" + else: + device_map = self.breakmodel_config.get_device_map( + metamodel + ) + + self._patch_quant(device_map) + with lazy_loader.use_lazy_load( enable=self.lazy_load, dematerialized_modules=False, ): if model_type == "gptj": - model = load_quant_offload(gptj_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) + model = load_quant_offload_device_map(gptj_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) elif model_type == "gpt_neox": - model = load_quant_offload(gptneox_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) + model = load_quant_offload_device_map(gptneox_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) elif model_type == "llama": - model = load_quant_offload(llama_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) + print("YE LAMA") + + # model = llama_load_quant(location, gptq_file, gptq_bits, gptq_groupsize, force_bias=v2_bias) + model = load_quant_offload_device_map(llama_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) elif model_type == "opt": - model = load_quant_offload(opt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) + model = load_quant_offload_device_map(opt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) elif model_type == "mpt": - model = load_quant_offload(mpt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) + model = load_quant_offload_device_map(mpt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) elif model_type == "gpt_bigcode": - model = 
load_quant_offload(bigcode_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias).half() + model = load_quant_offload_device_map(bigcode_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias).half() else: try: import auto_gptq @@ -231,12 +368,6 @@ class model_backend(HFTorchInferenceModel): except ImportError: raise RuntimeError(f"4-bit load failed. Model type {model_type} not supported in 4-bit") - try: - import hf_bleeding_edge - from hf_bleeding_edge import AutoModelForCausalLM - except ImportError: - from transformers import AutoModelForCausalLM - # Monkey patch in hf_bleeding_edge to avoid having to trust remote code auto_gptq.modeling._utils.AutoConfig = hf_bleeding_edge.AutoConfig auto_gptq.modeling._base.AutoConfig = hf_bleeding_edge.AutoConfig From ad4528b5a6882e1bdb46e111be90d6f931090733 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 17:17:57 -0500 Subject: [PATCH 071/107] critical change --- modeling/inference_models/gptq_hf_torch/class.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index 45d18f7b..10349388 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -103,7 +103,6 @@ def load_quant_offload_device_map( print(device_map) m, layers, remaining = find_layers(model) - type(m).non_offload_forward = type(m).forward # Hook offload_forward into found model From c80de5120c3bfd28f5a4963eabd562915bc7d015 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 19:45:33 -0500 Subject: [PATCH 072/107] Cleanup --- .../inference_models/gptq_hf_torch/class.py | 69 ++++++------------- 1 file changed, 22 insertions(+), 47 deletions(-) diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index 10349388..6fae6779 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -100,8 +100,6 @@ def load_quant_offload_device_map( from transformers.models.gpt_bigcode.modeling_gpt_bigcode import GPTBigCodeModel model = load_quant_func(model, checkpoint, wbits, groupsize, force_bias=force_bias) - print(device_map) - m, layers, remaining = find_layers(model) type(m).non_offload_forward = type(m).forward @@ -120,7 +118,6 @@ def load_quant_offload_device_map( raise RuntimeError(f"Model type {type(m)} not supported by CPU offloader") layers_done = len([1 for v in device_map.values() if v != "cpu"]) - print("LDone", layers_done) m.cpu_device = torch.device("cpu") m.fast_offload = layers_done > len(layers) // 2 @@ -134,10 +131,6 @@ def load_quant_offload_device_map( if "layers" not in dir(m): m.layers = layers - print(len(layers)) - print(len(device_map)) - - print(m.primary_gpu) for i in range(len(layers)): dev = None for key, device in device_map.items(): @@ -184,10 +177,6 @@ class model_backend(HFTorchInferenceModel): except (ValueError, AttributeError): self.gpu_layers_list = [utils.num_layers(self.model_config)] - tf_kwargs = { - "low_cpu_mem_usage": True, - } - # If we're using torch_lazy_loader, we need to get breakmodel config # early so that it knows where to load the individual model tensors logger.debug("lazy_load: {} hascuda: {} breakmodel: {} nobreakmode: {}".format(self.lazy_load, utils.koboldai_vars.hascuda, self.breakmodel, self.nobreakmodel)) @@ -200,9 +189,6 @@ class 
model_backend(HFTorchInferenceModel): self.breakmodel_device_config(self.model_config) if self.lazy_load: - # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time - tf_kwargs.pop("low_cpu_mem_usage", None) - # If we're using lazy loader, we need to figure out what the model's hidden layers are called with lazy_loader.use_lazy_load(dematerialized_modules=True): try: @@ -218,7 +204,7 @@ class model_backend(HFTorchInferenceModel): if self.get_local_model_path(): # Model is stored locally, load it. - self.model = self._get_model(self.get_local_model_path(), tf_kwargs) + self.model = self._get_model(self.get_local_model_path()) self.tokenizer = self._get_tokenizer(self.get_local_model_path()) else: raise NotImplementedError("GPTQ Model downloading not implemented") @@ -238,17 +224,9 @@ class model_backend(HFTorchInferenceModel): self.model.kai_model = self utils.koboldai_vars.modeldim = self.get_hidden_size() - def _patch_quant(self, device_map) -> None: - # QuantLinear loads on the CPU by default, using a lot of RAM! If we - # load it to the same device that the weights are gonna be on, it - # mysteriously uses no additional VRAM - - from gptq import quant_v3 - from gptq import quant_v2 - from gptq import quant_v1 - - def make_quant(module, names, bits, groupsize, name='', force_bias=False): - if isinstance(module, quant_v3.QuantLinear): + def _patch_quant(self, device_map, quant_module) -> None: + def make_quant(module, names, bits, groupsize, name='', force_bias=False, **kwargs): + if isinstance(module, quant_module.QuantLinear): return for attr in dir(module): @@ -264,19 +242,17 @@ class model_backend(HFTorchInferenceModel): break if device is None: - print(name1) - print(device_map) - raise ValueError + raise ValueError(f"No device for {name1}") - print("[ql]", name1, device) delattr(module, attr) - ql = quant_v3.QuantLinear( + ql = quant_module.QuantLinear( bits, groupsize, tmp.in_features, tmp.out_features, - force_bias or tmp.bias is not None + force_bias or tmp.bias is not None, + **kwargs, ) ql = ql.to(device) @@ -285,19 +261,21 @@ class model_backend(HFTorchInferenceModel): for name1, child in module.named_children(): make_quant(child, names, bits, groupsize, name + '.' 
+ name1 if name != '' else name1, force_bias=force_bias) - quant_v3.make_quant = make_quant - - # def _ql_init_(self, *args, **kwargs): - # ret = type(self)._unpatched_init(self, *args, **kwargs) - # self.to("cuda:0") - # return ret - - # for quant_module in [quant_v3, quant_v2, quant_v1]: - # quant_module.QuantLinear._unpatched_init = quant_module.QuantLinear.__init__ - # quant_module.QuantLinear.__init__ = _ql_init_ + quant_module.make_quant = make_quant - def _get_model(self, location: str, tf_kwargs: Dict): + def _patch_quants(self, device_map) -> None: + # Load QuantLinears on the device corresponding to the device map + + from gptq import quant_v3 + from gptq import quant_v2 + from gptq import quant_v1 + + for quant_module in [quant_v3, quant_v2, quant_v1]: + self._patch_quant(device_map, quant_module) + + + def _get_model(self, location: str): import gptq from gptq.gptj import load_quant as gptj_load_quant from gptq.gptneox import load_quant as gptneox_load_quant @@ -339,7 +317,7 @@ class model_backend(HFTorchInferenceModel): metamodel ) - self._patch_quant(device_map) + self._patch_quants(device_map) with lazy_loader.use_lazy_load( enable=self.lazy_load, @@ -350,9 +328,6 @@ class model_backend(HFTorchInferenceModel): elif model_type == "gpt_neox": model = load_quant_offload_device_map(gptneox_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) elif model_type == "llama": - print("YE LAMA") - - # model = llama_load_quant(location, gptq_file, gptq_bits, gptq_groupsize, force_bias=v2_bias) model = load_quant_offload_device_map(llama_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) elif model_type == "opt": model = load_quant_offload_device_map(opt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) From 0f88d520ed05281994672c7d7740e23f3812ccfe Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 21:45:52 -0500 Subject: [PATCH 073/107] UI: Replace shift_down code with builtin event.shiftKey Keeping a global variable that tracks shift is worse because it can get desynced if you leave the window while holding shift (which apparently happens a lot more than you would think) --- static/koboldai.js | 59 ++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/static/koboldai.js b/static/koboldai.js index f775f3f0..242b77e0 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -58,7 +58,6 @@ var rename_return_emit_name = "popup_rename"; var popup_rows = []; var popup_style = ""; var popup_sort = {}; -var shift_down = false; var world_info_data = {}; var world_info_folder_data = {}; var saved_settings = {}; @@ -4925,49 +4924,44 @@ function getCookie(cname, default_return=null) { } function detect_enter_submit(e) { - if (((e.code == "Enter") || (e.code == "NumpadEnter")) && !(shift_down)) { - if (typeof e.stopPropagation != "undefined") { - e.stopPropagation(); - } else { - e.cancelBubble = true; - } - //console.log("submitting"); - document.getElementById("btnsubmit").onclick(); - setTimeout(function() {document.getElementById('input_text').value = '';}, 1); + if (e.shiftKey) return; + if (!["Enter", "NumpadEnter"].includes(e.key)) return; + + if (typeof e.stopPropagation != "undefined") { + e.stopPropagation(); + } else { + e.cancelBubble = true; } + + //console.log("submitting"); + document.getElementById("btnsubmit").onclick(); + setTimeout(function() {document.getElementById('input_text').value = 
'';}, 1); } function detect_enter_text(e) { - if (((e.code == "Enter") || (e.code == "NumpadEnter")) && !(shift_down)) { - if (typeof e.stopPropagation != "undefined") { - e.stopPropagation(); - } else { - e.cancelBubble = true; - } - //get element - //console.log("Doing Text Enter"); - //console.log(e.currentTarget.activeElement); - if (e.currentTarget.activeElement != undefined) { - var item = $(e.currentTarget.activeElement); - item.onchange(); - } + if (e.shiftKey) return; + if (!["Enter", "NumpadEnter"].includes(e.key)) return; + + if (typeof e.stopPropagation != "undefined") { + e.stopPropagation(); + } else { + e.cancelBubble = true; + } + //get element + //console.log("Doing Text Enter"); + //console.log(e.currentTarget.activeElement); + if (e.currentTarget.activeElement != undefined) { + var item = $(e.currentTarget.activeElement); + item.onchange(); } } function detect_key_down(e) { - if ((e.code == "ShiftLeft") || (e.code == "ShiftRight")) { - shift_down = true; - } else if (e.code == "Escape") { + if (e.code == "Escape") { close_menus(); } } -function detect_key_up(e) { - if ((e.code == "ShiftLeft") || (e.code == "ShiftRight")) { - shift_down = false; - } -} - function selectTab(tab) { let tabTarget = document.getElementById(tab.getAttribute("tab-target")); let tabClass = Array.from(tab.classList).filter((c) => c.startsWith("tab-"))[0]; @@ -5935,7 +5929,6 @@ function openClubImport() { //// INIT //// document.onkeydown = detect_key_down; -document.onkeyup = detect_key_up; document.getElementById("input_text").onkeydown = detect_enter_submit; /* -- Popups -- */ From 79226ea66d10d579b019e43d7d1a86ede3659956 Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 25 Jul 2023 21:51:03 +0200 Subject: [PATCH 074/107] Hide TPU API during load --- tpu_mtj_backend.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index 5a5271e2..8a9fa832 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -1116,10 +1116,11 @@ def load_model(path: str, model_type: str, badwordsids=koboldai_settings.badword thread_resources_env = maps.ResourceEnv(maps.Mesh(devices, ('dp', 'mp')), ()) maps.thread_resources.env = thread_resources_env if initial_load: - logger.message(f"KoboldAI has finished loading and is available at the following link for UI 1: {koboldai_vars.cloudflare_link}") - logger.message(f"KoboldAI has finished loading and is available at the following link for UI 2: {koboldai_vars.cloudflare_link}/new_ui") - logger.message(f"KoboldAI has finished loading and is available at the following link for KoboldAI Lite: {koboldai_vars.cloudflare_link}/lite") - logger.message(f"KoboldAI has finished loading and is available at the following link for the API: {koboldai_vars.cloudflare_link}/api") + logger.message(f"KoboldAI has still loading your model but available at the following link for UI 1: {koboldai_vars.cloudflare_link}") + logger.message(f"KoboldAI has still loading your model but available at the following link for UI 2: {koboldai_vars.cloudflare_link}/new_ui") + logger.message(f"KoboldAI has still loading your model but available at the following link for KoboldAI Lite: {koboldai_vars.cloudflare_link}/lite") + logger.message(f"KoboldAI has still loading your model but available at the following link for the API: [Loading Model...]") + logger.message(f"While the model loads you can use the above links to begin setting up your session, for generations you must wait until after its done loading.") global badwords # These are the 
tokens that we don't want the AI to ever write From b20f320b223e6beb26052b2151b08a18330b4433 Mon Sep 17 00:00:00 2001 From: somebody Date: Tue, 25 Jul 2023 22:46:02 -0500 Subject: [PATCH 075/107] Redo workaround --- aiserver.py | 5 + koboldai_settings.py | 24 +++- static/koboldai.css | 42 +++--- static/koboldai.js | 290 +++++++++++++++++++++++---------------- templates/index_new.html | 4 +- 5 files changed, 221 insertions(+), 144 deletions(-) diff --git a/aiserver.py b/aiserver.py index 77afc3d0..6d50ca73 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3902,6 +3902,10 @@ def generate(txt, minimum, maximum, found_entries=None): # Open up token stream emit("stream_tokens", True, broadcast=True, room="UI_2") + # HACK: Show options when streaming more than 1 sequence + if utils.koboldai_vars.output_streaming: + koboldai_vars.actions.show_options(koboldai_vars.numseqs > 1, force=True) + koboldai_vars.generated_tkns = 0 if(found_entries is None): @@ -6166,6 +6170,7 @@ def UI_2_Set_Selected_Text(data): @socketio.on('Use Option Text') @logger.catch def UI_2_Use_Option_Text(data): + koboldai_vars.actions.show_options(False) if koboldai_vars.prompt == "": koboldai_vars.prompt = koboldai_vars.actions.get_current_options()[int(data['option'])]['text'] koboldai_vars.actions.clear_unused_options() diff --git a/koboldai_settings.py b/koboldai_settings.py index 095f1f47..bf824a7c 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1756,11 +1756,15 @@ class KoboldStoryRegister(object): def go_forward(self): action_step = self.action_count+1 - if action_step in self.actions: - if len(self.get_current_options()) == 1: - logger.warning("Going forward with this text: {}".format(self.get_current_options()[0]["text"])) - self.use_option([x['text'] for x in self.actions[action_step]["Options"]].index(self.get_current_options()[0]["text"])) - + if action_step not in self.actions: + return + + self.show_options(len(self.get_current_options()) > 1) + + if len(self.get_current_options()) == 1: + logger.warning("Going forward with this text: {}".format(self.get_current_options()[0]["text"])) + self.use_option([x['text'] for x in self.actions[action_step]["Options"]].index(self.get_current_options()[0]["text"])) + def use_option(self, option_number, action_step=None): if action_step is None: action_step = self.action_count+1 @@ -1798,6 +1802,16 @@ class KoboldStoryRegister(object): process_variable_changes(self._socketio, "story", 'actions', {"id": action_step, 'action': self.actions[action_step]}, None) self.set_game_saved() + def show_options( + self, + should_show: bool, + force: bool = False, + + ) -> None: + if self._koboldai_vars.aibusy and not force: + return + self._socketio.emit("show_options", should_show, broadcast=True, room="UI_2") + def delete_action(self, action_id, keep=True): if action_id in self.actions: old_options = copy.deepcopy(self.actions[action_id]["Options"]) diff --git a/static/koboldai.css b/static/koboldai.css index 145f217e..1c2ebef3 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -1528,7 +1528,7 @@ body { grid-template-columns: 30px auto 30% 30px; grid-template-rows: auto min-content min-content 100px; } -.main-grid[option_length="0"][model_numseqs="1"] { +.main-grid[hide-options="true"] { grid-template-columns: 30px auto 0px 30px; } @@ -1613,39 +1613,39 @@ body { font-style: italic; } -.sequence_area { +#option-container { margin-top: 10px; grid-area: options; background-color: var(--sequence_area_background); overflow-y: scroll; } 
-.sequence_area::-webkit-scrollbar { +#option-container::-webkit-scrollbar { display: none; } @media only screen and (max-aspect-ratio: 7/5) { -.sequences { - margin-top: 5px; - width: 100%; - border: 0px; - border-spacing: 0; - display: flex; - flex-direction: row; - overflow-x: scroll; - scroll-snap-type: x mandatory; -} + #option-container { + margin-top: 5px; + width: 100%; + border: 0px; + border-spacing: 0; + display: flex; + flex-direction: row; + overflow-x: scroll; + scroll-snap-type: x mandatory; + } } @media only screen and (min-aspect-ratio: 7/5) { -.sequences { - margin-top: 5px; - width: 100%; - border: 0px; - border-spacing: 0; - display: flex; - flex-direction: column; -} + #option-container { + margin-top: 5px; + width: 100%; + border: 0px; + border-spacing: 0; + display: flex; + flex-direction: column; + } } .sequence_row { diff --git a/static/koboldai.js b/static/koboldai.js index 1544ee93..87f2f944 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -38,6 +38,7 @@ socket.on("scratchpad_response", recieveScratchpadResponse); socket.on("show_error_notification", function(data) { reportError(data.title, data.text) }); socket.on("generated_wi", showGeneratedWIData); socket.on("stream_tokens", stream_tokens); +socket.on("show_options", show_options); //socket.onAny(function(event_name, data) {console.log({"event": event_name, "class": data.classname, "data": data});}); // Must be done before any elements are made; we track their changes. @@ -86,6 +87,7 @@ var initial_socketio_connection_occured = false; var selected_model_data; var privacy_mode_enabled = false; var ai_busy = false; +var can_show_options = false; var streaming = { windowOpen: false, @@ -309,7 +311,7 @@ function reset_story() { } //clear any options - var option_area = document.getElementById("Select Options"); + var option_area = document.getElementById("option-container"); while (option_area.firstChild) { option_area.removeChild(option_area.firstChild); } @@ -341,7 +343,7 @@ function reset_story() { document.getElementById("Selected Text").setAttribute("contenteditable", "true"); } - document.getElementById('main-grid').setAttribute('option_length', 0); + document.getElementById('main-grid').setAttribute("hide-options", true); $(".chat-message").remove(); addInitChatMessage(); @@ -361,22 +363,143 @@ function fix_text(val) { } } +function create_option_element(text, optionId, actionId, type, itemPinned=false) { + // Type must be "gen" or "history" + const optionContainer = $el("#option-container"); + const row = $e("div", optionContainer, { + classes: ["sequence_row"], + "option_id": optionId, + "action_id": actionId, + }); + + const textcell = document.createElement("span"); + textcell.textContent = text; + textcell.classList.add("sequence"); + textcell.setAttribute("option_id", optionId); + textcell.setAttribute("option_chunk", actionId); + + const iconcell = document.createElement("span"); + iconcell.setAttribute("option_id", optionId); + iconcell.setAttribute("option_chunk", actionId); + iconcell.classList.add("sequnce_icon"); + + const icon = document.createElement("span"); + icon.id = "Pin_"+optionId; + icon.classList.add("material-icons-outlined"); + icon.classList.add("option_icon"); + icon.classList.add("cursor"); + + if (type === "gen") { + icon.classList.add("pin"); + icon.textContent = "push_pin"; + + if (itemPinned) { + icon.classList.add('rotate_45'); + } else { + icon.setAttribute('style', "filter: brightness(50%);"); + } + + iconcell.addEventListener("click", function() { + 
socket.emit("Pinning", { + chunk: actionId, + option: optionId + }); + }); + } else if (type === "history") { + icon.textContent = "cached"; + + const delete_icon = $e("span", iconcell, { + classes: ["material-icons-outlined", "cursor", 'option_icon'], + tooltip: "Delete Option", + option_id: optionId, + option_chunk: actionId, + textContent: 'delete' + }); + + delete_icon.addEventListener("click", function() { + socket.emit("delete_option", { + chunk: actionId, + option: optionId + }); + }); + } + + + iconcell.append(icon); + + textcell.addEventListener("click", function() { + socket.emit("Use Option Text", { + chunk: actionId, + option: optionId, + }); + }); + + row.append(textcell); + row.append(iconcell); + optionContainer.append(row); + return row; +} + +function action_count_changed() { + // Delete all options before the next chunk to hidden + const option_container = document.getElementById("option-container"); + const current_chunk = parseInt(document.getElementById("action_count").textContent) + 1; + + for (const chunk of Array.from(option_container.children)) { + if (parseInt(chunk.getAttribute("action_id")) === current_chunk) { + // good + } else { + chunk.remove(); + } + } +} + +function visible_options_present() { + const optionContainer = $el("#option-container"); + for (const el of optionContainer.childNodes) { + if (el.classList.contains("hidden")) continue; + return true; + } + return false; +} + +function show_options(doShow) { + can_show_options = doShow; + let show = doShow;// && visible_options_present(); + $el("#option-container").classList.toggle("hidden", !show); + $el("#main-grid").setAttribute("hide-options", !show); + + if (show) { + const action = actions_data[current_action + 1]; + if (!action) return; + + create_options({ + id: current_action+1, + action: action + }); + } +} + function create_options(action) { //Set all options before the next chunk to hidden - if (action.id != current_action+1) { + if (action.id != current_action+1) { return; } - var option_chunk = document.getElementById("Select Options"); - - //first, let's clear out our existing data - while (option_chunk.firstChild) { - option_chunk.removeChild(option_chunk.firstChild); + + if (!can_show_options) { + return; } - + + // First, let's clear out our existing data. Note: use querySelectorAll to + // iterate for deletion because other methods resize the list during iteration + for (const option of document.querySelectorAll(".sequence_row")) { + option.remove(); + } + //Let's check if we only have a single redo option. 
In that case we din't show as the user can use the redo button - seen_prev_selection = false; - show_options = false; - for (item of action.action.Options) { + let seen_prev_selection = false; + let show_options = false; + for (const item of action.action.Options) { if (!(item['Previous Selection']) && !(item['Edited'])) { show_options = true; break; @@ -389,100 +512,46 @@ function create_options(action) { } } } - if (!(show_options)) { - document.getElementById('main-grid').setAttribute('option_length', 0); + + const mainGrid = $el("#main-grid"); + const optionContainer = $el("#option-container"); + + if (!show_options) { + mainGrid.setAttribute("hide-options", true); + optionContainer.classList.add("hidden"); return; } - - document.getElementById('main-grid').setAttribute('option_length', action.action.Options.length); - - var table = document.createElement("div"); - table.classList.add("sequences"); - //Add Redo options - let added_options=0; - i=0; - for (item of action.action.Options) { - if ((item['Previous Selection']) && (item.text != "")) { - var row = document.createElement("div"); - row.classList.add("sequence_row"); - var textcell = document.createElement("span"); - textcell.textContent = item.text; - textcell.classList.add("sequence"); - textcell.setAttribute("option_id", i); - textcell.setAttribute("option_chunk", action.id); - var iconcell = document.createElement("span"); - iconcell.setAttribute("option_id", i); - iconcell.setAttribute("option_chunk", action.id); - iconcell.classList.add("sequnce_icon"); - var icon = document.createElement("span"); - icon.id = "Pin_"+i; - icon.classList.add("material-icons-outlined"); - icon.classList.add("option_icon"); - icon.classList.add("cursor"); - icon.textContent = "cached"; - iconcell.append(icon); - delete_icon = $e("span", iconcell, {"classes": ["material-icons-outlined", "cursor", 'option_icon'], - "tooltip": "Delete Option", 'option_id': i, - 'option_chunk': action.id, 'textContent': 'delete'}); - delete_icon.onclick = function () { - socket.emit("delete_option", {"chunk": this.getAttribute("option_chunk"), "option": this.getAttribute("option_id")}); - }; - textcell.onclick = function () { - socket.emit("Use Option Text", {"chunk": this.getAttribute("option_chunk"), "option": this.getAttribute("option_id")}); - }; - row.append(textcell); - row.append(iconcell); - table.append(row); - added_options+=1; - } - i+=1; + + // mainGrid.setAttribute("hide-options", false); + // optionContainer.classList.toggle("hidden", action.action.Options.length < 1); + + // Gens + let optionId = 0; + for (const item of action.action.Options) { + if (!item.text) continue; + if (item.Edited) continue; + if (item["Previous Selection"]) continue; + + create_option_element(item.text, optionId, action.id, "gen", item.Pinned); + optionId++; } - //Add general options - i=0; - for (item of action.action.Options) { - if (!(item.Edited) && !(item['Previous Selection']) && (item.text != "")) { - var row = document.createElement("div"); - row.classList.add("sequence_row"); - var textcell = document.createElement("span"); - textcell.textContent = item.text; - textcell.classList.add("sequence"); - textcell.setAttribute("option_id", i); - textcell.setAttribute("option_chunk", action.id); - var iconcell = document.createElement("span"); - iconcell.setAttribute("option_id", i); - iconcell.setAttribute("option_chunk", action.id); - iconcell.classList.add("sequnce_icon"); - var icon = document.createElement("span"); - icon.id = "Pin_"+i; - 
icon.classList.add("material-icons-outlined"); - icon.classList.add("option_icon"); - icon.classList.add("cursor"); - icon.classList.add("pin"); - icon.textContent = "push_pin"; - if (!(item.Pinned)) { - icon.setAttribute('style', "filter: brightness(50%);"); - } else { - icon.classList.add('rotate_45'); - } - iconcell.append(icon); - iconcell.onclick = function () { - socket.emit("Pinning", {"chunk": this.getAttribute("option_chunk"), "option": this.getAttribute("option_id")}); - }; - textcell.onclick = function () { - socket.emit("Use Option Text", {"chunk": this.getAttribute("option_chunk"), "option": this.getAttribute("option_id")}); - }; - row.append(textcell); - row.append(iconcell); - table.append(row); - added_options+=1; - } - i+=1; + + + // History + optionId = 0; + for (const item of action.action.Options) { + if (!item.text) continue; + if (!item["Previous Selection"]) continue; + + create_option_element(item.text, optionId, action.id, "history"); + optionId++; } - if (added_options > 0) { - option_chunk.append(table); - } - - + + let anyOptions = visible_options_present(); + + $el("#option-container").classList.toggle("hidden", !anyOptions); + $el("#main-grid").setAttribute("hide-options", !anyOptions); + //make sure our last updated chunk is in view //option_chunk.scrollIntoView(); } @@ -520,7 +589,8 @@ function process_actions_data(data) { //update action_type = "update"; } - for (action of actions) { + + for (const action of actions) { actions_data[parseInt(action.id)] = action.action; do_story_text_updates(action); create_options(action); @@ -1029,20 +1099,8 @@ function var_changed(data) { // Change_Theme(getCookie("theme", "Monochrome")); //} - //Set all options before the next chunk to hidden if ((data.classname == "actions") && (data.name == "Action Count")) { - var option_container = document.getElementById("Select Options"); - var current_chunk = parseInt(document.getElementById("action_count").textContent)+1; - - var children = option_container.children; - for (var i = 0; i < children.length; i++) { - var chunk = children[i]; - if (chunk.id == "Select Options Chunk " + current_chunk) { - chunk.classList.remove("hidden"); - } else { - chunk.classList.add("hidden"); - } - } + action_count_changed(); } diff --git a/templates/index_new.html b/templates/index_new.html index 25dee500..50fb0281 100644 --- a/templates/index_new.html +++ b/templates/index_new.html @@ -44,7 +44,7 @@ -
+

Disconnected

@@ -59,7 +59,7 @@ -
+