From 71aee4dbd8f1d429e0ebd27dbf98bfd6fcf6c52c Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 10 May 2023 16:30:46 -0400 Subject: [PATCH 01/68] First concept of model plugins with a conceptual UI. Completely breaks UI2 model loading. --- aiserver.py | 123 ++-- modeling/inference_model.py | 9 + modeling/inference_models/api.py | 26 +- modeling/inference_models/basic_api.py | 29 +- modeling/inference_models/generic_hf_torch.py | 8 +- modeling/inference_models/hf.py | 190 ------ modeling/inference_models/hf_mtj.py | 22 +- modeling/inference_models/horde.py | 88 ++- modeling/inference_models/openai.py | 85 ++- modeling/inference_models/parents/hf.py | 219 +++++++ .../{ => parents}/hf_torch.py | 56 +- modeling/inference_models/rwkv.py | 26 +- static/koboldai.css | 44 ++ static/koboldai.js | 548 +++++++++--------- templates/popups.html | 30 +- templates/templates.html | 19 + 16 files changed, 912 insertions(+), 610 deletions(-) delete mode 100644 modeling/inference_models/hf.py create mode 100644 modeling/inference_models/parents/hf.py rename modeling/inference_models/{ => parents}/hf_torch.py (94%) diff --git a/aiserver.py b/aiserver.py index e744d18e..e7227c81 100644 --- a/aiserver.py +++ b/aiserver.py @@ -168,6 +168,7 @@ class MenuFolder(MenuItem): "size": "", "isMenu": True, "isDownloaded": False, + "isDirectory": False } class MenuModel(MenuItem): @@ -200,8 +201,28 @@ class MenuModel(MenuItem): "size": self.vram_requirements, "isMenu": False, "isDownloaded": self.is_downloaded, + "isDirectory": False, } +class MenuPath(MenuItem): + def to_ui1(self) -> list: + return [ + self.label, + self.name, + "", + True, + ] + + def to_json(self) -> dict: + return { + "label": self.label, + "name": self.name, + "size": "", + "isMenu": True, + "isDownloaded": False, + "isDirectory": True, + "path": "./models" + } # AI models Menu # This is a dict of lists where they key is the menu name, and the list is the menu items. 
@@ -209,8 +230,8 @@ class MenuModel(MenuItem): # 3: the memory requirement for the model, 4: if the item is a menu or not (True/False) model_menu = { "mainmenu": [ - MenuModel("Load a model from its directory", "NeoCustom"), - MenuModel("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), + MenuPath("Load a model from its directory", "NeoCustom"), + MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), MenuFolder("Load custom model from Hugging Face", "customhuggingface"), MenuFolder("Adventure Models", "adventurelist"), MenuFolder("Novel Models", "novellist"), @@ -600,6 +621,15 @@ utils.socketio = socketio # Weird import position to steal koboldai_vars from utils from modeling.patches import patch_transformers +#Load all of the model importers +import importlib +model_loader_code = {} +model_loaders = {} +for module in os.listdir("./modeling/inference_models"): + if os.path.isfile(os.path.join("./modeling/inference_models",module)) and module[-3:] == '.py': + model_loader_code[module[:-3]] = importlib.import_module('modeling.inference_models.{}'.format(module[:-3])) + model_loaders[module[:-3]] = model_loader_code[module[:-3]].model_loader() + old_socketio_on = socketio.on def new_socketio_on(*a, **k): @@ -906,6 +936,8 @@ def sendModelSelection(menu="mainmenu", folder="./models"): ) def get_folder_path_info(base): + if base is None: + return [], [] if base == 'This PC': breadcrumbs = [['This PC', 'This PC']] paths = [["{}:\\".format(chr(i)), "{}:\\".format(chr(i))] for i in range(65, 91) if os.path.exists("{}:".format(chr(i)))] @@ -1932,25 +1964,25 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal koboldai_vars.breakmodel = False if koboldai_vars.model == "Colab": - from modeling.inference_models.basic_api import BasicAPIInferenceModel - model = BasicAPIInferenceModel() + from modeling.inference_models.basic_api import model_loader + model = model_loader() elif koboldai_vars.model == "API": - from modeling.inference_models.api import APIInferenceModel - model = APIInferenceModel(koboldai_vars.colaburl.replace("/request", "")) + from modeling.inference_models.api import model_loader + model = model_loader(koboldai_vars.colaburl.replace("/request", "")) elif koboldai_vars.model == "CLUSTER": - from modeling.inference_models.horde import HordeInferenceModel - model = HordeInferenceModel() + from modeling.inference_models.horde import model_loader + model = model_loader() elif koboldai_vars.model == "OAI": - from modeling.inference_models.openai import OpenAIAPIInferenceModel - model = OpenAIAPIInferenceModel() + from modeling.inference_models.openai import model_loader + model = model_loader() model.load(initial_load=initial_load) # TODO: This check sucks, make a model object or somethign elif "rwkv" in koboldai_vars.model: if koboldai_vars.use_colab_tpu: raise RuntimeError("RWKV is not supported on the TPU.") - from modeling.inference_models.rwkv import RWKVInferenceModel - model = RWKVInferenceModel(koboldai_vars.model) + from modeling.inference_models.rwkv import model_loader + model = model_loader(koboldai_vars.model) model.load() elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai: # HF Torch @@ -1961,8 +1993,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal except: pass - from modeling.inference_models.generic_hf_torch import GenericHFTorchInferenceModel - model = GenericHFTorchInferenceModel( + from modeling.inference_models.generic_hf_torch import model_loader + model = 
model_loader( koboldai_vars.model, lazy_load=koboldai_vars.lazy_load, low_mem=args.lowmem @@ -1975,8 +2007,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal logger.info(f"Pipeline created: {koboldai_vars.model}") else: # TPU - from modeling.inference_models.hf_mtj import HFMTJInferenceModel - model = HFMTJInferenceModel( + from modeling.inference_models.hf_mtj import model_loader + model = model_loader( koboldai_vars.model ) model.load( @@ -6430,7 +6462,9 @@ def UI_2_retry(data): @socketio.on('load_model_button') @logger.catch def UI_2_load_model_button(data): - sendModelSelection() + emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":"mainmenu"}} for item in model_menu['mainmenu'] if item.should_show()]}) + + #==================================================================# # Event triggered when user clicks the a model @@ -6438,6 +6472,38 @@ def UI_2_load_model_button(data): @socketio.on('select_model') @logger.catch def UI_2_select_model(data): + logger.debug("Clicked on model entry: {}".format(data)) + if data["name"] in model_menu and data['ismenu'] == "true": + emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) + else: + #Get load methods + logger.debug("Asking for model info on potential model: {}".format(data)) + valid = False + if 'path' not in data or data['path'] == "": + valid_loaders = {} + for model_loader in model_loaders: + logger.debug("Testing Loader {} for model {}: {}".format(model_loader, data["name"], model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]))) + if model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]): + valid_loaders[model_loader] = model_loaders[model_loader].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + valid = True + if valid: + logger.debug("Valid Loaders: {}".format(valid_loaders)) + emit("selected_model_info", valid_loaders) + if not valid: + #Get directories + paths, breadcrumbs = get_folder_path_info(data['path']) + output = [] + for path in paths: + valid=False + for model_loader in model_loaders: + if model_loaders[model_loader].is_valid(path[1], path[0], "Custom"): + valid=True + break + output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) + emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) + + return + #We've selected a menu if data['model'] in model_menu: @@ -6462,26 +6528,9 @@ def UI_2_select_model(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): - if not os.path.exists("settings/"): - os.mkdir("settings") - changed = True - if os.path.exists("settings/" + data['model'].replace('/', '_') + ".breakmodel"): - with open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "r") as file: - file_data = file.read().split('\n')[:2] - if len(file_data) < 2: - file_data.append("0") - gpu_layers, disk_layers = file_data - if gpu_layers == data['gpu_layers'] and disk_layers == data['disk_layers']: - changed = False - if changed: - f = open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "w") - f.write("{}\n{}".format(data['gpu_layers'], data['disk_layers'])) - f.close() - koboldai_vars.colaburl = data['url'] + 
"/request" - koboldai_vars.model = data['model'] - koboldai_vars.custmodpth = data['path'] - print("loading Model") - load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) + logger.info("loading Model") + logger.info(data) + #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) #==================================================================# # Event triggered when load story is clicked diff --git a/modeling/inference_model.py b/modeling/inference_model.py index b253c5bf..27ad46db 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -169,6 +169,15 @@ class InferenceModel: ] self.tokenizer = None self.capabilties = ModelCapabilities() + + def is_valid(self, model_name, model_path, menu_path, vram): + return True + + def requested_parameters(self, model_name, model_path, menu_path, vram): + return {} + + def define_input_parameters(self): + return def load(self, save_model: bool = False, initial_load: bool = False) -> None: """User-facing load function. Do not override this; try `_load()` instead.""" diff --git a/modeling/inference_models/api.py b/modeling/inference_models/api.py index d25505b0..41088bc7 100644 --- a/modeling/inference_models/api.py +++ b/modeling/inference_models/api.py @@ -22,9 +22,31 @@ class APIException(Exception): """To be used for errors when using the Kobold API as an interface.""" -class APIInferenceModel(InferenceModel): - def __init__(self, base_url: str) -> None: +class model_loader(InferenceModel): + def __init__(self) -> None: super().__init__() + #self.base_url = "" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "API" + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "base_url", + "default": False, + "check": {"value": "", 'check': "!="}, + "tooltip": "The URL of the KoboldAI API to connect to.", + "menu_path": "", + "extra_classes": "", + "refresh_model_inputs": False + }) + return requested_parameters + + def set_input_parameters(self, base_url=""): self.base_url = base_url.rstrip("/") def _load(self, save_model: bool, initial_load: bool) -> None: diff --git a/modeling/inference_models/basic_api.py b/modeling/inference_models/basic_api.py index c96eb42c..d7fc0863 100644 --- a/modeling/inference_models/basic_api.py +++ b/modeling/inference_models/basic_api.py @@ -19,12 +19,37 @@ class BasicAPIException(Exception): """To be used for errors when using the Basic API as an interface.""" -class BasicAPIInferenceModel(InferenceModel): +class model_loader(InferenceModel): def __init__(self) -> None: super().__init__() # Do not allow API to be served over the API self.capabilties = ModelCapabilities(api_host=False) + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "Colab" + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "colaburl", + "default": False, + "check": {"value": "", 'check': "!="}, + "tooltip": "The URL of the Colab KoboldAI API to connect to.", + "menu_path": "", + "extra_classes": "", + 
"refresh_model_inputs": False + }) + return requested_parameters + + def set_input_parameters(self, colaburl=""): + self.colaburl = colaburl + + def _initialize_model(self): + return def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer("EleutherAI/gpt-neo-2.7B") @@ -68,7 +93,7 @@ class BasicAPIInferenceModel(InferenceModel): } # Create request - req = requests.post(utils.koboldai_vars.colaburl, json=reqdata) + req = requests.post(self.colaburl, json=reqdata) if req.status_code != 200: raise BasicAPIException(f"Bad status code {req.status_code}") diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index aa602b1a..366fbbb7 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -20,10 +20,14 @@ except ModuleNotFoundError as e: if not utils.koboldai_vars.use_colab_tpu: raise e -from modeling.inference_models.hf_torch import HFTorchInferenceModel +from modeling.inference_models.parents.hf_torch import HFTorchInferenceModel -class GenericHFTorchInferenceModel(HFTorchInferenceModel): +class model_loader(HFTorchInferenceModel): + + def _initialize_model(self): + return + def _load(self, save_model: bool, initial_load: bool) -> None: utils.koboldai_vars.allowsp = True diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py deleted file mode 100644 index cd609fed..00000000 --- a/modeling/inference_models/hf.py +++ /dev/null @@ -1,190 +0,0 @@ -import os -from typing import Optional -from transformers import AutoConfig - -import utils -import koboldai_settings -from logger import logger -from modeling.inference_model import InferenceModel - - -class HFInferenceModel(InferenceModel): - def __init__(self, model_name: str) -> None: - super().__init__() - self.model_config = None - self.model_name = model_name - - self.model = None - self.tokenizer = None - - def _post_load(self) -> None: - # These are model specific tokenizer overrides if a model has bad defaults - if utils.koboldai_vars.model_type == "llama": - # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer - self.tokenizer.add_bos_token = False - - # HF transformers no longer supports decode_with_prefix_space - # We work around this by wrapping decode, encode, and __call__ - # with versions that work around the 'prefix space' misfeature - # of sentencepiece. - vocab = self.tokenizer.convert_ids_to_tokens(range(self.tokenizer.vocab_size)) - has_prefix_space = {i for i, tok in enumerate(vocab) if tok.startswith("▁")} - - # Wrap 'decode' with a method that always returns text starting with a space - # when the head token starts with a space. This is what 'decode_with_prefix_space' - # used to do, and we implement it using the same technique (building a cache of - # tokens that should have a prefix space, and then prepending a space if the first - # token is in this set.) We also work around a bizarre behavior in which decoding - # a single token 13 behaves differently than decoding a squence containing only [13]. - original_decode = type(self.tokenizer.tokenizer).decode - def decode_wrapper(self, token_ids, *args, **kwargs): - first = None - # Note, the code below that wraps single-value token_ids in a list - # is to work around this wonky behavior: - # >>> t.decode(13) - # '<0x0A>' - # >>> t.decode([13]) - # '\n' - # Not doing this causes token streaming to receive <0x0A> characters - # instead of newlines. 
- if isinstance(token_ids, int): - first = token_ids - token_ids = [first] - elif hasattr(token_ids, 'dim'): # Check for e.g. torch.Tensor - # Tensors don't support the Python standard of 'empty is False' - # and the special case of dimension 0 tensors also needs to be - # handled separately. - if token_ids.dim() == 0: - first = int(token_ids.item()) - token_ids = [first] - elif len(token_ids) > 0: - first = int(token_ids[0]) - elif token_ids: - first = token_ids[0] - result = original_decode(self, token_ids, *args, **kwargs) - if first is not None and first in has_prefix_space: - result = " " + result - return result - # GenericTokenizer overrides __setattr__ so we need to use object.__setattr__ to bypass it - object.__setattr__(self.tokenizer, 'decode', decode_wrapper.__get__(self.tokenizer)) - - # Wrap encode and __call__ to work around the 'prefix space' misfeature also. - # The problem is that "Bob" at the start of text is encoded as if it is - # " Bob". This creates a problem because it means you can't split text, encode - # the pieces, concatenate the tokens, decode them, and get the original text back. - # The workaround is to prepend a known token that (1) starts with a space; and - # (2) is not the prefix of any other token. After searching through the vocab - # " ," (space comma) is the only token containing only printable ascii characters - # that fits this bill. By prepending ',' to the text, the original encode - # method always returns [1919, ...], where the tail of the sequence is the - # actual encoded result we want without the prefix space behavior. - original_encode = type(self.tokenizer.tokenizer).encode - def encode_wrapper(self, text, *args, **kwargs): - if type(text) is str: - text = ',' + text - result = original_encode(self, text, *args, **kwargs) - result = result[1:] - else: - result = original_encode(self, text, *args, **kwargs) - return result - object.__setattr__(self.tokenizer, 'encode', encode_wrapper.__get__(self.tokenizer)) - - # Since 'encode' is documented as being deprecated, also override __call__. - # This doesn't appear to currently be used by KoboldAI, but doing so - # in case someone uses it in the future. 
- original_call = type(self.tokenizer.tokenizer).__call__ - def call_wrapper(self, text, *args, **kwargs): - if type(text) is str: - text = ',' + text - result = original_call(self, text, *args, **kwargs) - result = result[1:] - else: - result = original_call(self, text, *args, **kwargs) - return result - object.__setattr__(self.tokenizer, '__call__', call_wrapper.__get__(self.tokenizer)) - - elif utils.koboldai_vars.model_type == "opt": - self.tokenizer._koboldai_header = self.tokenizer.encode("") - self.tokenizer.add_bos_token = False - self.tokenizer.add_prefix_space = False - - # Change newline behavior to match model quirks - if utils.koboldai_vars.model_type == "xglm": - # Default to newline mode if using XGLM - utils.koboldai_vars.newlinemode = "s" - elif utils.koboldai_vars.model_type in ["opt", "bloom"]: - # Handle but don't convert newlines if using Fairseq models that have newlines trained in them - utils.koboldai_vars.newlinemode = "ns" - - # Clean up tokens that cause issues - if ( - utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default - and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj") - ): - utils.koboldai_vars.badwordsids = [ - [v] - for k, v in self.tokenizer.get_vocab().items() - if any(c in str(k) for c in "[]") - ] - - if utils.koboldai_vars.newlinemode == "n": - utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id]) - - return super()._post_load() - - def get_local_model_path( - self, legacy: bool = False, ignore_existance: bool = False - ) -> Optional[str]: - """ - Returns a string of the model's path locally, or None if it is not downloaded. - If ignore_existance is true, it will always return a path. - """ - - if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]: - model_path = utils.koboldai_vars.custmodpth - assert model_path - - # Path can be absolute or relative to models directory - if os.path.exists(model_path): - return model_path - - model_path = os.path.join("models", model_path) - - try: - assert os.path.exists(model_path) - except AssertionError: - logger.error(f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'.") - raise - - return model_path - - basename = utils.koboldai_vars.model.replace("/", "_") - if legacy: - ret = basename - else: - ret = os.path.join("models", basename) - - if os.path.isdir(ret) or ignore_existance: - return ret - return None - - def init_model_config(self) -> None: - # Get the model_type from the config or assume a model type if it isn't present - try: - self.model_config = AutoConfig.from_pretrained( - self.get_local_model_path() or self.model_name, - revision=utils.koboldai_vars.revision, - cache_dir="cache", - ) - utils.koboldai_vars.model_type = self.model_config.model_type - except ValueError: - utils.koboldai_vars.model_type = { - "NeoCustom": "gpt_neo", - "GPT2Custom": "gpt2", - }.get(utils.koboldai_vars.model) - - if not utils.koboldai_vars.model_type: - logger.warning( - "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)" - ) - utils.koboldai_vars.model_type = "gpt_neo" diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py index 7661a67f..c99e9a05 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj.py @@ -16,19 +16,17 @@ from modeling.inference_model import ( GenerationSettings, ModelCapabilities, ) -from modeling.inference_models.hf import 
HFInferenceModel - -# This file shouldn't be imported unless using the TPU -assert utils.koboldai_vars.use_colab_tpu -import tpu_mtj_backend +from modeling.inference_models.parents.hf import HFInferenceModel -class HFMTJInferenceModel(HFInferenceModel): + + +class model_loader(HFInferenceModel): def __init__( self, - model_name: str, + #model_name: str, ) -> None: - super().__init__(model_name) + super().__init__() self.model_config = None self.capabilties = ModelCapabilities( @@ -38,8 +36,13 @@ class HFMTJInferenceModel(HFInferenceModel): post_token_probs=False, uses_tpu=True, ) + + def is_valid(self, model_name, model_path, menu_path): + # This file shouldn't be imported unless using the TPU + return utils.koboldai_vars.use_colab_tpu and super().is_valid(model_name, model_path, menu_path) def setup_mtj(self) -> None: + import tpu_mtj_backend def mtj_warper_callback(scores) -> "np.array": scores_shape = scores.shape scores_list = scores.tolist() @@ -175,6 +178,7 @@ class HFMTJInferenceModel(HFInferenceModel): tpu_mtj_backend.settings_callback = mtj_settings_callback def _load(self, save_model: bool, initial_load: bool) -> None: + import tpu_mtj_backend self.setup_mtj() self.init_model_config() utils.koboldai_vars.allowsp = True @@ -207,6 +211,7 @@ class HFMTJInferenceModel(HFInferenceModel): ] def get_soft_tokens(self) -> np.array: + import tpu_mtj_backend soft_tokens = None if utils.koboldai_vars.sp is None: @@ -258,6 +263,7 @@ class HFMTJInferenceModel(HFInferenceModel): seed: Optional[int] = None, **kwargs, ) -> GenerationResult: + import tpu_mtj_backend warpers.update_settings() soft_tokens = self.get_soft_tokens() diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index c6294374..56e88205 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -21,13 +21,99 @@ class HordeException(Exception): """To be used for errors on server side of the Horde.""" -class HordeInferenceModel(InferenceModel): +class model_loader(InferenceModel): def __init__(self) -> None: super().__init__() + self.url = "https://horde.koboldai.net" + self.key = "0000000000" + self.models = self.get_cluster_models() + # Do not allow API to be served over the API self.capabilties = ModelCapabilities(api_host=False) + def is_valid(self, model_name, model_path, menu_path): + logger.debug("Horde Models: {}".format(self.models)) + return model_name == "CLUSTER" or model_name in [x['value'] for x in self.models] + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + requested_parameters.extend([{ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "url", + "default": self.url, + "tooltip": "URL to the horde.", + "menu_path": "", + "check": {"value": "", 'check': "!="}, + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "text", + "unit": "text", + "label": "Key", + "id": "key", + "default": self.key, + "check": {"value": "", 'check': "!="}, + "tooltip": "User Key to use when connecting to Horde (0000000000 is anonymous).", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "dropdown", + "unit": "text", + "label": "Model", + "id": "model", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Which model to use when running OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": False, + "extra_classes": "", + 'children': self.models, + + }]) + return requested_parameters + + def set_input_parameters(self, 
url="", key="", model=""): + self.key = key.strip() + self.model = model + self.url = url + + def get_cluster_models(self): + # Get list of models from public cluster + logger.info("Retrieving engine list...") + try: + req = requests.get(f"{self.url}/api/v2/status/models?type=text") + except: + logger.init_err("KAI Horde Models", status="Failed") + logger.error("Provided KoboldAI Horde URL unreachable") + emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"}) + return + if not req.ok: + # Something went wrong, print the message and quit since we can't initialize an engine + logger.init_err("KAI Horde Models", status="Failed") + logger.error(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1") + return + + engines = req.json() + try: + engines = [{"text": en["name"], "value": en["name"]} for en in engines] + except: + logger.error(engines) + raise + logger.debug(engines) + + online_model = "" + + logger.init_ok("KAI Horde Models", status="OK") + + return engines + def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer( utils.koboldai_vars.cluster_requested_models[0] diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index 1441ae2f..01c0c037 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -12,13 +12,96 @@ from modeling.inference_model import ( ) + class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") -class OpenAIAPIInferenceModel(InferenceModel): +class model_loader(InferenceModel): """InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.key = "" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "OAI" or model_name == "GooseAI" + + def get_requested_parameters(self, model_name, model_path, menu_path): + self.source = model_name + requested_parameters = [] + requested_parameters.extend([{ + "uitype": "text", + "unit": "text", + "label": "Key", + "id": "key", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "User Key to use when connecting to OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "dropdown", + "unit": "text", + "label": "Model", + "id": "model", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Which model to use when running OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": False, + "extra_classes": "", + 'children': self.get_oai_models(), + + }]) + return requested_parameters + + def set_input_parameters(self, key="", model=""): + self.key = key.strip() + self.model = model + + def get_oai_models(self): + if self.key == "": + return [] + if self.source == 'OAI': + url = "https://api.openai.com/v1/engines" + elif self.source == 'GooseAI': + url = "https://api.goose.ai/v1/engines" + else: + return + + # Get list of models from OAI + logger.init("OAI Engines", status="Retrieving") + req = requests.get( + url, + headers = { + 'Authorization': 'Bearer '+self.key + } + ) + if(req.status_code == 200): + r = req.json() + engines = r["data"] + try: + engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines] + except: + logger.error(engines) + raise + + online_model = "" + + + logger.init_ok("OAI Engines", status="OK") + 
return engines + else: + # Something went wrong, print the message and quit since we can't initialize an engine + logger.init_err("OAI Engines", status="Failed") + logger.error(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) + return [] + def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer("gpt2") diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py new file mode 100644 index 00000000..54781296 --- /dev/null +++ b/modeling/inference_models/parents/hf.py @@ -0,0 +1,219 @@ +import os +from typing import Optional +from transformers import AutoConfig + +import utils +import koboldai_settings +from logger import logger +from modeling.inference_model import InferenceModel +import torch + + +class HFInferenceModel(InferenceModel): + def __init__(self) -> None: + super().__init__() + self.model_config = None + #self.model_name = model_name + + self.model = None + self.tokenizer = None + + def is_valid(self, model_name, model_path, menu_path): + try: + if model_path is not None and os.path.exists(model_path): + model_config = AutoConfig.from_pretrained(model_path) + elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): + model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + else: + model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + return True + except: + return False + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + + if model_path is not None and os.path.exists(model_path): + model_config = AutoConfig.from_pretrained(model_path) + elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): + model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + else: + model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + layer_count = model_config["n_layer"] if isinstance(model_config, dict) else model_config.num_layers if hasattr(model_config, "num_layers") else model_config.n_layer if hasattr(model_config, "n_layer") else model_config.num_hidden_layers if hasattr(model_config, 'num_hidden_layers') else None + if layer_count is not None and layer_count >= 0: + if os.path.exists("settings/{}.breakmodel".format(model_name.replace("/", "_"))): + with open("settings/{}.breakmodel".format(model_name.replace("/", "_")), "r") as file: + data = [x for x in file.read().split("\n")[:2] if x != ''] + if len(data) < 2: + data.append("0") + break_values, disk_blocks = data + break_values = break_values.split(",") + else: + break_values = [layer_count] + disk_blocks = None + break_values = [int(x) for x in break_values if x != '' and x is not None] + gpu_count = torch.cuda.device_count() + break_values += [0] * (gpu_count - len(break_values)) + if disk_blocks is not None: + break_values += [disk_blocks] + for i in range(gpu_count): + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "{} Layers".format(torch.cuda.get_device_name(i)), + "id": "{} Layers".format(i), + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + 
"check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": break_values[i], + "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "CPU Layers", + "id": "CPU Layers", + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": layer_count - sum(break_values), + "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + if disk_blocks is not None: + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "Disk Layers", + "id": "Disk_Layers", + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": disk_blocks, + "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. Use as a last resort.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + else: + requested_parameters.append({ + "uitype": "toggle", + "unit": "bool", + "label": "Use GPU", + "id": "use_gpu", + "default": False, + "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. 
Use as a last resort.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + + + return requested_parameters + + def set_input_parameters(self, layers=[], disk_layers=0, use_gpu=False): + self.layers = layers + self.disk_layers = disk_layers + self.use_gpu = use_gpu + + def _post_load(self) -> None: + # These are model specific tokenizer overrides if a model has bad defaults + if utils.koboldai_vars.model_type == "llama": + self.tokenizer.decode_with_prefix_space = True + self.tokenizer.add_bos_token = False + elif utils.koboldai_vars.model_type == "opt": + self.tokenizer._koboldai_header = self.tokenizer.encode("") + self.tokenizer.add_bos_token = False + self.tokenizer.add_prefix_space = False + + # Change newline behavior to match model quirks + if utils.koboldai_vars.model_type == "xglm": + # Default to newline mode if using XGLM + utils.koboldai_vars.newlinemode = "s" + elif utils.koboldai_vars.model_type in ["opt", "bloom"]: + # Handle but don't convert newlines if using Fairseq models that have newlines trained in them + utils.koboldai_vars.newlinemode = "ns" + + # Clean up tokens that cause issues + if ( + utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default + and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj") + ): + utils.koboldai_vars.badwordsids = [ + [v] + for k, v in self.tokenizer.get_vocab().items() + if any(c in str(k) for c in "[]") + ] + + if utils.koboldai_vars.newlinemode == "n": + utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id]) + + return super()._post_load() + + def get_local_model_path( + self, legacy: bool = False, ignore_existance: bool = False + ) -> Optional[str]: + """ + Returns a string of the model's path locally, or None if it is not downloaded. + If ignore_existance is true, it will always return a path. 
+ """ + + if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]: + model_path = utils.koboldai_vars.custmodpth + assert model_path + + # Path can be absolute or relative to models directory + if os.path.exists(model_path): + return model_path + + model_path = os.path.join("models", model_path) + + try: + assert os.path.exists(model_path) + except AssertionError: + logger.error(f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'.") + raise + + return model_path + + basename = utils.koboldai_vars.model.replace("/", "_") + if legacy: + ret = basename + else: + ret = os.path.join("models", basename) + + if os.path.isdir(ret) or ignore_existance: + return ret + return None + + def init_model_config(self) -> None: + # Get the model_type from the config or assume a model type if it isn't present + try: + self.model_config = AutoConfig.from_pretrained( + self.get_local_model_path() or self.model_name, + revision=utils.koboldai_vars.revision, + cache_dir="cache", + ) + utils.koboldai_vars.model_type = self.model_config.model_type + except ValueError: + utils.koboldai_vars.model_type = { + "NeoCustom": "gpt_neo", + "GPT2Custom": "gpt2", + }.get(utils.koboldai_vars.model) + + if not utils.koboldai_vars.model_type: + logger.warning( + "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)" + ) + utils.koboldai_vars.model_type = "gpt_neo" diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/parents/hf_torch.py similarity index 94% rename from modeling/inference_models/hf_torch.py rename to modeling/inference_models/parents/hf_torch.py index 990fabfc..d8afafb1 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -31,7 +31,7 @@ from modeling import warpers from modeling.warpers import Warper from modeling.stoppers import Stoppers from modeling.post_token_hooks import PostTokenHooks -from modeling.inference_models.hf import HFInferenceModel +from modeling.inference_models.parents.hf import HFInferenceModel from modeling.inference_model import ( GenerationResult, GenerationSettings, @@ -55,13 +55,13 @@ LOG_SAMPLER_NO_EFFECT = False class HFTorchInferenceModel(HFInferenceModel): def __init__( self, - model_name: str, - lazy_load: bool, - low_mem: bool, + #model_name: str, + #lazy_load: bool, + #low_mem: bool, ) -> None: - super().__init__(model_name) - self.lazy_load = lazy_load - self.low_mem = low_mem + super().__init__() + #self.lazy_load = lazy_load + #self.low_mem = low_mem self.post_token_hooks = [ PostTokenHooks.stream_tokens, @@ -211,40 +211,6 @@ class HFTorchInferenceModel(HFInferenceModel): new_sample.old_sample = transformers.GenerationMixin.sample use_core_manipulations.sample = new_sample - # PEFT Loading. This MUST be done after all save_pretrained calls are - # finished on the main model. 
- if utils.args.peft: - from peft import PeftModel, PeftConfig - local_peft_dir = os.path.join(m_self.get_local_model_path(), "peft") - - # Make PEFT dir if it doesn't exist - try: - os.makedirs(local_peft_dir) - except FileExistsError: - pass - - peft_local_path = os.path.join(local_peft_dir, utils.args.peft.replace("/", "_")) - logger.debug(f"Loading PEFT '{utils.args.peft}', possible local path is '{peft_local_path}'.") - - peft_installed_locally = True - possible_peft_locations = [peft_local_path, utils.args.peft] - - for i, location in enumerate(possible_peft_locations): - try: - m_self.model = PeftModel.from_pretrained(m_self.model, location) - logger.debug(f"Loaded PEFT at '{location}'") - break - except ValueError: - peft_installed_locally = False - if i == len(possible_peft_locations) - 1: - raise RuntimeError(f"Unable to load PeftModel for given name '{utils.args.peft}'. Does it exist?") - except RuntimeError: - raise RuntimeError("Error while loading PeftModel. Are you using the correct model?") - - if not peft_installed_locally: - logger.debug(f"PEFT not saved to models folder; saving to '{peft_local_path}'") - m_self.model.save_pretrained(peft_local_path) - return super()._post_load() def _raw_generate( @@ -272,13 +238,8 @@ class HFTorchInferenceModel(HFInferenceModel): with torch.no_grad(): start_time = time.time() - - # HEED & BEWARE: All arguments passed to self.model.generate MUST be - # kwargs; see https://github.com/huggingface/peft/issues/232. If they - # aren't, PeftModel will EXPLODE!!!! But nothing will happen without - # a PEFT loaded so it's sneaky. genout = self.model.generate( - input_ids=gen_in, + gen_in, do_sample=True, max_length=min( len(prompt_tokens) + max_new, utils.koboldai_vars.max_length @@ -304,7 +265,6 @@ class HFTorchInferenceModel(HFInferenceModel): def _get_model(self, location: str, tf_kwargs: Dict): tf_kwargs["revision"] = utils.koboldai_vars.revision tf_kwargs["cache_dir"] = "cache" - tf_kwargs["trust_remote_code"] = utils.koboldai_vars.trust_remote_code # If we have model hints for legacy model, use them rather than fall back. try: diff --git a/modeling/inference_models/rwkv.py b/modeling/inference_models/rwkv.py index 006bb8fd..d14d8c81 100644 --- a/modeling/inference_models/rwkv.py +++ b/modeling/inference_models/rwkv.py @@ -17,7 +17,7 @@ from torch.nn import functional as F os.environ["RWKV_JIT_ON"] = "1" # TODO: Include compiled kernel os.environ["RWKV_CUDA_ON"] = "1" -from rwkv.model import RWKV + import utils from logger import logger @@ -55,13 +55,13 @@ MODEL_FILES = { } -class RWKVInferenceModel(InferenceModel): +class model_loader(InferenceModel): def __init__( self, - model_name: str, + #model_name: str, ) -> None: super().__init__() - self.model_name = model_name + #self.model_name = model_name self.post_token_hooks = [ PostTokenHooks.stream_tokens, @@ -83,6 +83,23 @@ class RWKVInferenceModel(InferenceModel): ) self._old_stopping_criteria = None + def is_valid(self, model_name, model_path, menu_path): + try: + from rwkv.model import RWKV + valid = True + except: + valid = False + return valid and "rwkv" in model_name.lower() + + def get_requested_parameters(self, model_name, model_path, menu_path): + self.source = model_name + requested_parameters = [] + return requested_parameters + + def set_input_parameters(self): + return + + def _ensure_directory_structure(self) -> None: for path in ["models/rwkv", "models/rwkv/models"]: try: @@ -145,6 +162,7 @@ class RWKVInferenceModel(InferenceModel): # Now we load! 
# TODO: Breakmodel to strat + from rwkv.model import RWKV self.model = RWKV(model=model_path, strategy="cuda:0 fp16") def _apply_warpers( diff --git a/static/koboldai.css b/static/koboldai.css index 230f1cbf..f3dde4b7 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -347,6 +347,28 @@ border-top-right-radius: var(--tabs_rounding); } +.setting_container_model { + display: grid; + grid-template-areas: "label value" + "item item" + "minlabel maxlabel"; + grid-template-rows: 20px 23px 20px; + grid-template-columns: auto 30px; + row-gap: 0.2em; + background-color: var(--setting_background); + color: var(--setting_text); + border-radius: var(--radius_settings_background); + padding: 2px; + margin: 2px; + width: calc(100%); +} + +.setting_container_model .setting_item{ + font-size: calc(0.93em + var(--font_size_adjustment)); + margin-left: 10px; +} + + .setting_minlabel { padding-top: 6px; grid-area: minlabel; @@ -3370,6 +3392,23 @@ textarea { } } +@keyframes pulse-red { + 0% { + transform: scale(0.95); + box-shadow: 0 0 0 0 rgba(255, 0, 0, 0.7); + } + + 70% { + transform: scale(1); + box-shadow: 0 0 0 10px rgba(255, 0, 0, 0); + } + + 100% { + transform: scale(0.95); + box-shadow: 0 0 0 0 rgba(255, 0, 0, 0); + } +} + @keyframes pulse-text { 0% { filter: blur(3px); @@ -3391,6 +3430,11 @@ textarea { } } +.input_error { + border: 5px solid red !important; + box-sizing: border-box !important; +} + .single_pulse { animation: pulse-text 0.5s 1; } diff --git a/static/koboldai.js b/static/koboldai.js index cfc32d21..0656253f 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -15,6 +15,7 @@ socket.on('popup_items', function(data){popup_items(data);}); socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);}); socket.on('popup_edit_file', function(data){popup_edit_file(data);}); socket.on('show_model_menu', function(data){show_model_menu(data);}); +socket.on('open_model_load_menu', function(data){new_show_model_menu(data);}); socket.on('selected_model_info', function(data){selected_model_info(data);}); socket.on('oai_engines', function(data){oai_engines(data);}); socket.on('buildload', function(data){buildload(data);}); @@ -81,6 +82,7 @@ const on_colab = $el("#on_colab").textContent == "true"; let story_id = -1; var dirty_chunks = []; var initial_socketio_connection_occured = false; +var selected_model_data; // Each entry into this array should be an object that looks like: // {class: "class", key: "key", func: callback} @@ -1500,49 +1502,46 @@ function getModelParameterCount(modelName) { return base * multiplier; } -function show_model_menu(data) { - //clear old options - document.getElementById("modelkey").classList.add("hidden"); - document.getElementById("modelkey").value = ""; - document.getElementById("modelurl").classList.add("hidden"); - document.getElementById("use_gpu_div").classList.add("hidden"); - document.getElementById("use_8_bit_div").classList.add("hidden"); - document.getElementById("modellayers").classList.add("hidden"); - document.getElementById("oaimodel").classList.add("hidden"); - var model_layer_bars = document.getElementById('model_layer_bars'); - while (model_layer_bars.firstChild) { - model_layer_bars.removeChild(model_layer_bars.firstChild); +function new_show_model_menu(data) { + //clear out the loadmodelsettings + var loadmodelsettings = document.getElementById('loadmodelsettings') + while (loadmodelsettings.firstChild) { + loadmodelsettings.removeChild(loadmodelsettings.firstChild); } + 
document.getElementById("modelplugin").classList.add("hidden"); + var accept = document.getElementById("btn_loadmodelaccept"); + accept.disabled = false; //clear out the breadcrumbs var breadcrumbs = document.getElementById('loadmodellistbreadcrumbs') while (breadcrumbs.firstChild) { breadcrumbs.removeChild(breadcrumbs.firstChild); } - //add breadcrumbs - //console.log(data.breadcrumbs); - for (item of data.breadcrumbs) { - var button = document.createElement("button"); - button.classList.add("breadcrumbitem"); - button.setAttribute("model", data.menu); - button.setAttribute("folder", item[0]); - button.textContent = item[1]; - button.onclick = function () { - socket.emit('select_model', {'menu': "", 'model': this.getAttribute("model"), 'path': this.getAttribute("folder")}); - }; - breadcrumbs.append(button); - var span = document.createElement("span"); - span.textContent = "\\"; - breadcrumbs.append(span); - } + //add breadcrumbs + if ('breadcrumbs' in data) { + for (item of data.breadcrumbs) { + var button = document.createElement("button"); + button.classList.add("breadcrumbitem"); + button.setAttribute("model", data.menu); + button.setAttribute("folder", item[0]); + button.textContent = item[1]; + button.onclick = function () { + socket.emit('select_model', {'menu': "", 'name': this.getAttribute("model"), 'path': this.getAttribute("folder")}); + }; + breadcrumbs.append(button); + var span = document.createElement("span"); + span.textContent = "\\"; + breadcrumbs.append(span); + } + } //clear out the items var model_list = document.getElementById('loadmodellistcontent') while (model_list.firstChild) { model_list.removeChild(model_list.firstChild); } //add items - for (item of data.data) { + for (item of data.items) { var list_item = document.createElement("span"); list_item.classList.add("model_item"); @@ -1564,10 +1563,27 @@ function show_model_menu(data) { //create the actual item var popup_item = document.createElement("span"); popup_item.classList.add("model"); - popup_item.setAttribute("display_name", item.label); - popup_item.id = item.name; + for (const key in item) { + if (key == "name") { + popup_item.id = item[key]; + } + popup_item.setAttribute(key, item[key]); + } + + popup_item.onclick = function() { + var attributes = this.attributes; + var obj = {}; + + for (var i = 0, len = attributes.length; i < len; i++) { + obj[attributes[i].name] = attributes[i].value; + } + //put the model data on the accept button so we can send it to the server when you accept + var accept = document.getElementById("popup_accept"); + selected_model_data = obj; + //send the data to the server so it can figure out what data we need from the user for the model + socket.emit('select_model', obj); + } - popup_item.setAttribute("Menu", data.menu) //name text var text = document.createElement("span"); text.style="grid-area: item;"; @@ -1615,241 +1631,223 @@ function show_model_menu(data) { }); })(); - popup_item.onclick = function () { - var accept = document.getElementById("btn_loadmodelaccept"); - accept.classList.add("disabled"); - socket.emit("select_model", {"model": this.id, "menu": this.getAttribute("Menu"), "display_name": this.getAttribute("display_name")}); - var model_list = document.getElementById('loadmodellistcontent').getElementsByClassName("selected"); - for (model of model_list) { - model.classList.remove("selected"); - } - this.classList.add("selected"); - accept.setAttribute("selected_model", this.id); - accept.setAttribute("menu", this.getAttribute("Menu")); - 
accept.setAttribute("display_name", this.getAttribute("display_name")); - }; list_item.append(popup_item); - - model_list.append(list_item); } - var accept = document.getElementById("btn_loadmodelaccept"); - accept.disabled = true; - //finally, if they selected the custom hugging face menu we show the input box - if (data['menu'] == "customhuggingface") { - document.getElementById("custommodelname").classList.remove("hidden"); - } else { - document.getElementById("custommodelname").classList.add("hidden"); - } - - - // detect if we are in a model selection screen and show the reference - var refelement = document.getElementById("modelspecifier"); - var check = document.getElementById("mainmenu"); - if (check) { - refelement.classList.remove("hidden"); - } else { - refelement.classList.add("hidden"); - } openPopup("load-model"); + } + function selected_model_info(data) { + //clear out the loadmodelsettings + var loadmodelsettings = document.getElementById('loadmodelsettings') + while (loadmodelsettings.firstChild) { + loadmodelsettings.removeChild(loadmodelsettings.firstChild); + } var accept = document.getElementById("btn_loadmodelaccept"); - //hide or unhide key - if (data.key) { - document.getElementById("modelkey").classList.remove("hidden"); - document.getElementById("modelkey").value = data.key_value; - } else { - document.getElementById("modelkey").classList.add("hidden"); - document.getElementById("modelkey").value = ""; - } - //hide or unhide URL - if (data.url) { - document.getElementById("modelurl").classList.remove("hidden"); - } else { - document.getElementById("modelurl").classList.add("hidden"); - } - - //hide or unhide 8 bit mode - if (data.bit_8_available) { - document.getElementById("use_8_bit_div").classList.remove("hidden"); - } else { - document.getElementById("use_8_bit_div").classList.add("hidden"); - document.getElementById("use_8_bit").checked = false; - } - - //default URL loading - if (data.default_url != null) { - document.getElementById("modelurl").value = data.default_url; - } - - //change model loading on url if needed - if (data.models_on_url) { - document.getElementById("modelurl").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': document.getElementById("modelkey").value, 'url': this.value});}; - document.getElementById("modelkey").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value, 'url': document.getElementById("modelurl").value});}; - } else { - document.getElementById("modelkey").ochange = function () {socket.emit('OAI_Key_Update', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value});}; - document.getElementById("modelurl").ochange = null; - } - - //show model select for APIs - if (data.show_online_model_select) { - document.getElementById("oaimodel").classList.remove("hidden"); - } else { - document.getElementById("oaimodel").classList.add("hidden"); - } - - //Multiple Model Select? 
- if (data.multi_online_models) { - document.getElementById("oaimodel").setAttribute("multiple", ""); - document.getElementById("oaimodel").options[0].textContent = "All" - } else { - document.getElementById("oaimodel").removeAttribute("multiple"); - document.getElementById("oaimodel").options[0].textContent = "Select Model(s)" - } - - //hide or unhide the use gpu checkbox - if (data.gpu) { - document.getElementById("use_gpu_div").classList.remove("hidden"); - } else { - document.getElementById("use_gpu_div").classList.add("hidden"); - } - //setup breakmodel - if (data.breakmodel) { - document.getElementById("modellayers").classList.remove("hidden"); - //setup model layer count - document.getElementById("gpu_layers_current").textContent = data.break_values.reduce((a, b) => a + b, 0); - document.getElementById("gpu_layers_max").textContent = data.layer_count; - document.getElementById("gpu_count").value = data.gpu_count; - - //create the gpu load bars - var model_layer_bars = document.getElementById('model_layer_bars'); - while (model_layer_bars.firstChild) { - model_layer_bars.removeChild(model_layer_bars.firstChild); - } - - //Add the bars - for (let i = 0; i < data.gpu_names.length; i++) { - var div = document.createElement("div"); - div.classList.add("model_setting_container"); - //build GPU text - var span = document.createElement("span"); - span.classList.add("model_setting_label"); - span.textContent = "GPU " + i + " " + data.gpu_names[i] + ": " - //build layer count box - var input = document.createElement("input"); - input.classList.add("model_setting_value"); - input.classList.add("setting_value"); - input.inputmode = "numeric"; - input.id = "gpu_layers_box_"+i; - input.value = data.break_values[i]; - input.onblur = function () { - document.getElementById(this.id.replace("_box", "")).value = this.value; - update_gpu_layers(); - } - span.append(input); - div.append(span); - //build layer count slider - var input = document.createElement("input"); - input.classList.add("model_setting_item"); - input.type = "range"; - input.min = 0; - input.max = data.layer_count; - input.step = 1; - input.value = data.break_values[i]; - input.id = "gpu_layers_" + i; - input.onchange = function () { - document.getElementById(this.id.replace("gpu_layers", "gpu_layers_box")).value = this.value; - update_gpu_layers(); - } - div.append(input); - //build slider bar #s - //min - var span = document.createElement("span"); - span.classList.add("model_setting_minlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = 0; - span.append(span2); - div.append(span); - //max - var span = document.createElement("span"); - span.classList.add("model_setting_maxlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = data.layer_count; - span.append(span2); - div.append(span); - - model_layer_bars.append(div); - } - - //add the disk layers - if (data.disk_break) { - var div = document.createElement("div"); - div.classList.add("model_setting_container"); - //build GPU text - var span = document.createElement("span"); - span.classList.add("model_setting_label"); - span.textContent = "Disk cache: " - //build layer count box - var input = document.createElement("input"); - input.classList.add("model_setting_value"); - input.classList.add("setting_value"); - input.inputmode = "numeric"; - input.id = "disk_layers_box"; - input.value = data.disk_break_value; - input.onblur = function () { - 
document.getElementById(this.id.replace("_box", "")).value = this.value; - update_gpu_layers(); - } - span.append(input); - div.append(span); - //build layer count slider - var input = document.createElement("input"); - input.classList.add("model_setting_item"); - input.type = "range"; - input.min = 0; - input.max = data.layer_count; - input.step = 1; - input.value = data.disk_break_value; - input.id = "disk_layers"; - input.onchange = function () { - document.getElementById(this.id+"_box").value = this.value; - update_gpu_layers(); - } - div.append(input); - //build slider bar #s - //min - var span = document.createElement("span"); - span.classList.add("model_setting_minlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = 0; - span.append(span2); - div.append(span); - //max - var span = document.createElement("span"); - span.classList.add("model_setting_maxlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = data.layer_count; - span.append(span2); - div.append(span); - } - - model_layer_bars.append(div); - - update_gpu_layers(); - } else { - document.getElementById("modellayers").classList.add("hidden"); - accept.classList.remove("disabled"); - } accept.disabled = false; + modelplugin = document.getElementById("modelplugin"); + modelplugin.classList.remove("hidden"); + modelplugin.onchange = function () { + for (const area of document.getElementsByClassName("model_plugin_settings_area")) { + area.classList.add("hidden"); + } + document.getElementById(this.value + "_settings_area").classList.remove("hidden"); + } + //create the content + for (const [loader, items] of Object.entries(data)) { + model_area = document.createElement("DIV"); + model_area.id = loader + "_settings_area"; + model_area.classList.add("model_plugin_settings_area"); + model_area.classList.add("hidden"); + modelpluginoption = document.createElement("option"); + modelpluginoption.innerText = loader; + modelpluginoption.value = loader; + modelplugin.append(modelpluginoption); + + for (item of items) { + let new_setting = document.getElementById('blank_model_settings').cloneNode(true); + new_setting.id = loader; + new_setting.classList.remove("hidden"); + new_setting.querySelector('#blank_model_settings_label').innerText = item['label']; + new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']); + + onchange_event = function () { + //get check value: + if ('sum' in this.check_data) { + check_value = 0 + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value); + } + } + } else { + check_value = this.value + } + if (this.check_data['check'] == "=") { + valid = (check_value == this.check_data['value']); + } else if (this.check_data['check'] == "!=") { + valid = (check_value != this.check_data['value']); + } else if (this.check_data['check'] == ">=") { + valid = (check_value >= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value <= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value > this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value < this.check_data['value']); + } + if (valid) { + //if we are supposed to refresh when this value 
changes we'll resubmit + if (this.getAttribute("refresh_model_inputs") == "true") { + console.log("resubmit"); + } + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } + } + } else { + this.closest(".setting_container_model").classList.remove('input_error'); + this.closest(".setting_container_model").removeAttribute("tooltip"); + } + var accept = document.getElementById("btn_loadmodelaccept"); + if (document.getElementsByClassName("input_error").length) + accept.disabled = true; + } else { + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + } else { + this.closest(".setting_container_model").classList.add('input_error'); + this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + var accept = document.getElementById("btn_loadmodelaccept"); + if (document.getElementsByClassName("input_error").length > 0) { + accept.classList.add("disabled"); + accept.disabled = true; + } else { + accept.classList.remove("disabled"); + accept.disabled = false; + } + + } + if (item['uitype'] == "slider") { + var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number'); + slider_number.value = item['default']; + slider_number.id = loader + "|" + item['id'] + "_value_text"; + slider_number.onchange = function() { document.getElementById(this.id.replace("_text", "")).value = this.value;}; + + var slider = new_setting.querySelector('#blank_model_settings_slider'); + slider.value = item['default']; + slider.min = item['min']; + slider.max = item['max']; + slider.id = loader + "|" + item['id'] + "_value"; + if ('check' in item) { + slider.check_data = item['check']; + slider_number.check_data = item['check']; + } else { + slider.check_data = null; + slider_number.check_data = null; + } + slider.oninput = function() { document.getElementById(this.id+"_text").value = this.value;}; + slider.onchange = onchange_event; + slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min']; + new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max']; + slider.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_slider').classList.add("hidden"); + } + if (item['uitype'] == "toggle") { + var toggle = new_setting.querySelector('#blank_model_settings_toggle'); + toggle.id = loader + "|" + item['id'] + "_value"; + toggle.checked = item['default']; + toggle.onchange = onchange_event; + toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + toggle.check_data = item['check']; + } else { + toggle.check_data = null; + } + toggle.onchange(); + } else { + 
new_setting.querySelector('#blank_model_settings_checkbox_container').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_toggle').classList.add("hidden"); + } + if (item['uitype'] == "dropdown") { + var select_element = new_setting.querySelector('#blank_model_settings_dropdown'); + select_element.id = loader + "|" + item['id'] + "_value"; + for (const dropdown_value of item['children']) { + new_option = document.createElement("option"); + new_option.value = dropdown_value['value']; + new_option.innerText = dropdown_value['text']; + select_element.append(new_option); + } + select_element.value = item['default']; + select_element.onchange = onchange_event; + select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + select_element.check_data = item['check']; + } else { + select_element.check_data = null; + } + select_element.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_dropdown').classList.add("hidden"); + } + if (item['uitype'] == "password") { + var password_item = new_setting.querySelector('#blank_model_settings_password'); + password_item.id = loader + "|" + item['id'] + "_value"; + password_item.value = item['default']; + password_item.onchange = onchange_event; + password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + password_item.check_data = item['check']; + } else { + password_item.check_data = null; + } + password_item.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_password').classList.add("hidden"); + } + if (item['uitype'] == "text") { + var text_item = new_setting.querySelector('#blank_model_settings_text'); + text_item.id = loader + "|" + item['id'] + "_value"; + text_item.value = item['default']; + text_item.onchange = onchange_event; + text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + text_item.check_data = item['check']; + } else { + text_item.check_data = null; + } + text_item.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_text').classList.add("hidden"); + } + + model_area.append(new_setting); + loadmodelsettings.append(model_area); + } + } + + //unhide the first plugin settings + console.log(document.getElementById("modelplugin").value + "_settings_area"); + if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) { + document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); + } } @@ -1877,42 +1875,16 @@ function update_gpu_layers() { function load_model() { var accept = document.getElementById('btn_loadmodelaccept'); - gpu_layers = [] - disk_layers = 0; - if (!(document.getElementById("modellayers").classList.contains("hidden"))) { - for (let i=0; i < document.getElementById("gpu_count").value; i++) { - gpu_layers.push(document.getElementById("gpu_layers_"+i).value); - } - if (document.getElementById("disk_layers")) { - disk_layers = document.getElementById("disk_layers").value; - } - } - //Need to do different stuff with custom models - if ((accept.getAttribute('menu') == 'GPT2Custom') || (accept.getAttribute('menu') == 'NeoCustom')) { - var model = document.getElementById("btn_loadmodelaccept").getAttribute("menu"); - var path = document.getElementById("btn_loadmodelaccept").getAttribute("display_name"); - } else { - var model = document.getElementById("btn_loadmodelaccept").getAttribute("selected_model"); - var 
path = ""; - } + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); - let selected_models = []; - for (item of document.getElementById("oaimodel").selectedOptions) { - selected_models.push(item.value); - } - if (selected_models == ['']) { - - selected_models = []; - } else if (selected_models.length == 1) { - selected_models = selected_models[0]; + //get an object of all the input settings from the user + data = {} + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + data[element.id.split("|")[1].replace("_value", "")] = element.value; } + data = {...data, ...selected_model_data} - message = {'model': model, 'path': path, 'use_gpu': document.getElementById("use_gpu").checked, - 'key': document.getElementById('modelkey').value, 'gpu_layers': gpu_layers.join(), - 'disk_layers': disk_layers, 'url': document.getElementById("modelurl").value, - 'online_model': selected_models, - 'use_8_bit': document.getElementById('use_8_bit').checked}; - socket.emit("load_model", message); + socket.emit("load_model", data); closePopups(); } diff --git a/templates/popups.html b/templates/popups.html index 12c4c27a..59f07e70 100644 --- a/templates/popups.html +++ b/templates/popups.html @@ -46,35 +46,11 @@
[popups.html hunk garbled during extraction; only the "Usage (VRAM)" column header and bare -/+ line markers survive. The hunk replaces the old model-key/URL/GPU-layer widgets with the "modelplugin" selector and "loadmodelsettings" container used by the JavaScript above.]
diff --git a/templates/templates.html b/templates/templates.html
index 4f16ff66..49cd3e5b 100644
--- a/templates/templates.html
+++ b/templates/templates.html
@@ -154,3 +154,22 @@
[templates.html hunk garbled during extraction; only the "help_icon" label and bare + line markers survive. It adds the hidden "blank_model_settings" template (label, tooltip, slider, toggle, dropdown, password and text inputs) that the popup JavaScript clones for each loader setting.]
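[Illustrative note, not part of the patch: the popup JavaScript above builds its form from plain parameter descriptors returned by each loader's get_requested_parameters() and gates the Load button on the attached "check" rule. The standalone Python sketch below mirrors the "Key" descriptor defined in the horde/openai loaders in this series; passes_check is a hypothetical helper that reproduces the comparison the UI performs client-side, and it does not cover the additional {"sum": [...]} form used for the GPU/CPU/disk layer split.]

import operator

# Comparison operators understood by a descriptor's "check" field
# (same set the popup's onchange handler evaluates).
_OPS = {"=": operator.eq, "!=": operator.ne, ">=": operator.ge, "<=": operator.le}

def passes_check(value, check):
    """Return True when `value` satisfies a descriptor's 'check' rule."""
    return _OPS[check["check"]](value, check["value"])

# One descriptor of the shape get_requested_parameters() returns; the popup
# renders a text box for it and disables Load until the check passes.
api_key_parameter = {
    "uitype": "text",
    "unit": "text",
    "label": "Key",
    "id": "key",
    "default": "",
    "check": {"value": "", "check": "!="},  # key must not be empty
    "tooltip": "User key used when connecting to the service.",
    "menu_path": "",
    "refresh_model_inputs": True,
    "extra_classes": "",
}

if __name__ == "__main__":
    print(passes_check("my-secret-key", api_key_parameter["check"]))  # True
    print(passes_check("", api_key_parameter["check"]))               # False

[End of illustrative note.]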
\ No newline at end of file From 77dd5aa7259f65262f6077957b493c74d98eaa24 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 09:09:09 -0400 Subject: [PATCH 02/68] Minor update --- aiserver.py | 7 +++++-- modeling/inference_models/horde.py | 2 +- static/koboldai.js | 4 +++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index e7227c81..ac90d6f4 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6489,7 +6489,7 @@ def UI_2_select_model(data): if valid: logger.debug("Valid Loaders: {}".format(valid_loaders)) emit("selected_model_info", valid_loaders) - if not valid: + if not valid and 'path' in data: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) output = [] @@ -6501,7 +6501,9 @@ def UI_2_select_model(data): break output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) - + elif not valid: + logger.error("Nothing can load the model: {}".format(valid_loaders)) + return @@ -6530,6 +6532,7 @@ def UI_2_select_model(data): def UI_2_load_model(data): logger.info("loading Model") logger.info(data) + model_loaders[data['plugin']].set_input_parameters(**data) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) #==================================================================# diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 56e88205..f02cf265 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -67,7 +67,7 @@ class model_loader(InferenceModel): "unit": "text", "label": "Model", "id": "model", - "default": "", + "default": model_name, "check": {"value": "", 'check': "!="}, "tooltip": "Which model to use when running OpenAI/GooseAI.", "menu_path": "", diff --git a/static/koboldai.js b/static/koboldai.js index 0656253f..1907add8 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1882,7 +1882,9 @@ function load_model() { for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { data[element.id.split("|")[1].replace("_value", "")] = element.value; } - data = {...data, ...selected_model_data} + data = {...data, ...selected_model_data}; + + data['plugin'] = document.getElementById("modelplugin").value; socket.emit("load_model", data); closePopups(); From 4605d10c370b994cfbd1d27891ccae6ade8b9c6b Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 12:08:35 -0400 Subject: [PATCH 03/68] Next iteration. 
Model Loading is broken completely now :) --- aiserver.py | 180 +++--------------- modeling/inference_model.py | 6 +- modeling/inference_models/api.py | 4 +- modeling/inference_models/basic_api.py | 4 +- modeling/inference_models/generic_hf_torch.py | 1 + modeling/inference_models/horde.py | 8 +- modeling/inference_models/openai.py | 6 +- modeling/inference_models/parents/hf.py | 56 ++++-- modeling/inference_models/parents/hf_torch.py | 2 +- modeling/inference_models/readonly.py | 77 ++++++++ static/koboldai.js | 13 +- 11 files changed, 170 insertions(+), 187 deletions(-) create mode 100644 modeling/inference_models/readonly.py diff --git a/aiserver.py b/aiserver.py index ac90d6f4..f9e60641 100644 --- a/aiserver.py +++ b/aiserver.py @@ -645,10 +645,14 @@ def new_socketio_on(*a, **k): socketio.on = new_socketio_on def emit(*args, **kwargs): - try: - return _emit(*args, **kwargs) - except AttributeError: - return socketio.emit(*args, **kwargs) + if has_request_context(): + try: + return _emit(*args, **kwargs) + except AttributeError: + return socketio.emit(*args, **kwargs) + else: #We're trying to send data outside of the http context. This won't work. Try the relay + if koboldai_settings.queue is not None: + koboldai_settings.queue.put([args[0], args[1], kwargs]) utils.emit = emit #replacement for tpool.execute to maintain request contexts @@ -1780,10 +1784,6 @@ def get_cluster_models(msg): emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1") emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2") - -def reset_model_settings(): - koboldai_vars.reset_for_model_load() - def unload_model(): global model @@ -1816,7 +1816,7 @@ def unload_model(): koboldai_vars.badwordsids = koboldai_settings.badwordsids_default -def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=False, online_model="", use_breakmodel_args=False, breakmodel_args_default_to_cpu=False, url=None, use_8_bit=False): +def load_model(plugin, initial_load=False): global model global tokenizer global model_config @@ -1827,79 +1827,18 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal if initial_load: use_breakmodel_args = True - reset_model_settings() koboldai_vars.reset_model() - koboldai_vars.cluster_requested_models = [online_model] if isinstance(online_model, str) else online_model - if koboldai_vars.cluster_requested_models == [""]: - koboldai_vars.cluster_requested_models = [] - koboldai_vars.noai = False - if not use_breakmodel_args: - set_aibusy(True) - if koboldai_vars.model != 'ReadOnly': - emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True) - #Have to add a sleep so the server will send the emit for some reason - time.sleep(0.1) + set_aibusy(True) + if koboldai_vars.model != 'ReadOnly': + emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True) + #Have to add a sleep so the server will send the emit for some reason + time.sleep(0.1) - if gpu_layers is not None: - args.breakmodel_gpulayers = gpu_layers - elif use_breakmodel_args: - gpu_layers = args.breakmodel_gpulayers - if breakmodel_args_default_to_cpu and gpu_layers is None: - gpu_layers = args.breakmodel_gpulayers = [] - if disk_layers is not None: - args.breakmodel_disklayers = int(disk_layers) - elif use_breakmodel_args: - disk_layers = args.breakmodel_disklayers - if 
breakmodel_args_default_to_cpu and disk_layers is None: - disk_layers = args.breakmodel_disklayers = 0 + if 'model' in globals(): + model.unload() - unload_model() - - if online_model == "": - koboldai_vars.configname = getmodelname() - #Let's set the GooseAI or OpenAI server URLs if that's applicable - else: - koboldai_vars.online_model = online_model - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - koboldai_vars.configname = f"GooseAI_{online_model.replace('/', '_')}" - elif koboldai_vars.model == "CLUSTER" and isinstance(online_model, list): - if len(online_model) != 1: - koboldai_vars.configname = koboldai_vars.model - else: - koboldai_vars.configname = f"{koboldai_vars.model}_{online_model[0].replace('/', '_')}" - else: - koboldai_vars.configname = f"{koboldai_vars.model}_{online_model.replace('/', '_')}" - - if path.exists(get_config_filename()): - changed=False - with open(get_config_filename(), "r") as file: - # Check if API key exists - js = json.load(file) - if 'online_model' in js: - if js['online_model'] != online_model: - changed=True - js['online_model'] = online_model - else: - changed=True - js['online_model'] = online_model - - if changed: - with open("settings/{}.v2_settings".format(koboldai_vars.model), "w") as file: - file.write(json.dumps(js, indent=3)) - - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - args.configname = "GooseAI" + "/" + online_model - elif koboldai_vars.model != "CLUSTER": - args.configname = koboldai_vars.model + "/" + online_model - koboldai_vars.oaiurl = koboldai_vars.oaiengines + "/{0}/completions".format(online_model) # If transformers model was selected & GPU available, ask to use CPU or GPU if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): @@ -1937,84 +1876,9 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal else: koboldai_vars.default_preset = koboldai_settings.default_preset - - # Ask for API key if InferKit was selected - if koboldai_vars.model == "InferKit": - koboldai_vars.apikey = koboldai_vars.oaiapikey - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - koboldai_vars.configname = "GooseAI" - - # Ask for API key if OpenAI was selected - if koboldai_vars.model == "OAI" and not koboldai_vars.configname: - koboldai_vars.configname = "OAI" - - if koboldai_vars.model == "ReadOnly": - koboldai_vars.noai = True - - # TODO: InferKit - if koboldai_vars.model == "ReadOnly" or koboldai_vars.noai: - pass - elif koboldai_vars.model in ["Colab", "API", "CLUSTER", "OAI"]: - koboldai_vars.colaburl = url or koboldai_vars.colaburl - koboldai_vars.usegpu = False - koboldai_vars.breakmodel = False - - if koboldai_vars.model == "Colab": - from modeling.inference_models.basic_api import model_loader - model = model_loader() - elif koboldai_vars.model == "API": - from modeling.inference_models.api import model_loader - model = model_loader(koboldai_vars.colaburl.replace("/request", "")) - elif koboldai_vars.model == "CLUSTER": - from modeling.inference_models.horde import model_loader - model = 
model_loader() - elif koboldai_vars.model == "OAI": - from modeling.inference_models.openai import model_loader - model = model_loader() - - model.load(initial_load=initial_load) - # TODO: This check sucks, make a model object or somethign - elif "rwkv" in koboldai_vars.model: - if koboldai_vars.use_colab_tpu: - raise RuntimeError("RWKV is not supported on the TPU.") - from modeling.inference_models.rwkv import model_loader - model = model_loader(koboldai_vars.model) - model.load() - elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai: - # HF Torch - logger.init("Transformers", status='Starting') - for m in ("GPTJModel", "XGLMModel"): - try: - globals()[m] = getattr(__import__("transformers"), m) - except: - pass - - from modeling.inference_models.generic_hf_torch import model_loader - model = model_loader( - koboldai_vars.model, - lazy_load=koboldai_vars.lazy_load, - low_mem=args.lowmem - ) - - model.load( - save_model=not (args.colab or args.cacheonly) or args.savemodel, - initial_load=initial_load, - ) - logger.info(f"Pipeline created: {koboldai_vars.model}") - else: - # TPU - from modeling.inference_models.hf_mtj import model_loader - model = model_loader( - koboldai_vars.model - ) - model.load( - save_model=not (args.colab or args.cacheonly) or args.savemodel, - initial_load=initial_load, - ) + model = model_loaders[plugin] + model.load(initial_load=initial_load) # TODO: Convert everywhere to use model.tokenizer if model: @@ -6532,7 +6396,8 @@ def UI_2_select_model(data): def UI_2_load_model(data): logger.info("loading Model") logger.info(data) - model_loaders[data['plugin']].set_input_parameters(**data) + model_loaders[data['plugin']].set_input_parameters(data) + load_model(data['plugin']) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) #==================================================================# @@ -8155,7 +8020,8 @@ def send_one_time_messages(data, wait_time=0): # Test #==================================================================# def model_info(): - if model_config is not None: + global model_config + if 'model_config' in globals() and model_config is not None: if isinstance(model_config, dict): if 'model_type' in model_config: model_type = str(model_config['model_type']) @@ -11045,7 +10911,7 @@ for schema in config_endpoint_schemas: def startup(): if koboldai_vars.model == "" or koboldai_vars.model is None: koboldai_vars.model = "ReadOnly" - socketio.start_background_task(load_model, **{'initial_load':True}) + socketio.start_background_task(load_model, *('readonly',), **{'initial_load':True}) print("", end="", flush=True) diff --git a/modeling/inference_model.py b/modeling/inference_model.py index 27ad46db..343eb39a 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -169,6 +169,7 @@ class InferenceModel: ] self.tokenizer = None self.capabilties = ModelCapabilities() + self.model_name = "Not Defined" def is_valid(self, model_name, model_path, menu_path, vram): return True @@ -176,7 +177,7 @@ class InferenceModel: def requested_parameters(self, model_name, model_path, menu_path, vram): return {} - def define_input_parameters(self): + def set_input_parameters(self, parameters): return def load(self, save_model: bool = False, initial_load: bool = False) -> None: @@ -186,6 +187,9 @@ class InferenceModel: self._load(save_model=save_model, initial_load=initial_load) self._post_load() + def 
unload(self): + return + def _pre_load(self) -> None: """Pre load hook. Called before `_load()`.""" diff --git a/modeling/inference_models/api.py b/modeling/inference_models/api.py index 41088bc7..5bddd714 100644 --- a/modeling/inference_models/api.py +++ b/modeling/inference_models/api.py @@ -46,8 +46,8 @@ class model_loader(InferenceModel): }) return requested_parameters - def set_input_parameters(self, base_url=""): - self.base_url = base_url.rstrip("/") + def set_input_parameters(self, parameters): + self.base_url = parameters['base_url'].rstrip("/") def _load(self, save_model: bool, initial_load: bool) -> None: tokenizer_id = requests.get(f"{self.base_url}/api/v1/model").json()["result"] diff --git a/modeling/inference_models/basic_api.py b/modeling/inference_models/basic_api.py index d7fc0863..5666ba8e 100644 --- a/modeling/inference_models/basic_api.py +++ b/modeling/inference_models/basic_api.py @@ -45,8 +45,8 @@ class model_loader(InferenceModel): }) return requested_parameters - def set_input_parameters(self, colaburl=""): - self.colaburl = colaburl + def set_input_parameters(self, parameters): + self.colaburl = parameters['colaburl'] def _initialize_model(self): return diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index 366fbbb7..b542c712 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -30,6 +30,7 @@ class model_loader(HFTorchInferenceModel): def _load(self, save_model: bool, initial_load: bool) -> None: utils.koboldai_vars.allowsp = True + self.lazy_load = utils.koboldai_vars.lazy_load # Make model path the same as the model name to make this consistent # with the other loading method if it isn't a known model type. 
This diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index f02cf265..057669d7 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -78,10 +78,10 @@ class model_loader(InferenceModel): }]) return requested_parameters - def set_input_parameters(self, url="", key="", model=""): - self.key = key.strip() - self.model = model - self.url = url + def set_input_parameters(self, parameters): + self.key = parameters['key'].strip() + self.model = parameters['model'] + self.url = parameters['url'] def get_cluster_models(self): # Get list of models from public cluster diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index 01c0c037..efbb01d3 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -59,9 +59,9 @@ class model_loader(InferenceModel): }]) return requested_parameters - def set_input_parameters(self, key="", model=""): - self.key = key.strip() - self.model = model + def set_input_parameters(self, parameters): + self.key = parameters['key'].strip() + self.model = parameters['model'] def get_oai_models(self): if self.key == "": diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 54781296..3099feaf 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -34,12 +34,12 @@ class HFInferenceModel(InferenceModel): requested_parameters = [] if model_path is not None and os.path.exists(model_path): - model_config = AutoConfig.from_pretrained(model_path) + self.model_config = AutoConfig.from_pretrained(model_path) elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): - model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") else: - model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") - layer_count = model_config["n_layer"] if isinstance(model_config, dict) else model_config.num_layers if hasattr(model_config, "num_layers") else model_config.n_layer if hasattr(model_config, "n_layer") else model_config.num_hidden_layers if hasattr(model_config, 'num_hidden_layers') else None + self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None if layer_count is not None and layer_count >= 0: if os.path.exists("settings/{}.breakmodel".format(model_name.replace("/", "_"))): with open("settings/{}.breakmodel".format(model_name.replace("/", "_")), "r") as file: @@ -61,11 +61,11 @@ class HFInferenceModel(InferenceModel): "uitype": "slider", "unit": "int", "label": "{} Layers".format(torch.cuda.get_device_name(i)), - "id": "{} Layers".format(i), + "id": "{}_Layers".format(i), "min": 0, "max": layer_count, "step": 1, - "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": 
layer_count, 'check': "="}, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), "default": break_values[i], "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), @@ -77,11 +77,11 @@ class HFInferenceModel(InferenceModel): "uitype": "slider", "unit": "int", "label": "CPU Layers", - "id": "CPU Layers", + "id": "CPU_Layers", "min": 0, "max": layer_count, "step": 1, - "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), "default": layer_count - sum(break_values), "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.", @@ -98,7 +98,7 @@ class HFInferenceModel(InferenceModel): "min": 0, "max": layer_count, "step": 1, - "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), "default": disk_blocks, "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. 
Use as a last resort.", @@ -122,10 +122,40 @@ class HFInferenceModel(InferenceModel): return requested_parameters - def set_input_parameters(self, layers=[], disk_layers=0, use_gpu=False): + def set_input_parameters(self, parameters): + gpu_count = torch.cuda.device_count() + layers = [] + for i in range(gpu_count): + layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None self.layers = layers - self.disk_layers = disk_layers - self.use_gpu = use_gpu + self.disk_layers = parameters['disk_layers'] if 'disk_layers' in parameters else None + self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.model_name = parameters['id'] + self.path = parameters['path'] if 'path' in parameters else None + + def unload(self): + if hasattr(self, 'model'): + self.model = None + if hasattr(self, 'tokenizer'): + self.tokenizer = None + if hasattr(self, 'model_config'): + self.model_config = None + with torch.no_grad(): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="torch.distributed.reduce_op is deprecated") + for tensor in gc.get_objects(): + try: + if torch.is_tensor(tensor): + tensor.set_(torch.tensor((), device=tensor.device, dtype=tensor.dtype)) + except: + pass + gc.collect() + try: + with torch.no_grad(): + torch.cuda.empty_cache() + except: + pass def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults @@ -187,7 +217,7 @@ class HFInferenceModel(InferenceModel): return model_path - basename = utils.koboldai_vars.model.replace("/", "_") + basename = self.model_name.replace("/", "_") if legacy: ret = basename else: diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index d8afafb1..4de13d7b 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -398,7 +398,7 @@ class HFTorchInferenceModel(HFInferenceModel): Embedding._koboldai_patch_causallm_model = self.model def _get_lazy_load_callback(self, n_layers: int, convert_to_float16: bool = True): - if not self.lazy_load: + if not utils.koboldai_vars.lazy_load: return if utils.args.breakmodel_disklayers is not None: diff --git a/modeling/inference_models/readonly.py b/modeling/inference_models/readonly.py new file mode 100644 index 00000000..c642c05a --- /dev/null +++ b/modeling/inference_models/readonly.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +import torch +import requests +import numpy as np +from typing import List, Optional, Union + +import utils +from logger import logger +from modeling.inference_model import ( + GenerationResult, + GenerationSettings, + InferenceModel, + ModelCapabilities, +) + + +class BasicAPIException(Exception): + """To be used for errors when using the Basic API as an interface.""" + + +class model_loader(InferenceModel): + def __init__(self) -> None: + super().__init__() + + # Do not allow API to be served over the API + self.capabilties = ModelCapabilities(api_host=False) + self.tokenizer = self._tokenizer() + self.model = None + self.model_name = "Read Only" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "ReadOnly" + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + return requested_parameters + + def set_input_parameters(self, parameters): + return + + def unload(self): 
+ utils.koboldai_vars.noai = False + + def _initialize_model(self): + return + + class _tokenizer(): + def __init__(self): + self._koboldai_header = [] + def decode(self, _input): + return "" + def encode(self, input_text): + return [] + + def _load(self, save_model: bool = False, initial_load: bool = False) -> None: + self.tokenizer = self.tokenizer + self.model = None + utils.koboldai_vars.noai = True + + def _raw_generate( + self, + prompt_tokens: Union[List[int], torch.Tensor], + max_new: int, + gen_settings: GenerationSettings, + single_line: bool = False, + batch_count: int = 1, + seed: Optional[int] = None, + **kwargs, + ): + return GenerationResult( + model=self, + out_batches=np.array([]), + prompt=prompt_tokens, + is_whole_generation=True, + single_line=single_line, + ) diff --git a/static/koboldai.js b/static/koboldai.js index 1907add8..7f004ff2 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -14,8 +14,8 @@ socket.on('load_popup', function(data){load_popup(data);}); socket.on('popup_items', function(data){popup_items(data);}); socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);}); socket.on('popup_edit_file', function(data){popup_edit_file(data);}); -socket.on('show_model_menu', function(data){show_model_menu(data);}); -socket.on('open_model_load_menu', function(data){new_show_model_menu(data);}); +//socket.on('show_model_menu', function(data){show_model_menu(data);}); +socket.on('open_model_load_menu', function(data){show_model_menu(data);}); socket.on('selected_model_info', function(data){selected_model_info(data);}); socket.on('oai_engines', function(data){oai_engines(data);}); socket.on('buildload', function(data){buildload(data);}); @@ -1502,13 +1502,18 @@ function getModelParameterCount(modelName) { return base * multiplier; } -function new_show_model_menu(data) { +function show_model_menu(data) { //clear out the loadmodelsettings var loadmodelsettings = document.getElementById('loadmodelsettings') while (loadmodelsettings.firstChild) { loadmodelsettings.removeChild(loadmodelsettings.firstChild); } - document.getElementById("modelplugin").classList.add("hidden"); + //Clear out plugin selector + var model_plugin = document.getElementById('modelplugin'); + while (model_plugin.firstChild) { + model_plugin.removeChild(model_plugin.firstChild); + } + model_plugin.classList.add("hidden"); var accept = document.getElementById("btn_loadmodelaccept"); accept.disabled = false; From e9c845dc2a1eae4927ed2a7417c6aa6969329bb9 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 14:14:52 -0400 Subject: [PATCH 04/68] Fix for badwordIDs --- modeling/inference_models/generic_hf_torch.py | 2 +- modeling/inference_models/parents/hf.py | 1 + modeling/inference_models/parents/hf_torch.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index b542c712..d5cf6397 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -43,7 +43,7 @@ class model_loader(HFTorchInferenceModel): self.model_name = os.path.basename( os.path.normpath(utils.koboldai_vars.custmodpth) ) - utils.koboldai_vars.model = self.model_name + utils.koboldai_vars.model = self.model_name # If we specify a model and it's in the root directory, we need to move # it to the models directory (legacy folder structure to new) diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 
3099feaf..1941a12e 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -17,6 +17,7 @@ class HFInferenceModel(InferenceModel): self.model = None self.tokenizer = None + self.badwordsids = koboldai_settings.badwordsids_default def is_valid(self, model_name, model_path, menu_path): try: diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index 4de13d7b..7cc16ad5 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -245,7 +245,7 @@ class HFTorchInferenceModel(HFInferenceModel): len(prompt_tokens) + max_new, utils.koboldai_vars.max_length ), repetition_penalty=1.0, - bad_words_ids=utils.koboldai_vars.badwordsids + bad_words_ids=self.badwordsids + additional_bad_words_ids, use_cache=True, num_return_sequences=batch_count, From a9c785d0f0020847e342f18f9910f1ed9c4871dd Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 14:20:14 -0400 Subject: [PATCH 05/68] Fix for Horde --- modeling/inference_models/horde.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 057669d7..bd457197 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -116,9 +116,9 @@ class model_loader(InferenceModel): def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer( - utils.koboldai_vars.cluster_requested_models[0] - if len(utils.koboldai_vars.cluster_requested_models) > 0 - else "gpt2", + self.model + #if len(self.model) > 0 + #else "gpt2", ) def _raw_generate( @@ -166,14 +166,14 @@ class model_loader(InferenceModel): client_agent = "KoboldAI:2.0.0:koboldai.org" cluster_headers = { - "apikey": utils.koboldai_vars.horde_api_key, + "apikey": self.key, "Client-Agent": client_agent, } try: # Create request req = requests.post( - f"{utils.koboldai_vars.horde_url}/api/v2/generate/text/async", + f"{self.url}/api/v2/generate/text/async", json=cluster_metadata, headers=cluster_headers, ) @@ -211,7 +211,7 @@ class model_loader(InferenceModel): while not finished: try: req = requests.get( - f"{utils.koboldai_vars.horde_url}/api/v2/generate/text/status/{request_id}", + f"{self.url}/api/v2/generate/text/status/{request_id}", headers=cluster_agent_headers, ) except requests.exceptions.ConnectionError: From 69d942c00cfd16708f82826fcc0d50355e322c0f Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 20:22:30 -0400 Subject: [PATCH 06/68] Kind of working breakmodel --- aiserver.py | 256 +----------------- koboldai_settings.py | 3 +- modeling/inference_models/generic_hf_torch.py | 7 +- modeling/inference_models/gooseai.py | 31 +++ modeling/inference_models/hf_mtj.py | 2 +- modeling/inference_models/openai.py | 168 +----------- modeling/inference_models/parents/hf.py | 35 ++- modeling/inference_models/parents/hf_torch.py | 27 +- .../parents/openai_gooseai.py | 189 +++++++++++++ static/koboldai.js | 6 + 10 files changed, 281 insertions(+), 443 deletions(-) create mode 100644 modeling/inference_models/gooseai.py create mode 100644 modeling/inference_models/parents/openai_gooseai.py diff --git a/aiserver.py b/aiserver.py index f9e60641..158a6699 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1473,7 +1473,7 @@ def general_startup(override_args=None): koboldai_vars.quiet = True if args.nobreakmodel: - koboldai_vars.nobreakmodel = True + model_loaders['generic_hf_torch'].nobreakmodel 
= True if args.remote: koboldai_vars.host = True; @@ -1484,6 +1484,9 @@ def general_startup(override_args=None): if args.localtunnel: koboldai_vars.host = True; + if args.lowmem: + model_loaders['generic_hf_torch'].low_mem = True + if args.host != "Disabled": # This means --host option was submitted without an argument # Enable all LAN IPs (0.0.0.0/0) @@ -1516,6 +1519,9 @@ def general_startup(override_args=None): koboldai_vars.trust_remote_code = True if args.cpu: koboldai_vars.use_colab_tpu = False + koboldai_vars.hascuda = False + koboldai_vars.usegpu = False + model_loaders['generic_hf_torch'].nobreakmodel = True koboldai_vars.smandelete = koboldai_vars.host == args.override_delete koboldai_vars.smanrename = koboldai_vars.host == args.override_rename @@ -1545,245 +1551,6 @@ def general_startup(override_args=None): socketio.start_background_task(socket_io_relay, koboldai_settings.queue, socketio) -#==================================================================# -# Load Model -#==================================================================# - -@socketio.on("get_model_info") -def get_model_info(model, directory=""): - logger.info("Selected: {}, {}".format(model, directory)) - # if the model is in the api list - disk_blocks = 0 - key = False - breakmodel = False - gpu = False - layer_count = None - key_value = "" - break_values = [] - url = False - default_url = None - models_on_url = False - multi_online_models = False - show_online_model_select=False - gpu_count = torch.cuda.device_count() - gpu_names = [] - send_horde_models = False - show_custom_model_box = False - for i in range(gpu_count): - gpu_names.append(torch.cuda.get_device_name(i)) - if model in ['Colab', 'API']: - url = True - elif model == 'CLUSTER': - models_on_url = True - show_online_model_select=True - url = True - key = True - default_url = koboldai_vars.horde_url - multi_online_models = True - key_value = koboldai_vars.horde_api_key - url = koboldai_vars.horde_url - if key_value: - send_horde_models = True - elif model in [x.name for x in model_menu['apilist']]: - show_online_model_select=True - if path.exists("settings/{}.v2_settings".format(model)): - with open("settings/{}.v2_settings".format(model), "r") as file: - # Check if API key exists - try: - js = json.load(file) - - if("apikey" in js and js["apikey"] != ""): - # API key exists, grab it and close the file - key_value = js["apikey"] - elif 'oaiapikey' in js and js['oaiapikey'] != "": - key_value = js["oaiapikey"] - if model in ('GooseAI', 'OAI'): - get_oai_models({'model': model, 'key': key_value}) - except json.decoder.JSONDecodeError: - print(":(") - pass - key = True - elif "rwkv" in model.lower(): - pass - elif model == 'ReadOnly': - pass - #elif model == 'customhuggingface': - # show_custom_model_box = True - elif args.cpu: - pass - else: - layer_count = get_layer_count(model, directory=directory) - if layer_count is None: - breakmodel = False - gpu = True - else: - breakmodel = True - if model in ["NeoCustom", "GPT2Custom", "customhuggingface"]: - filename = "settings/{}.breakmodel".format(os.path.basename(os.path.normpath(directory))) - else: - filename = "settings/{}.breakmodel".format(model.replace("/", "_")) - if path.exists(filename): - with open(filename, "r") as file: - data = [x for x in file.read().split("\n")[:2] if x != ''] - if len(data) < 2: - data.append("0") - break_values, disk_blocks = data - break_values = break_values.split(",") - else: - break_values = [layer_count] - break_values = [int(x) for x in break_values if x != ''] - 
break_values += [0] * (gpu_count - len(break_values)) - emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key, 'multi_online_models': multi_online_models, 'default_url': default_url, - 'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, - 'disk_break_value': disk_blocks, 'accelerate': True, - 'break_values': break_values, 'gpu_count': gpu_count, - 'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, - 'show_custom_model_box': show_custom_model_box}, broadcast=True, room="UI_1") - emit('selected_model_info', {'key_value': key_value, 'key':key, - 'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, 'multi_online_models': multi_online_models, 'default_url': default_url, - 'disk_break_value': disk_blocks, 'disk_break': True, - 'break_values': break_values, 'gpu_count': gpu_count, - 'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, 'show_online_model_select': show_online_model_select, - 'bit_8_available': koboldai_vars.bit_8_available if koboldai_vars.experimental_features else False, - 'show_custom_model_box': show_custom_model_box}) - if send_horde_models: - get_cluster_models({'key': key_value, 'url': default_url}) - elif key_value != "" and model in [x.name for x in model_menu['apilist']] and model != 'CLUSTER': - get_oai_models(key_value) - - - -def get_layer_count(model, directory=""): - if(model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]): - if(model == "GPT2Custom"): - with open(os.path.join(directory, "config.json"), "r") as f: - model_config = json.load(f) - # Get the model_type from the config or assume a model type if it isn't present - else: - if(directory): - model = directory - from transformers import AutoConfig - if(os.path.isdir(model.replace('/', '_'))): - model_config = AutoConfig.from_pretrained(model.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache") - elif(is_model_downloaded(model)): - model_config = AutoConfig.from_pretrained("models/{}".format(model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache") - elif(os.path.isdir(directory)): - model_config = AutoConfig.from_pretrained(directory, revision=koboldai_vars.revision, cache_dir="cache") - elif(os.path.isdir(koboldai_vars.custmodpth.replace('/', '_'))): - model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache") - else: - model_config = AutoConfig.from_pretrained(model, revision=koboldai_vars.revision, cache_dir="cache") - try: - if (model_config.model_type != 'gpt2' or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel: - return utils.num_layers(model_config) - else: - return None - except: - return None - else: - return None - -@socketio.on('OAI_Key_Update') -def get_oai_models(data): - key = data['key'] - model = data['model'] - koboldai_vars.oaiapikey = key - if model == 'OAI': - url = "https://api.openai.com/v1/engines" - elif model == 'GooseAI': - url = "https://api.goose.ai/v1/engines" - else: - return - - # Get list of models from OAI - logger.init("OAI Engines", status="Retrieving") - req = requests.get( - url, - headers = { - 'Authorization': 'Bearer '+key - } - ) - if(req.status_code == 200): - r = req.json() - engines = r["data"] - try: - engines = [[en["id"], "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")] for en in engines] - except: - logger.error(engines) - raise - - 
online_model = "" - changed=False - - #Save the key - if not path.exists("settings"): - # If the client settings file doesn't exist, create it - # Write API key to file - os.makedirs('settings', exist_ok=True) - if path.exists("settings/{}.v2_settings".format(model)): - with open("settings/{}.v2_settings".format(model), "r") as file: - js = json.load(file) - if 'online_model' in js: - online_model = js['online_model'] - if "apikey" in js: - if js['apikey'] != key: - changed=True - else: - js = {} - changed=True - - if changed: - with open("settings/{}.v2_settings".format(model), "w") as file: - js["apikey"] = key - file.write(json.dumps(js, indent=3)) - - logger.init_ok("OAI Engines", status="OK") - emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1") - emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2") - else: - # Something went wrong, print the message and quit since we can't initialize an engine - logger.init_err("OAI Engines", status="Failed") - logger.error(req.json()) - emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) - -@socketio.on("get_cluster_models") -def get_cluster_models(msg): - koboldai_vars.horde_api_key = msg['key'] or koboldai_vars.horde_api_key - url = msg['url'] or koboldai_vars.horde_url - koboldai_vars.horde_url = url - # Get list of models from public cluster - print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="") - try: - req = requests.get(f"{url}/api/v2/status/models?type=text") - except: - logger.init_err("KAI Horde Models", status="Failed") - logger.error("Provided KoboldAI Horde URL unreachable") - emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"}) - return - if not req.ok: - # Something went wrong, print the message and quit since we can't initialize an engine - logger.init_err("KAI Horde Models", status="Failed") - logger.error(req.json()) - emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1") - return - - engines = req.json() - logger.debug(engines) - try: - engines = [[en["name"], en["name"]] for en in engines] - except: - logger.error(engines) - raise - logger.debug(engines) - - online_model = "" - savesettings() - - logger.init_ok("KAI Horde Models", status="OK") - - emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1") - emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2") - def unload_model(): global model @@ -1845,7 +1612,6 @@ def load_model(plugin, initial_load=False): # loadmodelsettings() # loadsettings() logger.init("GPU support", status="Searching") - koboldai_vars.hascuda = torch.cuda.is_available() and not args.cpu koboldai_vars.bmsupported = ((koboldai_vars.model_type != 'gpt2') or koboldai_vars.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel if(args.breakmodel is not None and args.breakmodel): logger.warning("--breakmodel is no longer supported. 
Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).") @@ -1861,12 +1627,7 @@ def load_model(plugin, initial_load=False): else: logger.init_warn("GPU support", status="Not Found") - if args.cpu: - koboldai_vars.usegpu = False - gpu_layers = None - disk_layers = None - koboldai_vars.breakmodel = False - elif koboldai_vars.hascuda: + if koboldai_vars.hascuda: if(koboldai_vars.bmsupported): koboldai_vars.usegpu = False koboldai_vars.breakmodel = True @@ -1879,6 +1640,7 @@ def load_model(plugin, initial_load=False): model = model_loaders[plugin] model.load(initial_load=initial_load) + logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer if model: diff --git a/koboldai_settings.py b/koboldai_settings.py index d8416df2..e9562ffc 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -710,7 +710,6 @@ class model_settings(settings): self.modeldim = -1 # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B) self.sampler_order = [6, 0, 1, 2, 3, 4, 5] self.newlinemode = "n" - self.lazy_load = True # Whether or not to use torch_lazy_loader.py for transformers models in order to reduce CPU memory usage self.presets = [] # Holder for presets self.selected_preset = "" self.uid_presets = [] @@ -1236,7 +1235,7 @@ class system_settings(settings): self.corescript = "default.lua" # Filename of corescript to load self.gpu_device = 0 # Which PyTorch device to use when using pure GPU generation self.savedir = os.getcwd()+"\\stories" - self.hascuda = False # Whether torch has detected CUDA on the system + self.hascuda = torch.cuda.is_available() # Whether torch has detected CUDA on the system self.usegpu = False # Whether to launch pipeline with GPU support self.splist = [] self.spselect = "" # Temporary storage for soft prompt filename to load diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index d5cf6397..c228e2ee 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -30,7 +30,6 @@ class model_loader(HFTorchInferenceModel): def _load(self, save_model: bool, initial_load: bool) -> None: utils.koboldai_vars.allowsp = True - self.lazy_load = utils.koboldai_vars.lazy_load # Make model path the same as the model name to make this consistent # with the other loading method if it isn't a known model type. 
This @@ -69,12 +68,14 @@ class model_loader(HFTorchInferenceModel): # If we're using torch_lazy_loader, we need to get breakmodel config # early so that it knows where to load the individual model tensors + logger.debug("lazy_load: {} hascuda: {} breakmodel: {} nobreakmode: {}".format(self.lazy_load, utils.koboldai_vars.hascuda, self.breakmodel, self.nobreakmodel)) if ( self.lazy_load and utils.koboldai_vars.hascuda - and utils.koboldai_vars.breakmodel - and not utils.koboldai_vars.nobreakmodel + and self.breakmodel + and not self.nobreakmodel ): + logger.debug("loading breakmodel") self.breakmodel_device_config(self.model_config) if self.lazy_load: diff --git a/modeling/inference_models/gooseai.py b/modeling/inference_models/gooseai.py new file mode 100644 index 00000000..08d8ea06 --- /dev/null +++ b/modeling/inference_models/gooseai.py @@ -0,0 +1,31 @@ +import torch +import requests +import numpy as np +from typing import List, Optional, Union + +import utils +from logger import logger +from modeling.inference_model import ( + GenerationResult, + GenerationSettings, + InferenceModel, +) + +from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader + + + +class OpenAIAPIError(Exception): + def __init__(self, error_type: str, error_message) -> None: + super().__init__(f"{error_type}: {error_message}") + + +class model_loader(openai_gooseai_model_loader): + """InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.url = "https://api.goose.ai/v1/engines" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "GooseAI" \ No newline at end of file diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py index c99e9a05..759feb65 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj.py @@ -27,7 +27,7 @@ class model_loader(HFInferenceModel): #model_name: str, ) -> None: super().__init__() - + self.hf_torch = False self.model_config = None self.capabilties = ModelCapabilities( embedding_manipulation=False, diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index efbb01d3..cad2a7f2 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -11,6 +11,8 @@ from modeling.inference_model import ( InferenceModel, ) +from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader + class OpenAIAPIError(Exception): @@ -18,172 +20,12 @@ class OpenAIAPIError(Exception): super().__init__(f"{error_type}: {error_message}") -class model_loader(InferenceModel): +class model_loader(openai_gooseai_model_loader): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): super().__init__() - self.key = "" + self.url = "https://api.openai.com/v1/engines" def is_valid(self, model_name, model_path, menu_path): - return model_name == "OAI" or model_name == "GooseAI" - - def get_requested_parameters(self, model_name, model_path, menu_path): - self.source = model_name - requested_parameters = [] - requested_parameters.extend([{ - "uitype": "text", - "unit": "text", - "label": "Key", - "id": "key", - "default": "", - "check": {"value": "", 'check': "!="}, - "tooltip": "User Key to use when connecting to OpenAI/GooseAI.", - "menu_path": "", - "refresh_model_inputs": True, - "extra_classes": "" - }, - { - "uitype": "dropdown", - "unit": "text", - "label": "Model", - "id": "model", - 
"default": "", - "check": {"value": "", 'check': "!="}, - "tooltip": "Which model to use when running OpenAI/GooseAI.", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "", - 'children': self.get_oai_models(), - - }]) - return requested_parameters - - def set_input_parameters(self, parameters): - self.key = parameters['key'].strip() - self.model = parameters['model'] - - def get_oai_models(self): - if self.key == "": - return [] - if self.source == 'OAI': - url = "https://api.openai.com/v1/engines" - elif self.source == 'GooseAI': - url = "https://api.goose.ai/v1/engines" - else: - return - - # Get list of models from OAI - logger.init("OAI Engines", status="Retrieving") - req = requests.get( - url, - headers = { - 'Authorization': 'Bearer '+self.key - } - ) - if(req.status_code == 200): - r = req.json() - engines = r["data"] - try: - engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines] - except: - logger.error(engines) - raise - - online_model = "" - - - logger.init_ok("OAI Engines", status="OK") - return engines - else: - # Something went wrong, print the message and quit since we can't initialize an engine - logger.init_err("OAI Engines", status="Failed") - logger.error(req.json()) - emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) - return [] - - - def _load(self, save_model: bool, initial_load: bool) -> None: - self.tokenizer = self._get_tokenizer("gpt2") - - def _raw_generate( - self, - prompt_tokens: Union[List[int], torch.Tensor], - max_new: int, - gen_settings: GenerationSettings, - single_line: bool = False, - batch_count: int = 1, - seed: Optional[int] = None, - **kwargs, - ) -> GenerationResult: - - if seed is not None: - logger.warning( - "Seed is unsupported on the OpenAIAPIInferenceModel. Seed will be ignored." - ) - - decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens)) - - # Store context in memory to use it for comparison with generated content - utils.koboldai_vars.lastctx = decoded_prompt - - # Build request JSON data - # GooseAI is a subntype of OAI. 
So to check if it's this type, we check the configname as a workaround - # as the koboldai_vars.model will always be OAI - if "GooseAI" in utils.koboldai_vars.configname: - reqdata = { - "prompt": decoded_prompt, - "max_tokens": max_new, - "temperature": gen_settings.temp, - "top_a": gen_settings.top_a, - "top_p": gen_settings.top_p, - "top_k": gen_settings.top_k, - "tfs": gen_settings.tfs, - "typical_p": gen_settings.typical, - "repetition_penalty": gen_settings.rep_pen, - "repetition_penalty_slope": gen_settings.rep_pen_slope, - "repetition_penalty_range": gen_settings.rep_pen_range, - "n": batch_count, - # TODO: Implement streaming - "stream": False, - } - else: - reqdata = { - "prompt": decoded_prompt, - "max_tokens": max_new, - "temperature": gen_settings.temp, - "top_p": gen_settings.top_p, - "frequency_penalty": gen_settings.rep_pen, - "n": batch_count, - "stream": False, - } - - req = requests.post( - utils.koboldai_vars.oaiurl, - json=reqdata, - headers={ - "Authorization": "Bearer " + utils.koboldai_vars.oaiapikey, - "Content-Type": "application/json", - }, - ) - - j = req.json() - - if not req.ok: - # Send error message to web client - if "error" in j: - error_type = j["error"]["type"] - error_message = j["error"]["message"] - else: - error_type = "Unknown" - error_message = "Unknown" - raise OpenAIAPIError(error_type, error_message) - - outputs = [out["text"] for out in j["choices"]] - return GenerationResult( - model=self, - out_batches=np.array([self.tokenizer.encode(x) for x in outputs]), - prompt=prompt_tokens, - is_whole_generation=True, - single_line=single_line, - ) + return model_name == "OAI" \ No newline at end of file diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 1941a12e..c7a781d7 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -22,18 +22,19 @@ class HFInferenceModel(InferenceModel): def is_valid(self, model_name, model_path, menu_path): try: if model_path is not None and os.path.exists(model_path): - model_config = AutoConfig.from_pretrained(model_path) + self.model_config = AutoConfig.from_pretrained(model_path) elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): - model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") else: - model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") return True except: return False def get_requested_parameters(self, model_name, model_path, menu_path): requested_parameters = [] - + if not self.hf_torch: + return [] if model_path is not None and os.path.exists(model_path): self.model_config = AutoConfig.from_pretrained(model_path) elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): @@ -124,14 +125,20 @@ class HFInferenceModel(InferenceModel): return requested_parameters def set_input_parameters(self, parameters): - gpu_count = torch.cuda.device_count() - layers = [] - for i in range(gpu_count): - layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) - self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' 
in parameters else None - self.layers = layers - self.disk_layers = parameters['disk_layers'] if 'disk_layers' in parameters else None - self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + if self.hf_torch: + import breakmodel + gpu_count = torch.cuda.device_count() + layers = [] + for i in range(gpu_count): + layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + self.layers = layers + self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0 + breakmodel.gpu_blocks = layers + breakmodel.disk_blocks = self.disk_layers + self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.model_type = self.get_model_type() + self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel self.model_name = parameters['id'] self.path = parameters['path'] if 'path' in parameters else None @@ -157,6 +164,10 @@ class HFInferenceModel(InferenceModel): torch.cuda.empty_cache() except: pass + if self.hf_torch: + breakmodel.breakmodel = True + breakmodel.gpu_blocks = [] + breakmodel.disk_blocks = 0 def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index 7cc16ad5..84c60a6c 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -53,15 +53,12 @@ LOG_SAMPLER_NO_EFFECT = False class HFTorchInferenceModel(HFInferenceModel): - def __init__( - self, - #model_name: str, - #lazy_load: bool, - #low_mem: bool, - ) -> None: + def __init__(self) -> None: super().__init__() - #self.lazy_load = lazy_load - #self.low_mem = low_mem + self.hf_torch = True + self.lazy_load = True + self.low_mem = False + self.nobreakmodel = False self.post_token_hooks = [ PostTokenHooks.stream_tokens, @@ -398,7 +395,7 @@ class HFTorchInferenceModel(HFInferenceModel): Embedding._koboldai_patch_causallm_model = self.model def _get_lazy_load_callback(self, n_layers: int, convert_to_float16: bool = True): - if not utils.koboldai_vars.lazy_load: + if not self.lazy_load: return if utils.args.breakmodel_disklayers is not None: @@ -819,14 +816,14 @@ class HFTorchInferenceModel(HFInferenceModel): elif ( utils.args.breakmodel_gpulayers is not None or utils.args.breakmodel_disklayers is not None + or breakmodel.gpu_blocks != [] ): try: - if not utils.args.breakmodel_gpulayers: - breakmodel.gpu_blocks = [] - else: - breakmodel.gpu_blocks = list( - map(int, utils.args.breakmodel_gpulayers.split(",")) - ) + if breakmodel.gpu_blocks == []: + if utils.args.breakmodel_gpulayers: + breakmodel.gpu_blocks = list( + map(int, utils.args.breakmodel_gpulayers.split(",")) + ) assert len(breakmodel.gpu_blocks) <= torch.cuda.device_count() s = n_layers for i in range(len(breakmodel.gpu_blocks)): diff --git a/modeling/inference_models/parents/openai_gooseai.py b/modeling/inference_models/parents/openai_gooseai.py new file mode 100644 index 00000000..621ccbad --- /dev/null +++ b/modeling/inference_models/parents/openai_gooseai.py @@ -0,0 +1,189 @@ +import torch +import requests +import numpy as np +from typing import List, Optional, Union + +import utils +from logger import logger +from modeling.inference_model import ( + 
GenerationResult, + GenerationSettings, + InferenceModel, +) + + + +class OpenAIAPIError(Exception): + def __init__(self, error_type: str, error_message) -> None: + super().__init__(f"{error_type}: {error_message}") + + +class model_loader(InferenceModel): + """InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.key = "" + self.url = "https://api.goose.ai/v1/engines" + #if self.source == 'OAI': + # url = "https://api.openai.com/v1/engines" + #elif self.source == 'GooseAI': + # url = "https://api.goose.ai/v1/engines" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "OAI" or model_name == "GooseAI" + + def get_requested_parameters(self, model_name, model_path, menu_path): + self.source = model_name + requested_parameters = [] + requested_parameters.extend([{ + "uitype": "text", + "unit": "text", + "label": "Key", + "id": "key", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "User Key to use when connecting to OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "dropdown", + "unit": "text", + "label": "Model", + "id": "model", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Which model to use when running OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": False, + "extra_classes": "", + 'children': self.get_oai_models(), + + }]) + return requested_parameters + + def set_input_parameters(self, parameters): + self.key = parameters['key'].strip() + self.model = parameters['model'] + + def get_oai_models(self): + if self.key == "": + return [] + + + # Get list of models from OAI + logger.init("OAI Engines", status="Retrieving") + req = requests.get( + self.url, + headers = { + 'Authorization': 'Bearer '+self.key + } + ) + if(req.status_code == 200): + r = req.json() + engines = r["data"] + try: + engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines] + except: + logger.error(engines) + raise + + online_model = "" + + + logger.init_ok("OAI Engines", status="OK") + return engines + else: + # Something went wrong, print the message and quit since we can't initialize an engine + logger.init_err("OAI Engines", status="Failed") + logger.error(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) + return [] + + + def _load(self, save_model: bool, initial_load: bool) -> None: + self.tokenizer = self._get_tokenizer("gpt2") + + def _raw_generate( + self, + prompt_tokens: Union[List[int], torch.Tensor], + max_new: int, + gen_settings: GenerationSettings, + single_line: bool = False, + batch_count: int = 1, + seed: Optional[int] = None, + **kwargs, + ) -> GenerationResult: + + if seed is not None: + logger.warning( + "Seed is unsupported on the OpenAIAPIInferenceModel. Seed will be ignored." + ) + + decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens)) + + # Store context in memory to use it for comparison with generated content + utils.koboldai_vars.lastctx = decoded_prompt + + # Build request JSON data + # GooseAI is a subntype of OAI. 
So to check if it's this type, we check the configname as a workaround + # as the koboldai_vars.model will always be OAI + if "GooseAI" in utils.koboldai_vars.configname: + reqdata = { + "prompt": decoded_prompt, + "max_tokens": max_new, + "temperature": gen_settings.temp, + "top_a": gen_settings.top_a, + "top_p": gen_settings.top_p, + "top_k": gen_settings.top_k, + "tfs": gen_settings.tfs, + "typical_p": gen_settings.typical, + "repetition_penalty": gen_settings.rep_pen, + "repetition_penalty_slope": gen_settings.rep_pen_slope, + "repetition_penalty_range": gen_settings.rep_pen_range, + "n": batch_count, + # TODO: Implement streaming + "stream": False, + } + else: + reqdata = { + "prompt": decoded_prompt, + "max_tokens": max_new, + "temperature": gen_settings.temp, + "top_p": gen_settings.top_p, + "frequency_penalty": gen_settings.rep_pen, + "n": batch_count, + "stream": False, + } + + req = requests.post( + self.url, + json=reqdata, + headers={ + "Authorization": "Bearer " + self.key, + "Content-Type": "application/json", + }, + ) + + j = req.json() + + if not req.ok: + # Send error message to web client + if "error" in j: + error_type = j["error"]["type"] + error_message = j["error"]["message"] + else: + error_type = "Unknown" + error_message = "Unknown" + raise OpenAIAPIError(error_type, error_message) + + outputs = [out["text"] for out in j["choices"]] + return GenerationResult( + model=self, + out_batches=np.array([self.tokenizer.encode(x) for x in outputs]), + prompt=prompt_tokens, + is_whole_generation=True, + single_line=single_line, + ) diff --git a/static/koboldai.js b/static/koboldai.js index 7f004ff2..ab7f7832 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1652,6 +1652,12 @@ function selected_model_info(data) { while (loadmodelsettings.firstChild) { loadmodelsettings.removeChild(loadmodelsettings.firstChild); } + //Clear out plugin selector + var model_plugin = document.getElementById('modelplugin'); + while (model_plugin.firstChild) { + model_plugin.removeChild(model_plugin.firstChild); + } + var accept = document.getElementById("btn_loadmodelaccept"); accept.disabled = false; From a6f0e97ba0ecf17b558e7577834ed9cff964be00 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 20:40:05 -0400 Subject: [PATCH 07/68] Working(?) 
breakmodel --- modeling/inference_models/parents/hf.py | 3 +- modeling/inference_models/parents/hf_torch.py | 52 ++++++++++--------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index c7a781d7..67fd8b15 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -18,6 +18,7 @@ class HFInferenceModel(InferenceModel): self.model = None self.tokenizer = None self.badwordsids = koboldai_settings.badwordsids_default + self.usegpu = False def is_valid(self, model_name, model_path, menu_path): try: @@ -136,7 +137,7 @@ class HFInferenceModel(InferenceModel): self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0 breakmodel.gpu_blocks = layers breakmodel.disk_blocks = self.disk_layers - self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel self.model_name = parameters['id'] diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index 84c60a6c..d942a572 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -289,6 +289,7 @@ class HFTorchInferenceModel(HFInferenceModel): raise logger.warning(f"Fell back to GPT2LMHeadModel due to {e}") + logger.debug(traceback_string) try: return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs) except Exception as e: @@ -437,10 +438,10 @@ class HFTorchInferenceModel(HFInferenceModel): ): device_map[key] = ( utils.koboldai_vars.gpu_device - if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu + if utils.koboldai_vars.hascuda and self.usegpu else "cpu" if not utils.koboldai_vars.hascuda - or not utils.koboldai_vars.breakmodel + or not self.breakmodel else breakmodel.primary_device ) else: @@ -456,12 +457,12 @@ class HFTorchInferenceModel(HFInferenceModel): ) device = ( utils.koboldai_vars.gpu_device - if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu + if utils.koboldai_vars.hascuda and self.usegpu else "disk" if layer < disk_blocks and layer < ram_blocks else "cpu" if not utils.koboldai_vars.hascuda - or not utils.koboldai_vars.breakmodel + or not self.breakmodel else "shared" if layer < ram_blocks else bisect.bisect_right( @@ -566,15 +567,15 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) and model_dict[key].dtype is torch.float32 ): model_dict[key] = model_dict[key].to(torch.float16) if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel and model_dict[key].dtype is torch.float16 ): model_dict[key] = model_dict[key].to(torch.float32) @@ -612,14 +613,14 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) ): dtype = torch.float16 if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu 
- and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel ): dtype = torch.float32 if ( @@ -675,16 +676,16 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) and model_dict[key].dtype is torch.float32 ): model_dict[key] = model_dict[key].to(torch.float16) if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel and model_dict[key].dtype is torch.float16 ): model_dict[key] = model_dict[key].to(torch.float32) @@ -723,14 +724,14 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) ): dtype = torch.float16 if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel ): dtype = torch.float32 if ( @@ -764,7 +765,7 @@ class HFTorchInferenceModel(HFInferenceModel): if always_use or ( utils.koboldai_vars.hascuda and self.low_mem - and (utils.koboldai_vars.usegpu or utils.koboldai_vars.breakmodel) + and (self.usegpu or self.breakmodel) ): original_dtype = torch.get_default_dtype() torch.set_default_dtype(torch.float16) @@ -956,8 +957,9 @@ class HFTorchInferenceModel(HFInferenceModel): -1, utils.num_layers(config), ): - utils.koboldai_vars.breakmodel = False - utils.koboldai_vars.usegpu = True + logger.debug("All layers on same GPU. Breakmodel disabled") + self.breakmodel = False + self.usegpu = True utils.koboldai_vars.gpu_device = len(breakmodel.gpu_blocks) - 1 return @@ -966,6 +968,6 @@ class HFTorchInferenceModel(HFInferenceModel): import breakmodel breakmodel.primary_device = "cpu" - utils.koboldai_vars.breakmodel = False - utils.koboldai_vars.usegpu = False + self.breakmodel = False + self.usegpu = False return From aaa91338996a652960bfa8b9461c2f0de8d82bee Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 21:22:33 -0400 Subject: [PATCH 08/68] Disk Cache working UI valid marker broken for disk cache --- aiserver.py | 4 +--- modeling/inference_models/parents/hf.py | 6 +++--- modeling/inference_models/parents/hf_torch.py | 8 ++++---- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/aiserver.py b/aiserver.py index 158a6699..a306449e 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1639,7 +1639,7 @@ def load_model(plugin, initial_load=False): model = model_loaders[plugin] - model.load(initial_load=initial_load) + model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer @@ -6156,8 +6156,6 @@ def UI_2_select_model(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): - logger.info("loading Model") - logger.info(data) model_loaders[data['plugin']].set_input_parameters(data) load_model(data['plugin']) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 67fd8b15..03955d88 100644 --- 
a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -53,12 +53,12 @@ class HFInferenceModel(InferenceModel): break_values = break_values.split(",") else: break_values = [layer_count] - disk_blocks = None + disk_blocks = 0 break_values = [int(x) for x in break_values if x != '' and x is not None] gpu_count = torch.cuda.device_count() break_values += [0] * (gpu_count - len(break_values)) if disk_blocks is not None: - break_values += [disk_blocks] + break_values += [int(disk_blocks)] for i in range(gpu_count): requested_parameters.append({ "uitype": "slider", @@ -134,7 +134,7 @@ class HFInferenceModel(InferenceModel): layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None self.layers = layers - self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0 + self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 breakmodel.gpu_blocks = layers breakmodel.disk_blocks = self.disk_layers self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index d942a572..aae3ada3 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -780,6 +780,7 @@ class HFTorchInferenceModel(HFInferenceModel): device_count = torch.cuda.device_count() if device_count < 2: primary = None + logger.debug("n_layers: {}".format(n_layers)) gpu_blocks = breakmodel.gpu_blocks + ( device_count - len(breakmodel.gpu_blocks) ) * [0] @@ -835,10 +836,7 @@ class HFTorchInferenceModel(HFInferenceModel): s -= breakmodel.gpu_blocks[i] assert sum(breakmodel.gpu_blocks) <= n_layers n_layers -= sum(breakmodel.gpu_blocks) - if utils.args.breakmodel_disklayers is not None: - assert utils.args.breakmodel_disklayers <= n_layers - breakmodel.disk_blocks = utils.args.breakmodel_disklayers - n_layers -= utils.args.breakmodel_disklayers + n_layers -= breakmodel.disk_blocks except: logger.warning( "--breakmodel_gpulayers is malformatted. Please use the --help option to see correct usage of --breakmodel_gpulayers. Defaulting to all layers on device 0." 
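
The hunk above reworks the breakmodel accounting so the GPU block counts can come either from --breakmodel_gpulayers or from values the backend already stored in breakmodel.gpu_blocks, with disk blocks then subtracted from whatever remains. A minimal, self-contained sketch of that bookkeeping and of the settings file written in the hunk that follows; the helper names and example numbers are illustrative, not taken from the patch:

# Illustrative sketch of the breakmodel layer accounting (hypothetical helpers, not in the patch).
def split_layers(n_layers, gpu_blocks, disk_blocks):
    # The patch asserts the per-GPU totals fit inside the model, then subtracts
    # GPU and disk blocks; whatever is left stays on the CPU/"shared" side.
    assert sum(gpu_blocks) <= n_layers
    remaining = n_layers - sum(gpu_blocks) - disk_blocks
    return {"gpu": gpu_blocks, "disk": disk_blocks, "cpu": remaining}

def breakmodel_settings_text(gpu_blocks, disk_blocks):
    # Mirrors the "settings/<model>.breakmodel" format written in the next hunk:
    # comma-joined GPU blocks on the first line, disk block count on the second.
    return "{}\n{}".format(",".join(map(str, gpu_blocks)), disk_blocks)

# Example: a 32-layer model split across two GPUs with 4 layers cached on disk.
print(split_layers(32, [16, 8], 4))          # {'gpu': [16, 8], 'disk': 4, 'cpu': 4}
print(breakmodel_settings_text([16, 8], 4))  # "16,8" then "4" on the next line
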
@@ -949,6 +947,8 @@ class HFTorchInferenceModel(HFInferenceModel): logger.init_ok("Final device configuration:", status="Info") self.breakmodel_device_list(n_layers, primary=breakmodel.primary_device) + with open("settings/{}.breakmodel".format(self.model_name.replace("/", "_")), "w") as file: + file.write("{}\n{}".format(",".join(map(str, breakmodel.gpu_blocks)), breakmodel.disk_blocks)) # If all layers are on the same device, use the old GPU generation mode while len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0: From f027d8b6e56393c12b8cd1611a3c0b7cc90802c9 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 17 May 2023 21:15:31 -0400 Subject: [PATCH 09/68] Better working valid detection and named model backends for UI --- aiserver.py | 53 +++-- modeling/inference_models/api.py | 3 +- modeling/inference_models/basic_api.py | 4 +- modeling/inference_models/generic_hf_torch.py | 3 +- modeling/inference_models/gooseai.py | 5 +- modeling/inference_models/hf_mtj.py | 4 +- modeling/inference_models/horde.py | 3 +- modeling/inference_models/openai.py | 6 +- modeling/inference_models/parents/hf.py | 24 +- .../parents/openai_gooseai.py | 2 +- modeling/inference_models/readonly.py | 3 +- modeling/inference_models/rwkv.py | 5 +- static/koboldai.js | 206 +++++++++++------- templates/templates.html | 5 +- 14 files changed, 191 insertions(+), 135 deletions(-) diff --git a/aiserver.py b/aiserver.py index 92dde7f4..314fb512 100644 --- a/aiserver.py +++ b/aiserver.py @@ -622,12 +622,12 @@ from modeling.patches import patch_transformers #Load all of the model importers import importlib -model_loader_code = {} -model_loaders = {} +model_backend_code = {} +model_backends = {} for module in os.listdir("./modeling/inference_models"): if os.path.isfile(os.path.join("./modeling/inference_models",module)) and module[-3:] == '.py': - model_loader_code[module[:-3]] = importlib.import_module('modeling.inference_models.{}'.format(module[:-3])) - model_loaders[module[:-3]] = model_loader_code[module[:-3]].model_loader() + model_backend_code[module[:-3]] = importlib.import_module('modeling.inference_models.{}'.format(module[:-3])) + model_backends[model_backend_code[module[:-3]].model_backend_name] = model_backend_code[module[:-3]].model_backend() old_socketio_on = socketio.on @@ -1354,6 +1354,7 @@ def general_startup(override_args=None): parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable") parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") + parser.add_argument("--model_backend", help="Specify the model backend you want to use") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") parser.add_argument("--sh_apikey", help="Specify the API key to use for txt2img from the Stable Horde. Get a key from https://horde.koboldai.net/register") @@ -1447,6 +1448,12 @@ def general_startup(override_args=None): args.max_summary_length = int(args.max_summary_length) if args.model: + # At this point we have to try to load the model through the selected backend + if not args.model_backend: + logger.error("Didn't select a model backend. 
Please enter one through the --model_backend or remove the --model from the run command") + exit() + #if + koboldai_vars.model = args.model; koboldai_vars.revision = args.revision koboldai_settings.multi_story = args.multi_story @@ -1472,7 +1479,7 @@ def general_startup(override_args=None): koboldai_vars.quiet = True if args.nobreakmodel: - model_loaders['generic_hf_torch'].nobreakmodel = True + model_backends['Huggingface'].nobreakmodel = True if args.remote: koboldai_vars.host = True; @@ -1484,7 +1491,7 @@ def general_startup(override_args=None): koboldai_vars.host = True; if args.lowmem: - model_loaders['generic_hf_torch'].low_mem = True + model_backends['Huggingface'].low_mem = True if args.host != "Disabled": # This means --host option was submitted without an argument @@ -1520,7 +1527,7 @@ def general_startup(override_args=None): koboldai_vars.use_colab_tpu = False koboldai_vars.hascuda = False koboldai_vars.usegpu = False - model_loaders['generic_hf_torch'].nobreakmodel = True + model_backends['Huggingface'].nobreakmodel = True koboldai_vars.smandelete = koboldai_vars.host == args.override_delete koboldai_vars.smanrename = koboldai_vars.host == args.override_rename @@ -1582,7 +1589,7 @@ def unload_model(): koboldai_vars.badwordsids = koboldai_settings.badwordsids_default -def load_model(plugin, initial_load=False): +def load_model(model_backend, initial_load=False): global model global tokenizer global model_config @@ -1637,7 +1644,7 @@ def load_model(plugin, initial_load=False): koboldai_vars.default_preset = koboldai_settings.default_preset - model = model_loaders[plugin] + model = model_backends[model_backend] model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) logger.debug("Model Type: {}".format(koboldai_vars.model_type)) @@ -6103,33 +6110,23 @@ def UI_2_select_model(data): emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) else: #Get load methods - logger.debug("Asking for model info on potential model: {}".format(data)) - valid = False if 'path' not in data or data['path'] == "": valid_loaders = {} - for model_loader in model_loaders: - logger.debug("Testing Loader {} for model {}: {}".format(model_loader, data["name"], model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]))) - if model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]): - valid_loaders[model_loader] = model_loaders[model_loader].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) - valid = True - if valid: - logger.debug("Valid Loaders: {}".format(valid_loaders)) - emit("selected_model_info", valid_loaders) - if not valid and 'path' in data: + for model_backend in model_backends: + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + emit("selected_model_info", {"model_backends": valid_loaders, "preselected": "Huggingface"}) + else: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) output = [] for path in paths: valid=False - for model_loader in model_loaders: - if model_loaders[model_loader].is_valid(path[1], path[0], "Custom"): + for model_backend in model_backends: + if model_backends[model_backend].is_valid(path[1], path[0], "Custom"): valid=True break output.append({'label': path[1], 'name': 
path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) - emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) - elif not valid: - logger.error("Nothing can load the model: {}".format(valid_loaders)) - + emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return @@ -6156,7 +6153,7 @@ def UI_2_select_model(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): - model_loaders[data['plugin']].set_input_parameters(data) + model_backends[data['plugin']].set_input_parameters(data) load_model(data['plugin']) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) @@ -10671,7 +10668,7 @@ for schema in config_endpoint_schemas: def startup(): if koboldai_vars.model == "" or koboldai_vars.model is None: koboldai_vars.model = "ReadOnly" - socketio.start_background_task(load_model, *('readonly',), **{'initial_load':True}) + socketio.start_background_task(load_model, *('Read Only',), **{'initial_load':True}) print("", end="", flush=True) diff --git a/modeling/inference_models/api.py b/modeling/inference_models/api.py index 5bddd714..409158f5 100644 --- a/modeling/inference_models/api.py +++ b/modeling/inference_models/api.py @@ -17,12 +17,13 @@ from modeling.inference_model import ( ModelCapabilities, ) +model_backend_name = "KoboldAI API" class APIException(Exception): """To be used for errors when using the Kobold API as an interface.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() #self.base_url = "" diff --git a/modeling/inference_models/basic_api.py b/modeling/inference_models/basic_api.py index 5666ba8e..cca9652b 100644 --- a/modeling/inference_models/basic_api.py +++ b/modeling/inference_models/basic_api.py @@ -15,11 +15,13 @@ from modeling.inference_model import ( ) +model_backend_name = "KoboldAI Old Colab Method" + class BasicAPIException(Exception): """To be used for errors when using the Basic API as an interface.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index c228e2ee..f7a00f45 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -22,8 +22,9 @@ except ModuleNotFoundError as e: from modeling.inference_models.parents.hf_torch import HFTorchInferenceModel +model_backend_name = "Huggingface" -class model_loader(HFTorchInferenceModel): +class model_backend(HFTorchInferenceModel): def _initialize_model(self): return diff --git a/modeling/inference_models/gooseai.py b/modeling/inference_models/gooseai.py index 08d8ea06..9d6e8771 100644 --- a/modeling/inference_models/gooseai.py +++ b/modeling/inference_models/gooseai.py @@ -11,16 +11,17 @@ from modeling.inference_model import ( InferenceModel, ) -from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader +from modeling.inference_models.parents.openai_gooseai import model_backend as openai_gooseai_model_backend +model_backend_name = "GooseAI" class 
OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") -class model_loader(openai_gooseai_model_loader): +class model_backend(openai_gooseai_model_backend): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py index 4e82d348..6351eca2 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj.py @@ -19,10 +19,10 @@ from modeling.inference_model import ( from modeling.inference_models.parents.hf import HFInferenceModel from modeling.tokenizer import GenericTokenizer +model_backend_name = "Huggingface MTJ" - -class model_loader(HFInferenceModel): +class model_backend(HFInferenceModel): def __init__( self, #model_name: str, diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index bd457197..6c880bbe 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -16,12 +16,13 @@ from modeling.inference_model import ( ModelCapabilities, ) +model_backend_name = "Horde" class HordeException(Exception): """To be used for errors on server side of the Horde.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() self.url = "https://horde.koboldai.net" diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index cad2a7f2..19a7d1e6 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -11,16 +11,16 @@ from modeling.inference_model import ( InferenceModel, ) -from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader - +from modeling.inference_models.parents.openai_gooseai import model_backend as openai_gooseai_model_backend +model_backend_name = "OpenAI" class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") -class model_loader(openai_gooseai_model_loader): +class model_backend(openai_gooseai_model_backend): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index ba291c3f..69549bd5 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -1,7 +1,7 @@ import os from typing import Optional from transformers import AutoConfig - +import warnings import utils import koboldai_settings from logger import logger @@ -43,7 +43,7 @@ class HFInferenceModel(InferenceModel): else: self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None - if layer_count is not None and layer_count >= 0: + if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: if os.path.exists("settings/{}.breakmodel".format(model_name.replace("/", "_"))): with open("settings/{}.breakmodel".format(model_name.replace("/", "_")), "r") as file: data = [x for x in file.read().split("\n")[:2] if x != ''] @@ 
-128,15 +128,17 @@ class HFInferenceModel(InferenceModel): def set_input_parameters(self, parameters): if self.hf_torch: import breakmodel - gpu_count = torch.cuda.device_count() - layers = [] - for i in range(gpu_count): - layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) - self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None - self.layers = layers - self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 - breakmodel.gpu_blocks = layers - breakmodel.disk_blocks = self.disk_layers + layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None + if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: + gpu_count = torch.cuda.device_count() + layers = [] + for i in range(gpu_count): + layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + self.layers = layers + self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 + breakmodel.gpu_blocks = layers + breakmodel.disk_blocks = self.disk_layers self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel diff --git a/modeling/inference_models/parents/openai_gooseai.py b/modeling/inference_models/parents/openai_gooseai.py index 621ccbad..871ea5ce 100644 --- a/modeling/inference_models/parents/openai_gooseai.py +++ b/modeling/inference_models/parents/openai_gooseai.py @@ -18,7 +18,7 @@ class OpenAIAPIError(Exception): super().__init__(f"{error_type}: {error_message}") -class model_loader(InferenceModel): +class model_backend(InferenceModel): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): diff --git a/modeling/inference_models/readonly.py b/modeling/inference_models/readonly.py index c642c05a..92531af4 100644 --- a/modeling/inference_models/readonly.py +++ b/modeling/inference_models/readonly.py @@ -14,12 +14,13 @@ from modeling.inference_model import ( ModelCapabilities, ) +model_backend_name = "Read Only" class BasicAPIException(Exception): """To be used for errors when using the Basic API as an interface.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() diff --git a/modeling/inference_models/rwkv.py b/modeling/inference_models/rwkv.py index d14d8c81..fa6497b7 100644 --- a/modeling/inference_models/rwkv.py +++ b/modeling/inference_models/rwkv.py @@ -55,7 +55,10 @@ MODEL_FILES = { } -class model_loader(InferenceModel): +model_backend_name = "RWKV" + + +class model_backend(InferenceModel): def __init__( self, #model_name: str, diff --git a/static/koboldai.js b/static/koboldai.js index de3ab324..905403c1 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1645,8 +1645,85 @@ function show_model_menu(data) { } +function model_settings_checker() { + //get check value: + 
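// For reference: the check_data read below is the "check" descriptor a model backend
// attaches to each requested parameter (see the get_requested_parameters hunks above).
// Two illustrative shapes; the ids, values and messages here are hypothetical examples:
//   {"value": "", "check": "!=", "check_message": "Please enter a key"}
//   {"sum": ["0_Layers", "CPU_Layers", "Disk_Layers"], "value": 28,
//    "check": "=", "check_message": "All layers must be assigned"}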
missing_element = false; + if (this.check_data != null) { + if ('sum' in this.check_data) { + check_value = 0 + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value); + } else { + missing_element = true; + } + } + } else { + check_value = this.value + } + if (this.check_data['check'] == "=") { + valid = (check_value == this.check_data['value']); + } else if (this.check_data['check'] == "!=") { + valid = (check_value != this.check_data['value']); + } else if (this.check_data['check'] == ">=") { + valid = (check_value >= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value <= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value > this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value < this.check_data['value']); + } + if (valid || missing_element) { + //if we are supposed to refresh when this value changes we'll resubmit + if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) { + console.log("resubmit"); + } + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } + } + } else { + this.closest(".setting_container_model").classList.remove('input_error'); + this.closest(".setting_container_model").removeAttribute("tooltip"); + } + } else { + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + } else { + this.closest(".setting_container_model").classList.add('input_error'); + this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + } + var accept = document.getElementById("btn_loadmodelaccept"); + ok_to_load = true; + for (const item of document.getElementsByClassName("input_error")) { + if (item.classList.contains("input_error") && !item.closest(".model_plugin_settings_area").classList.contains("hidden")) { + ok_to_load = false; + break; + } + } + + if (ok_to_load) { + accept.classList.remove("disabled"); + accept.disabled = false; + } else { + accept.classList.add("disabled"); + accept.disabled = true; + } +} -function selected_model_info(data) { +function selected_model_info(sent_data) { + const data = sent_data['model_backends']; //clear out the loadmodelsettings var loadmodelsettings = document.getElementById('loadmodelsettings') while (loadmodelsettings.firstChild) { @@ -1667,7 +1744,10 @@ function selected_model_info(data) { for (const area of document.getElementsByClassName("model_plugin_settings_area")) { area.classList.add("hidden"); } - document.getElementById(this.value + 
"_settings_area").classList.remove("hidden"); + if (document.getElementById(this.value + "_settings_area")) { + document.getElementById(this.value + "_settings_area").classList.remove("hidden"); + } + model_settings_checker() } //create the content for (const [loader, items] of Object.entries(data)) { @@ -1679,7 +1759,11 @@ function selected_model_info(data) { modelpluginoption.innerText = loader; modelpluginoption.value = loader; modelplugin.append(modelpluginoption); + if (loader == sent_data['preselected']) { + modelplugin.value = sent_data['preselected']; + } + //create the user input for each requested input for (item of items) { let new_setting = document.getElementById('blank_model_settings').cloneNode(true); new_setting.id = loader; @@ -1687,73 +1771,7 @@ function selected_model_info(data) { new_setting.querySelector('#blank_model_settings_label').innerText = item['label']; new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']); - onchange_event = function () { - //get check value: - if ('sum' in this.check_data) { - check_value = 0 - for (const temp of this.check_data['sum']) { - if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { - check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value); - } - } - } else { - check_value = this.value - } - if (this.check_data['check'] == "=") { - valid = (check_value == this.check_data['value']); - } else if (this.check_data['check'] == "!=") { - valid = (check_value != this.check_data['value']); - } else if (this.check_data['check'] == ">=") { - valid = (check_value >= this.check_data['value']); - } else if (this.check_data['check'] == "<=") { - valid = (check_value <= this.check_data['value']); - } else if (this.check_data['check'] == "<=") { - valid = (check_value > this.check_data['value']); - } else if (this.check_data['check'] == "<=") { - valid = (check_value < this.check_data['value']); - } - if (valid) { - //if we are supposed to refresh when this value changes we'll resubmit - if (this.getAttribute("refresh_model_inputs") == "true") { - console.log("resubmit"); - } - if ('sum' in this.check_data) { - for (const temp of this.check_data['sum']) { - if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error'); - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); - } - } - } else { - this.closest(".setting_container_model").classList.remove('input_error'); - this.closest(".setting_container_model").removeAttribute("tooltip"); - } - var accept = document.getElementById("btn_loadmodelaccept"); - if (document.getElementsByClassName("input_error").length) - accept.disabled = true; - } else { - if ('sum' in this.check_data) { - for (const temp of this.check_data['sum']) { - if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); - } - } - } else { - this.closest(".setting_container_model").classList.add('input_error'); - 
this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); - } - } - var accept = document.getElementById("btn_loadmodelaccept"); - if (document.getElementsByClassName("input_error").length > 0) { - accept.classList.add("disabled"); - accept.disabled = true; - } else { - accept.classList.remove("disabled"); - accept.disabled = false; - } - - } + onchange_event = model_settings_checker; if (item['uitype'] == "slider") { var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number'); slider_number.value = item['default']; @@ -1764,6 +1782,7 @@ function selected_model_info(data) { slider.value = item['default']; slider.min = item['min']; slider.max = item['max']; + slider.setAttribute("data_type", item['unit']); slider.id = loader + "|" + item['id'] + "_value"; if ('check' in item) { slider.check_data = item['check']; @@ -1777,25 +1796,37 @@ function selected_model_info(data) { slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min']; new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max']; + slider.noresubmit = true; slider.onchange(); + slider.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_slider').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_slider').remove(); } if (item['uitype'] == "toggle") { - var toggle = new_setting.querySelector('#blank_model_settings_toggle'); + toggle = document.createElement("input"); + toggle.type='checkbox'; + toggle.classList.add("setting_item_input"); + toggle.classList.add("blank_model_settings_input"); + toggle.classList.add("model_settings_input"); toggle.id = loader + "|" + item['id'] + "_value"; toggle.checked = item['default']; - toggle.onchange = onchange_event; + toggle.onclick = onchange_event; + toggle.setAttribute("data_type", item['unit']); toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { toggle.check_data = item['check']; } else { toggle.check_data = null; } - toggle.onchange(); + new_setting.querySelector('#blank_model_settings_toggle').append(toggle); + setTimeout(function() { + $('#'+loader + "\\|" + item['id'] + "_value").bootstrapToggle({size: "mini", onstyle: "success", toggle: "toggle"}); + }, 200); + toggle.noresubmit = true; + toggle.onclick(); + toggle.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_checkbox_container').classList.add("hidden"); - new_setting.querySelector('#blank_model_settings_toggle').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_toggle').remove(); } if (item['uitype'] == "dropdown") { var select_element = new_setting.querySelector('#blank_model_settings_dropdown'); @@ -1807,6 +1838,7 @@ function selected_model_info(data) { select_element.append(new_option); } select_element.value = item['default']; + select_element.setAttribute("data_type", item['unit']); select_element.onchange = onchange_event; select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { @@ -1814,14 +1846,17 @@ function selected_model_info(data) { } else { select_element.check_data = null; } + select_element.noresubmit = true; select_element.onchange(); + select_element.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_dropdown').classList.add("hidden"); + 
new_setting.querySelector('#blank_model_settings_dropdown').remove(); } if (item['uitype'] == "password") { var password_item = new_setting.querySelector('#blank_model_settings_password'); password_item.id = loader + "|" + item['id'] + "_value"; password_item.value = item['default']; + password_item.setAttribute("data_type", item['unit']); password_item.onchange = onchange_event; password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { @@ -1829,24 +1864,29 @@ function selected_model_info(data) { } else { password_item.check_data = null; } + password_item.noresubmit = true; password_item.onchange(); + password_item.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_password').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_password').remove(); } if (item['uitype'] == "text") { var text_item = new_setting.querySelector('#blank_model_settings_text'); text_item.id = loader + "|" + item['id'] + "_value"; text_item.value = item['default']; text_item.onchange = onchange_event; + text_item.setAttribute("data_type", item['unit']); text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { text_item.check_data = item['check']; } else { text_item.check_data = null; } + text_item.noresubmit = true; text_item.onchange(); + text_item.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_text').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_text').remove(); } model_area.append(new_setting); @@ -1891,7 +1931,15 @@ function load_model() { //get an object of all the input settings from the user data = {} for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - data[element.id.split("|")[1].replace("_value", "")] = element.value; + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; diff --git a/templates/templates.html b/templates/templates.html index 49cd3e5b..49fa99f6 100644 --- a/templates/templates.html +++ b/templates/templates.html @@ -162,9 +162,8 @@ - - - + + From 182ecff20273b4921f4cefa04f7a845d22fc58ac Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 18 May 2023 16:01:17 -0400 Subject: [PATCH 10/68] Added in model backend to the command line arguments --- aiserver.py | 70 ++++++++++++++++--------- modeling/inference_model.py | 2 + modeling/inference_models/horde.py | 1 - modeling/inference_models/parents/hf.py | 8 ++- 4 files changed, 54 insertions(+), 27 deletions(-) diff --git a/aiserver.py b/aiserver.py index 314fb512..235732ec 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1355,6 +1355,7 @@ def general_startup(override_args=None): parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") parser.add_argument("--model_backend", help="Specify the model backend you want to use") + parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input to 
the model loading process (leave blank to get required parameters)") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") parser.add_argument("--sh_apikey", help="Specify the API key to use for txt2img from the Stable Horde. Get a key from https://horde.koboldai.net/register") @@ -1447,14 +1448,6 @@ def general_startup(override_args=None): args.max_summary_length = int(args.max_summary_length) - if args.model: - # At this point we have to try to load the model through the selected backend - if not args.model_backend: - logger.error("Didn't select a model backend. Please enter one through the --model_backend or remove the --model from the run command") - exit() - #if - - koboldai_vars.model = args.model; koboldai_vars.revision = args.revision koboldai_settings.multi_story = args.multi_story @@ -1556,6 +1549,37 @@ def general_startup(override_args=None): socketio.start_background_task(socket_io_relay, koboldai_settings.queue, socketio) + if args.model: + # At this point we have to try to load the model through the selected backend + if not args.model_backend: + logger.error("Didn't select a model backend. Please enter one through the --model_backend or remove the --model from the run command") + logger.error("Possible model backends are: {}".format(", ".join([x for x in model_backends]))) + exit() + if args.model_backend not in model_backends: + logger.error("Your selected model backend ({}) isn't in the model backends we know about ({})".format(args.model_backend, ", ".join([x for x in model_backends]))) + exit() + #OK, we've been given a model to load and a backend to load it through. Now we need to get a list of parameters and make sure we get what we need to actually load it + parameters = model_backends[args.model_backend].get_requested_parameters(args.model, args.path, "") + ok_to_load = True + arg_parameters = json.loads(args.model_parameters.replace("'", "\"")) if args.model_parameters != "" else {} + for parameter in parameters: + if parameter['default'] == "" or parameter['id'] not in arg_parameters: + ok_to_load = False + elif parameter['id'] not in arg_parameters: + arg_parameters[parameter] = parameter['default'] + if not ok_to_load: + logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} (required parameters shown below)") + logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) + exit() + arg_parameters['id'] = args.model + arg_parameters['model_path'] = args.path + arg_parameters['menu_path'] = "" + model_backends[args.model_backend].set_input_parameters(arg_parameters) + koboldai_vars.model = args.model + return args.model_backend + else: + return "Read Only" + def unload_model(): @@ -1633,13 +1657,13 @@ def load_model(model_backend, initial_load=False): else: logger.init_warn("GPU support", status="Not Found") - if koboldai_vars.hascuda: - if(koboldai_vars.bmsupported): - koboldai_vars.usegpu = False - koboldai_vars.breakmodel = True - else: - koboldai_vars.breakmodel = False - koboldai_vars.usegpu = use_gpu + #if koboldai_vars.hascuda: + # if(koboldai_vars.bmsupported): + # koboldai_vars.usegpu = False + # koboldai_vars.breakmodel = True + # else: + # koboldai_vars.breakmodel = False + # koboldai_vars.usegpu = use_gpu else: koboldai_vars.default_preset = koboldai_settings.default_preset @@ -10665,10 +10689,8 @@ for schema in config_endpoint_schemas: #==================================================================# # Final startup commands to launch Flask app #==================================================================# -def startup(): - if koboldai_vars.model == "" or koboldai_vars.model is None: - koboldai_vars.model = "ReadOnly" - socketio.start_background_task(load_model, *('Read Only',), **{'initial_load':True}) +def startup(command_line_backend): + socketio.start_background_task(load_model, *(command_line_backend,), **{'initial_load':True}) print("", end="", flush=True) @@ -10677,7 +10699,7 @@ def run(): global app global tpu_mtj_backend - general_startup() + command_line_backend = general_startup() # Start flask & SocketIO logger.init("Flask", status="Starting") if koboldai_vars.host: @@ -10725,7 +10747,7 @@ def run(): cloudflare = _run_cloudflared(port) koboldai_vars.cloudflare_link = cloudflare - startup() + startup(command_line_backend) if(args.localtunnel or args.ngrok or args.remote): with open('cloudflare.log', 'w') as cloudflarelog: @@ -10745,7 +10767,7 @@ def run(): else: socketio.run(app, port=port) else: - startup() + startup(command_line_backend) if args.unblock: if not args.no_ui: try: @@ -10773,13 +10795,13 @@ def run(): if __name__ == "__main__": run() else: - general_startup() + command_line_backend = general_startup() # Start flask & SocketIO logger.init("Flask", status="Starting") Session(app) logger.init_ok("Flask", status="OK") patch_transformers() - startup() + startup(command_line_backend) koboldai_settings.port = args.port if "port" in args and args.port is not None else 5000 print("{0}\nServer started in WSGI mode!{1}".format(colors.GREEN, colors.END), flush=True) diff --git a/modeling/inference_model.py b/modeling/inference_model.py index 4a29a027..c3fff46f 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -178,6 +178,8 @@ class InferenceModel: return {} def set_input_parameters(self, parameters): + for parameter in parameters: + setattr(self, parameter, parameters[parameter]) return def load(self, save_model: bool = False, initial_load: bool = False) -> None: diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 6c880bbe..5d8552fb 100644 --- a/modeling/inference_models/horde.py +++ 
b/modeling/inference_models/horde.py @@ -86,7 +86,6 @@ class model_backend(InferenceModel): def get_cluster_models(self): # Get list of models from public cluster - logger.info("Retrieving engine list...") try: req = requests.get(f"{self.url}/api/v2/status/models?type=text") except: diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 69549bd5..70143b69 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -133,10 +133,14 @@ class HFInferenceModel(InferenceModel): gpu_count = torch.cuda.device_count() layers = [] for i in range(gpu_count): - layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + layers.append(int(parameters["{}_Layers".format(i)]) if isinstance(parameters["{}_Layers".format(i)], str) and parameters["{}_Layers".format(i)].isnumeric() else None) self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + if isinstance(self.cpu_layers, str): + self.cpu_layers = int(self.cpu_layers) if self.cpu_layers.isnumeric() else 0 self.layers = layers - self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 + self.disk_layers = parameters['Disk_Layers'] if 'Disk_Layers' in parameters else 0 + if isinstance(self.disk_layers, str): + self.disk_layers = int(self.disk_layers) if self.disk_layers.isnumeric() else 0 breakmodel.gpu_blocks = layers breakmodel.disk_blocks = self.disk_layers self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None From 4040538d3438acd56e4a9121708a79b6d0d5da83 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 18 May 2023 18:34:00 -0400 Subject: [PATCH 11/68] Model Backends now defined in the menu --- aiserver.py | 38 ++++++++++++++++-------------- modeling/inference_models/horde.py | 3 ++- static/koboldai.css | 11 +++++++-- static/koboldai.js | 16 +++++++++++-- templates/popups.html | 2 +- 5 files changed, 46 insertions(+), 24 deletions(-) diff --git a/aiserver.py b/aiserver.py index 235732ec..aeebdbc1 100644 --- a/aiserver.py +++ b/aiserver.py @@ -178,11 +178,13 @@ class MenuModel(MenuItem): vram_requirements: str = "", model_type: MenuModelType = MenuModelType.HUGGINGFACE, experimental: bool = False, + model_backend: str = "Huggingface", ) -> None: super().__init__(label, name, experimental) self.model_type = model_type self.vram_requirements = vram_requirements self.is_downloaded = is_model_downloaded(self.name) + self.model_backend = model_backend def to_ui1(self) -> list: return [ @@ -245,7 +247,7 @@ model_menu = { MenuFolder("Official RWKV-4", "rwkvlist"), MenuFolder("Untuned GPT2", "gpt2list"), MenuFolder("Online Services", "apilist"), - MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER), + MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER, model_backend="Read Only"), ], 'adventurelist': [ MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "64GB"), @@ -369,25 +371,24 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'rwkvlist': [ - MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", ""), - MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", ""), - MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", ""), - MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", ""), - MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", ""), - MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", ""), - MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", ""), - 
MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", ""), - MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", ""), - MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", ""), + MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", "", model_backend="RWKV"), + MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", "", model_backend="RWKV"), + MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", "", model_backend="RWKV"), + MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", "", model_backend="RWKV"), + MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", "", model_backend="RWKV"), + MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", "", model_backend="RWKV"), + MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", "", model_backend="RWKV"), MenuFolder("Return to Main Menu", "mainmenu"), ], 'apilist': [ - MenuModel("GooseAI API (requires API key)", "GooseAI", model_type=MenuModelType.ONLINE_API), - MenuModel("OpenAI API (requires API key)", "OAI", model_type=MenuModelType.ONLINE_API), - MenuModel("InferKit API (requires API key)", "InferKit", model_type=MenuModelType.ONLINE_API), - MenuModel("KoboldAI API", "API", model_type=MenuModelType.ONLINE_API), - MenuModel("Basic Model API", "Colab", model_type=MenuModelType.ONLINE_API), - MenuModel("KoboldAI Horde", "CLUSTER", model_type=MenuModelType.ONLINE_API), + MenuModel("GooseAI API (requires API key)", "GooseAI", model_type=MenuModelType.ONLINE_API, model_backend="GooseAI"), + MenuModel("OpenAI API (requires API key)", "OAI", model_type=MenuModelType.ONLINE_API, model_backend="OpenAI"), + MenuModel("KoboldAI API", "API", model_type=MenuModelType.ONLINE_API, model_backend="KoboldAI API"), + MenuModel("Basic Model API", "Colab", model_type=MenuModelType.ONLINE_API, model_backend="KoboldAI Old Colab Method"), + MenuModel("KoboldAI Horde", "CLUSTER", model_type=MenuModelType.ONLINE_API, model_backend="Horde"), MenuFolder("Return to Main Menu", "mainmenu"), ] } @@ -1670,6 +1671,7 @@ def load_model(model_backend, initial_load=False): model = model_backends[model_backend] model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) + koboldai_vars.model = model.model_name if "model_name" in vars(model) else model.id #Should have model_name, but it could be set to id depending on how it's setup logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer @@ -6136,7 +6138,7 @@ def UI_2_select_model(data): #Get load methods if 'path' not in data or data['path'] == "": valid_loaders = {} - for model_backend in model_backends: + for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) emit("selected_model_info", {"model_backends": valid_loaders, "preselected": "Huggingface"}) else: diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 5d8552fb..8e05fbbd 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -70,6 +70,7 @@ class model_backend(InferenceModel): "id": "model", "default": model_name, "check": {"value": "", 'check': "!="}, + 
'multiple': True, "tooltip": "Which model to use when running OpenAI/GooseAI.", "menu_path": "", "refresh_model_inputs": False, @@ -102,7 +103,7 @@ class model_backend(InferenceModel): engines = req.json() try: - engines = [{"text": en["name"], "value": en["name"]} for en in engines] + engines = [{"text": "all", "value": "all"}] + [{"text": en["name"], "value": en["name"]} for en in engines] except: logger.error(engines) raise diff --git a/static/koboldai.css b/static/koboldai.css index f3dde4b7..b70c6877 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -352,7 +352,7 @@ border-top-right-radius: var(--tabs_rounding); grid-template-areas: "label value" "item item" "minlabel maxlabel"; - grid-template-rows: 20px 23px 20px; + grid-template-rows: 20px auto 20px; grid-template-columns: auto 30px; row-gap: 0.2em; background-color: var(--setting_background); @@ -2124,6 +2124,13 @@ body { cursor: pointer; background-color: #688f1f; } + +.loadmodelsettings { + overflow-y: auto; + max-height: 50%; +} + + /*----------------------------- Model Load Popup ------------------------------------------*/ #specspan, .popup_list_area .model_item .model { @@ -3539,7 +3546,7 @@ h2 .material-icons-outlined { } -.horde_trigger[model_model="ReadOnly"], +.horde_trigger[model_model="Read Only"], .horde_trigger[model_model="CLUSTER"] { display: none; } diff --git a/static/koboldai.js b/static/koboldai.js index 905403c1..399e52cf 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1695,12 +1695,20 @@ function model_settings_checker() { for (const temp of this.check_data['sum']) { if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + if (this.check_data['check_message']) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } else { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } } } } else { this.closest(".setting_container_model").classList.add('input_error'); - this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + if (this.check_data['check_message']) { + this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } else { + this.closest(".setting_container_model").removeAttribute("tooltip"); + } } } } @@ -1841,6 +1849,10 @@ function selected_model_info(sent_data) { select_element.setAttribute("data_type", item['unit']); select_element.onchange = onchange_event; select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if (('multiple' in item) && (item['multiple'])) { + select_element.multiple = true; + select_element.size = 10; + } if ('check' in item) { select_element.check_data = item['check']; } else { diff --git a/templates/popups.html b/templates/popups.html index 59f07e70..9c6b4a9e 100644 --- a/templates/popups.html +++ b/templates/popups.html @@ -48,7 +48,7 @@
- + - + + + {% include 'popups.html' %} + + From a1036465af02cefda32af06d4d3a04b0161aa118 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 12:46:02 -0400 Subject: [PATCH 17/68] Add warning about command line changes and new modular backend --- data/one_time_messages.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/data/one_time_messages.json b/data/one_time_messages.json index 3062827b..7485fd15 100644 --- a/data/one_time_messages.json +++ b/data/one_time_messages.json @@ -8,5 +8,10 @@ "id": 2, "title": "Changes since last version", "message": "

New Features

\n

Phrase Biasing

\nThere is now a Phrase Biasing implementation under Settings -> Biasing. You can now nudge the AI toward or away from generating specific words or phrases (without needing to use a userscript)\n
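One way to picture the idea (a minimal sketch only, not KoboldAI's actual implementation; the token IDs, vocab size, and bias value below are placeholders) is a bias added to the next-token logits for the tokens that make up a phrase before sampling:

import torch

def apply_phrase_bias(logits: torch.Tensor, phrase_token_ids, bias: float) -> torch.Tensor:
    # logits: next-token scores over the whole vocabulary.
    # A positive bias makes the phrase's tokens more likely, a negative bias less likely.
    biased = logits.clone()
    biased[phrase_token_ids] += bias
    return biased

# Example with made-up token IDs for an encouraged phrase and an assumed vocab size of 50257.
next_token_logits = torch.zeros(50257)
next_token_logits = apply_phrase_bias(next_token_logits, [42, 1337], 2.0)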

Context viewer

\nThe Context Viewer allows you to see what is sent to the AI. Given that only so much text can be read by the AI at a time, it's useful to know exactly what it's looking at.\n

Story Commentary

\nStory Commentary allows custom characters to speak their mind on your story. This can be configured under Settings -> Story Commentary. Characters can be added as World Info entries with a type of \"Commentator\".\n

New Chat UI (Experimental)

\nThis new interface for Chat Mode is more visually \"chat like\" in comparison to the old text-based mode. To activate it, ensure both Chat Mode (Home -> Game Mode) and Experimental UI (Interface -> Experimental UI) are enabled, then change the Chat Style (Interface -> Chat Style) to \"Messages\". Similarly to the story commentators, characters can be defined in the World Info menu; if a character's name matches a chat character defined in the World Info menu, the image on that character's entry will be used as an icon.\n

Tweaks

\nTweaks allow small UI changes to be mixed and matched to create a more personalized interface.\n

Attention Bias (Experimental)

\nAttention Bias hopes to cause some parts of the context to be internally weighed more than others in self attention. This is very experimental, and only works on OPT-based models for now.\n
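As a toy illustration of the underlying idea (not the OPT-specific hook in this patch series; shapes and names are illustrative), a per-position bias is added to the raw attention scores before the softmax so that biased context positions receive more attention weight:

import torch

def biased_attention(query, key, value, position_bias):
    # query/key/value: (seq_len, head_dim); position_bias: (seq_len,), with larger
    # values making those context positions count for more in the softmax.
    scores = query @ key.transpose(-1, -2) / key.shape[-1] ** 0.5
    scores = scores + position_bias
    weights = torch.softmax(scores, dim=-1)
    return weights @ value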

Genre

\nThe genre menu (Author's Note -> Genre) prepends genre information to the context. You can either choose from preset genres or write your own. Works better on models trained with genre/tag information, including most new models in the model picker.\n

World Info generation

\nWorld Info entries can now have their text generated automatically from a title and type. Powered by whatever model you have active, so effectiveness will vary with model.\n

Drag and drop import

\nImportable files can now be dragged into the UI to load them.\n

NovelAI lorebook/card support

\nNovelAI lorebooks and cards can now be imported as World Info. If a card is uploaded, the PNG will be used as the World Info image.\n

Finder (Ctrl+K)

\nAllows jumping to various UI elements and performing actions quickly. The mode can be adjusted by clicking the mode icon or with hotkeys on an empty search box (Search: '#', World Info: '>', Inference Scratchpad: '!', Image Prompting: '?').\n

Club import wizard

\nPrompts imported from aetherroom.club with placeholders will now show a setup prompt where you can input the value of these placeholders.\n

Context menu

\nA context menu has been added and is available in several areas. Give it a try by right-clicking on the main text area.\n

Substitutions

\nSubstitutions allow phrases to be replaced if you or the AI input them into the story. The default Substitutions are disabled and can be enabled with the pencil icon to the right of the entry.\n
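Conceptually (a simplified sketch only; the real feature also tracks which substitutions are enabled and handles word boundaries), a substitution is just a trigger string mapped to its replacement, applied to both user input and AI output:

def apply_substitutions(text: str, rules: dict) -> str:
    # rules maps an enabled trigger phrase to the text it should become.
    for trigger, replacement in rules.items():
        text = text.replace(trigger, replacement)
    return text

print(apply_substitutions("teh end", {"teh": "the"}))  # -> "the end"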

Inference scratchpad

\nThe Inference Scratchpad is a way of prompting the AI in isolation, outside of your story; the AI will not see anything in your story, and nothing the AI responds with will be added to the story. This can be useful in scenarios where you wish to use the AI in a more generic way. For example, you could prompt it with something like \"List of fantasy names:\" to receive such a list.\n

Error notifications

\nErrors are generally less opaque to the user. Client-side errors and many server errors will show a notification detailing the error.\n

Ctrl+Click to jump to World Info entry

\nHolding Ctrl while clicking on a mention of a World Info tag will bring you to the entry.\n

Model picker indicators

\nThe model picker now has indicators showing if a model is downloaded, may achieve poor quality, or may not load on your system.\n

More shortcuts

\nPress Ctrl+? to view them.\n

Image Generation

\nYou can now generate flavor images based on the game text at each action. In the settings menu in the home tab, you can click Generate Image to create an image based on the current text; the generated image will appear below. Hovering on the image will give you the prompt used to generate it. You can click on the text of previous actions to see the image associated with that action, and you can right click on the image and hit Retry to generate a new image based on that action.\nSettings for how/where the image is generated are in the left flyout menu under Interface, Image.\n

Text to speech (Experimental)

\nText to speech is now available. To enable it go to the settings menu, enable experimental ui, then enable generate audio. Audio will be generated for your actions. Play buttons will appear next to the submit button, and right clicking an action will give you a new speak option to start reading from that point.\n

UI Mode

\nIn response to feedback, we've added different UI mode levels from simple to power user. Advanced hides some of the less used options, while Power User shows everything. Simple is very much a work in progress, but it intends to simplify the majority of settings to 3 sliders. Feel free to play with it but don't expect good results yet.\n

Presets

\nPresets are now here. Community presets are pre-loaded in KoboldAI and can be selected from the settings tab in the settings menu, or from the home screen. In addition, you can save your own presets and share them with others (or send them to us for future inclusion). Presets are saved in the presets folder.\n

Alt Text Gen

\nWith this setting on, the system will insert World Info text one sentence before the word that triggers it in the AI text. This should make the AI pay more attention to it and make it more likely to influence the output.\n
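A much-simplified sketch of that insertion rule (real trigger matching and sentence splitting are more involved; the function and parameter names here are illustrative): find the first sentence containing the trigger and splice the World Info text in just before it.

import re

def insert_world_info(story: str, trigger: str, wi_text: str) -> str:
    # Split the story into sentences and insert the entry's text one
    # sentence before the first sentence that mentions the trigger.
    sentences = re.split(r"(?<=[.!?])\s+", story)
    for i, sentence in enumerate(sentences):
        if trigger.lower() in sentence.lower():
            return " ".join(sentences[:i] + [wi_text] + sentences[i:])
    return story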

Alt Multi Gen

\nIf set, multiple generations will be produced sequentially rather than all at once. This reduces the amount of VRAM required and can let you generate multiple story options with more demanding models, at the potential expense of speed.\n
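In effect (illustrative pseudologic only; generate_fn stands in for whatever generation call is actually used), the same prompt is run N times in a row instead of asking for N sequences in one batched call, trading speed for peak VRAM:

def generate_options(generate_fn, prompt, n, alt_multi_gen=False):
    # generate_fn(prompt, num_outputs) is assumed to return a list of continuations.
    if alt_multi_gen:
        # One generation at a time: lower peak VRAM, slower overall.
        return [generate_fn(prompt, 1)[0] for _ in range(n)]
    # Single batched call: faster, but memory use scales with n.
    return generate_fn(prompt, n)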

Beep on Complete

\nThere is now an option in the settings menu, Interface tab, called Beep on Complete. If set, the browser will beep when generation is complete. Useful for slow systems.\n

Privacy Screen (Experimental)

\nBy hitting Ctrl+L the screen will be blurred for all users until the password is entered and Unlock is clicked (the password is set in the settings menu, Interface tab).\n

Change Game Text Size

\nGame text can be adjusted to any size\n

No double spaces

\nWhen set, double spaces will be replaced by single spaces.\n

Themes

\nWe now have a theming engine. Themes can come in 3 flavors. Palette themes use a more basic theming system entirely in the UI. Select the colors from the Palette section and things will change. Advanced themes can have various variables set manually (click the advanced theme button to see). These allow you to go a level deeper than the palette system. Finally, we have custom themes. These are custom CSS code that can do almost anything. All themes can be saved and shared. Saved themes are stored in the themes folder\n

Auto Memory (Experimental)

\nThe start of auto-memory is in place and we are looking for feedback. It currently generates the summary but does not put it in memory (though you can copy-paste it). To see it, turn on experimental ui, go to the story menu, memory tab and click generate under auto-memory. \n

General Notes

\nIf you want a place to write stuff down that saves with the story but doesn't affect it, that's what the notes tab is for. It is found under the story menu, Notes tab\n

W++ (or SBF)

\nIn world info entries you can turn on w++ mode. This will allow you to enter data in the W++ format without having to actually write it.\n
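For reference, W++ entries are attribute/value blocks roughly of the following shape (an illustrative example, not taken from this patch); W++ mode builds this text for you from form fields instead of you typing it by hand:

[character("Alice")
{
Species("Human")
Personality("Curious" + "Brave")
Likes("Tea" + "Long walks")
}]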

World Info Titles

\nWorld info entries now have titles on them to make it easier to find the one you want. Soon the world info entries will be collapsed to just the title to make navigation easier\n

Download/Upload world info folders

\nWorld info folders can now be downloaded and/or uploaded. This makes it easier to share world info.\n

Game Text in AI Context

\nGame text that will be in the AI's context is now bold in the game screen. This will let you easily see where the AI will stop remembering your game (anything not bolded is \"forgotten\")\n

World info context

\nText that triggers a world info entry will now be italicized. Hovering over that text will show a tooltip with the World Info text that will be added to the AI's context.\n

Updated help text

\nHelp text has been expanded throughout the UI.\n

Context Bar

At the bottom of the story menu is a bar that shows how much of the AI's context is in use, and by what. Different colors correspond to different data types (actions, memory, world info, etc.).
\n
\n

Improvements

\n

Author's Note

\nThe author's note is now inserted between sentences a configurable distance from the end of the story. This should improve the coherence of generated text while keeping the author's note relevant." + }, + "3": { + "id": 3, + "title": "Changes since last version", + "message": "

New Features

\n

Modular Model Backends

Model loading is now accomplished via separate model backend files. This will allow KoboldAI to more easily add new model backends (for example 4-bit, GGML, or whatever developers want to add) without significant code rework.
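Based only on the hooks visible in the diffs in this series (backend discovery, is_valid, get_requested_parameters, set_input_parameters, and the optional disable flag), a backend module under modeling/inference_models/<name>/class.py looks roughly like the sketch below; it is illustrative, not a complete or working backend.

from modeling.inference_model import InferenceModel

model_backend_name = "Example backend"   # name shown in the UI and accepted by --model_backend

class model_backend(InferenceModel):
    def __init__(self):
        super().__init__()
        self.disable = False   # set True to hide this backend from the loader

    def is_valid(self, model_name, model_path, menu_path):
        # Return True if this backend can load the selected model or path.
        return True

    def get_requested_parameters(self, model_name, model_path, menu_path, parameters={}):
        # Describe the inputs the UI (or the --model_parameters JSON) must supply.
        return []

    def set_input_parameters(self, parameters):
        # The base class simply copies each supplied value onto the backend instance.
        for parameter in parameters:
            setattr(self, parameter, parameters[parameter])

    # ...plus the load()/generation hooks that actually build and run the model.

From the command line such a backend would then be selected with --model_backend and given its inputs as --model_parameters JSON, for example --model_parameters '{"use_gpu": true}' (the parameter id is whatever the backend requested; values here are placeholders).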

Rework of command line arguments

--breakmodel command line arguments have been deleted and if you use those you will have to pass through --model_backend and --model_parameters." } } \ No newline at end of file From 9df1f03b12ffa2513b15472a96338483178fe760 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 14:28:36 -0400 Subject: [PATCH 18/68] Fix for custom huggingface model menu entry --- aiserver.py | 36 ++++---- modeling/inference_models/hf.py | 154 ++++++++++++++++++-------------- static/application.js | 23 ++++- static/koboldai.js | 23 ++++- 4 files changed, 139 insertions(+), 97 deletions(-) diff --git a/aiserver.py b/aiserver.py index b4aad4e7..fe6d7606 100644 --- a/aiserver.py +++ b/aiserver.py @@ -233,7 +233,7 @@ model_menu = { "mainmenu": [ MenuPath("Load a model from its directory", "NeoCustom"), MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), - MenuFolder("Load custom model from Hugging Face", "customhuggingface"), + MenuModel("Load custom model from Hugging Face", "customhuggingface", ""), MenuFolder("Adventure Models", "adventurelist"), MenuFolder("Novel Models", "novellist"), MenuFolder("Chat Models", "chatlist"), @@ -6135,7 +6135,7 @@ def UI_2_select_model(data): valid_loaders = {} for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) - emit("selected_model_info", {"model_backends": valid_loaders, "preselected": "Huggingface"}) + emit("selected_model_info", {"model_backends": valid_loaders}) else: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) @@ -6149,24 +6149,20 @@ def UI_2_select_model(data): output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return - - - #We've selected a menu - if data['model'] in model_menu: - sendModelSelection(menu=data['model']) - #We've selected a custom line - elif data['menu'] in ("NeoCustom", "GPT2Custom"): - get_model_info(data['menu'], directory=data['display_name']) - #We've selected a custom menu folder - elif data['model'] in ("NeoCustom", "GPT2Custom") and 'path' in data: - sendModelSelection(menu=data['model'], folder=data['path']) - #We've selected a custom menu - elif data['model'] in ("NeoCustom", "GPT2Custom", "customhuggingface"): - sendModelSelection(menu=data['model'], folder="./models") - else: - #We now have some model we want to potentially load. 
- #First we need to send the client the model parameters (layers, etc) - get_model_info(data['model']) + + + + +#==================================================================# +# Event triggered when user changes a model parameter and it's set to resubmit +#==================================================================# +@socketio.on('resubmit_model_info') +@logger.catch +def UI_2_resubmit_model_info(data): + valid_loaders = {} + for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"], parameters=data) + emit("selected_model_info", {"model_backends": valid_loaders}) #==================================================================# # Event triggered when user loads a model diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 6f848fa9..eff3d1ce 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -33,95 +33,111 @@ class HFInferenceModel(InferenceModel): except: return False - def get_requested_parameters(self, model_name, model_path, menu_path): + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): requested_parameters = [] if not self.hf_torch: return [] - if model_path is not None and os.path.exists(model_path): - self.model_config = AutoConfig.from_pretrained(model_path) - elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): - self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") - else: - self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") - layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None - if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: - if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self): - with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f: - temp = json.load(f) - break_values = temp['layers'] if 'layers' in temp else [layer_count] - disk_blocks = temp['disk_layers'] if 'disk_layers' in temp else 0 + if model_name == 'customhuggingface': + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "Huggingface Model Name", + "id": "custom_model_name", + "default": parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Model name from https://huggingface.co/", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }) + + if model_name != 'customhuggingface' or "custom_model_name" in parameters: + model_name = parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else model_name + if model_path is not None and os.path.exists(model_path): + self.model_config = 
AutoConfig.from_pretrained(model_path) + elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): + self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") else: - break_values = [layer_count] - disk_blocks = 0 - - break_values = [int(x) for x in break_values if x != '' and x is not None] - gpu_count = torch.cuda.device_count() - break_values += [0] * (gpu_count - len(break_values)) - if disk_blocks is not None: - break_values += [int(disk_blocks)] - for i in range(gpu_count): + self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None + if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: + if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self): + with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f: + temp = json.load(f) + break_values = temp['layers'] if 'layers' in temp else [layer_count] + disk_blocks = temp['disk_layers'] if 'disk_layers' in temp else 0 + else: + break_values = [layer_count] + disk_blocks = 0 + + break_values = [int(x) for x in break_values if x != '' and x is not None] + gpu_count = torch.cuda.device_count() + break_values += [0] * (gpu_count - len(break_values)) + if disk_blocks is not None: + break_values += [int(disk_blocks)] + for i in range(gpu_count): + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "{} Layers".format(torch.cuda.get_device_name(i)), + "id": "{}_Layers".format(i), + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": break_values[i], + "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) requested_parameters.append({ "uitype": "slider", "unit": "int", - "label": "{} Layers".format(torch.cuda.get_device_name(i)), - "id": "{}_Layers".format(i), + "label": "CPU Layers", + "id": "CPU_Layers", "min": 0, "max": layer_count, "step": 1, "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), - "default": break_values[i], - "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), + "default": layer_count - sum(break_values), + "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. 
Use if you must.", "menu_path": "Layers", "extra_classes": "", "refresh_model_inputs": False }) - requested_parameters.append({ - "uitype": "slider", - "unit": "int", - "label": "CPU Layers", - "id": "CPU_Layers", - "min": 0, - "max": layer_count, - "step": 1, - "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, - "check_message": "The sum of assigned layers must equal {}".format(layer_count), - "default": layer_count - sum(break_values), - "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.", - "menu_path": "Layers", - "extra_classes": "", - "refresh_model_inputs": False - }) - if disk_blocks is not None: + if disk_blocks is not None: + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "Disk Layers", + "id": "Disk_Layers", + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": disk_blocks, + "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. Use as a last resort.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + else: requested_parameters.append({ - "uitype": "slider", - "unit": "int", - "label": "Disk Layers", - "id": "Disk_Layers", - "min": 0, - "max": layer_count, - "step": 1, - "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, - "check_message": "The sum of assigned layers must equal {}".format(layer_count), - "default": disk_blocks, - "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. 
Use as a last resort.", + "uitype": "toggle", + "unit": "bool", + "label": "Use GPU", + "id": "use_gpu", + "default": False, + "tooltip": "Whether or not to use the GPU", "menu_path": "Layers", "extra_classes": "", "refresh_model_inputs": False }) - else: - requested_parameters.append({ - "uitype": "toggle", - "unit": "bool", - "label": "Use GPU", - "id": "use_gpu", - "default": False, - "tooltip": "Whether or not to use the GPU", - "menu_path": "Layers", - "extra_classes": "", - "refresh_model_inputs": False - }) - + return requested_parameters @@ -153,7 +169,7 @@ class HFInferenceModel(InferenceModel): self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel - self.model_name = parameters['id'] + self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id'] self.path = parameters['path'] if 'path' in parameters else None def unload(self): diff --git a/static/application.js b/static/application.js index 99a65ed7..ca445c5f 100644 --- a/static/application.js +++ b/static/application.js @@ -4009,7 +4009,25 @@ function model_settings_checker() { if (valid || missing_element) { //if we are supposed to refresh when this value changes we'll resubmit if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) { - console.log("resubmit"); + //get an object of all the input settings from the user + data = {} + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; + } + data = {...data, ...selected_model_data}; + + data['plugin'] = document.getElementById("modelplugin").value; + + socket.emit("resubmit_model_info", data); } if ('sum' in this.check_data) { for (const temp of this.check_data['sum']) { @@ -4099,9 +4117,6 @@ function selected_model_info(sent_data) { modelpluginoption.innerText = loader; modelpluginoption.value = loader; modelplugin.append(modelpluginoption); - if (loader == sent_data['preselected']) { - modelplugin.value = sent_data['preselected']; - } //create the user input for each requested input for (item of items) { diff --git a/static/koboldai.js b/static/koboldai.js index 99595879..dabbcda9 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1683,7 +1683,25 @@ function model_settings_checker() { if (valid || missing_element) { //if we are supposed to refresh when this value changes we'll resubmit if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) { - console.log("resubmit"); + //get an object of all the input settings from the user + data = {} + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if 
(element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; + } + data = {...data, ...selected_model_data}; + + data['plugin'] = document.getElementById("modelplugin").value; + + socket.emit("resubmit_model_info", data); } if ('sum' in this.check_data) { for (const temp of this.check_data['sum']) { @@ -1773,9 +1791,6 @@ function selected_model_info(sent_data) { modelpluginoption.innerText = loader; modelpluginoption.value = loader; modelplugin.append(modelpluginoption); - if (loader == sent_data['preselected']) { - modelplugin.value = sent_data['preselected']; - } //create the user input for each requested input for (item of items) { From 756a33c63e323372716a1321e649f01873ecb533 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:28:39 -0400 Subject: [PATCH 19/68] Added try loop on model backend so it will continue with other models. --- aiserver.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/aiserver.py b/aiserver.py index fe6d7606..02ea2229 100644 --- a/aiserver.py +++ b/aiserver.py @@ -627,8 +627,11 @@ model_backend_code = {} model_backends = {} for module in os.listdir("./modeling/inference_models"): if not os.path.isfile(os.path.join("./modeling/inference_models",module)) and module != '__pycache__': - model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) - model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() + try: + model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) + model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() + except: + logger.error("Model Backend {} failed to load".format(module)) old_socketio_on = socketio.on @@ -1572,7 +1575,7 @@ def general_startup(override_args=None): elif parameter['id'] not in arg_parameters: arg_parameters[parameter] = parameter['default'] if not ok_to_load: - logger.error("Your selected backend needs additional parameters to run. Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} (required parameters shown below)") + logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) logger.error("Missing: {}".format(", ".join(mising_parameters))) exit() From db30402c3bd01432f8a8a8239faee5c8e55991aa Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:30:36 -0400 Subject: [PATCH 20/68] Move RWKV to use Huggingface model backend --- aiserver.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/aiserver.py b/aiserver.py index 02ea2229..a1d548e9 100644 --- a/aiserver.py +++ b/aiserver.py @@ -371,16 +371,16 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'rwkvlist': [ - MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", "", model_backend="RWKV"), - MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", "", model_backend="RWKV"), - MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", "", model_backend="RWKV"), - MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", "", model_backend="RWKV"), - MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", "", model_backend="RWKV"), - MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", "", model_backend="RWKV"), - MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", "", model_backend="RWKV"), - MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", "", model_backend="RWKV"), - MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", "", model_backend="RWKV"), - MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", ""), + MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", ""), + MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", ""), + MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", ""), + MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", ""), + MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", ""), + MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", ""), + MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", ""), + MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", ""), + MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", ""), MenuFolder("Return to Main Menu", "mainmenu"), ], 'apilist': [ From b21884fc31c556c81a89158123dfce18ba398640 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:34:15 -0400 Subject: [PATCH 21/68] Better error reporting --- aiserver.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index a1d548e9..7e8c09c8 100644 --- a/aiserver.py +++ b/aiserver.py @@ -56,6 +56,7 @@ import html import argparse import sys import gc +import traceback import lupa @@ -630,8 +631,10 @@ for module in os.listdir("./modeling/inference_models"): try: model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() - except: + except Exception: logger.error("Model Backend {} failed to load".format(module)) + logger.error(traceback.format_exc()) + old_socketio_on = socketio.on From 309f1c432ae79acdbeb6b52a6f65ed963ef5d36d Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:43:13 -0400 Subject: [PATCH 22/68] Added the ability to disable model backends in the model backend code. 
--- aiserver.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 7e8c09c8..40335a9f 100644 --- a/aiserver.py +++ b/aiserver.py @@ -631,10 +631,14 @@ for module in os.listdir("./modeling/inference_models"): try: model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() + if 'disable' in vars(model_backends[model_backend_code[module].model_backend_name]): + if model_backends[model_backend_code[module].model_backend_name].disable: + del model_backends[model_backend_code[module].model_backend_name] except Exception: logger.error("Model Backend {} failed to load".format(module)) logger.error(traceback.format_exc()) - + +logger.info("We loaded the following model backends: \n{}".format("\n".join([x for x in model_backends]))) old_socketio_on = socketio.on From 6df5fe4ad07acb7b901b65ade005ec8af40126dc Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 18:24:06 -0400 Subject: [PATCH 23/68] partial load model from custom path in menu --- aiserver.py | 20 ++++++++++++++++---- modeling/inference_models/api/class.py | 1 + modeling/inference_models/basic_api/class.py | 1 + modeling/inference_models/gooseai/class.py | 1 + modeling/inference_models/horde/class.py | 1 + modeling/inference_models/openai/class.py | 1 + modeling/inference_models/openai_gooseai.py | 6 ++++++ 7 files changed, 27 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index 40335a9f..14d268be 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6141,11 +6141,19 @@ def UI_2_select_model(data): emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) else: #Get load methods - if 'path' not in data or data['path'] == "": + if data['ismenu'] == 'false': valid_loaders = {} - for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): - valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) - emit("selected_model_info", {"model_backends": valid_loaders}) + if data['id'] in [item.name for sublist in model_menu for item in model_menu[sublist]]: + #Here if we have a model id that's in our menu, we explicitly use that backend + for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + emit("selected_model_info", {"model_backends": valid_loaders}) + else: + #Here we have a model that's not in our menu structure (either a custom model or a custom path + #so we'll just go through all the possible loaders + for model_backend in model_backends: + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + emit("selected_model_info", {"model_backends": valid_loaders}) else: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) @@ -6154,8 +6162,12 @@ def UI_2_select_model(data): valid=False for model_backend in model_backends: if model_backends[model_backend].is_valid(path[1], path[0], "Custom"): + logger.debug("{} says 
valid".format(model_backend)) valid=True break + else: + logger.debug("{} says invalid".format(model_backend)) + output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return diff --git a/modeling/inference_models/api/class.py b/modeling/inference_models/api/class.py index d9ec1147..3d54edd9 100644 --- a/modeling/inference_models/api/class.py +++ b/modeling/inference_models/api/class.py @@ -6,6 +6,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/basic_api/class.py b/modeling/inference_models/basic_api/class.py index 6f045ef5..2094d34e 100644 --- a/modeling/inference_models/basic_api/class.py +++ b/modeling/inference_models/basic_api/class.py @@ -4,6 +4,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/gooseai/class.py b/modeling/inference_models/gooseai/class.py index 8d58b4b5..1073f45f 100644 --- a/modeling/inference_models/gooseai/class.py +++ b/modeling/inference_models/gooseai/class.py @@ -2,6 +2,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/horde/class.py b/modeling/inference_models/horde/class.py index 387c5833..2c4c4bf5 100644 --- a/modeling/inference_models/horde/class.py +++ b/modeling/inference_models/horde/class.py @@ -5,6 +5,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/openai/class.py b/modeling/inference_models/openai/class.py index 84fe6df9..492a3fdb 100644 --- a/modeling/inference_models/openai/class.py +++ b/modeling/inference_models/openai/class.py @@ -2,6 +2,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/openai_gooseai.py b/modeling/inference_models/openai_gooseai.py index 4d885074..e4a027db 100644 --- a/modeling/inference_models/openai_gooseai.py +++ b/modeling/inference_models/openai_gooseai.py @@ -2,6 +2,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger @@ -30,6 +31,11 @@ class model_backend(InferenceModel): return model_name == "OAI" or model_name == "GooseAI" def get_requested_parameters(self, model_name, model_path, menu_path): + try: + print(self.source) + except: + print(vars(self)) + raise if os.path.exists("settings/{}.model_backend.settings".format(self.source)) and 'colaburl' not in vars(self): with open("settings/{}.model_backend.settings".format(self.source), "r") as f: self.key = json.load(f)['key'] From a1ee6849dc1d98c287561d5bdb6aff225c0322a5 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 18:28:47 -0400 Subject: [PATCH 24/68] Custom Paths from Menu structure fixed --- aiserver.py | 3 ++- modeling/inference_models/gooseai/class.py | 2 +- modeling/inference_models/openai/class.py | 2 +- modeling/inference_models/openai_gooseai.py | 5 ----- 4 files 
changed, 4 insertions(+), 8 deletions(-) diff --git a/aiserver.py b/aiserver.py index 14d268be..d4a127f0 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6152,7 +6152,8 @@ def UI_2_select_model(data): #Here we have a model that's not in our menu structure (either a custom model or a custom path #so we'll just go through all the possible loaders for model_backend in model_backends: - valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + if model_backends[model_backend].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) emit("selected_model_info", {"model_backends": valid_loaders}) else: #Get directories diff --git a/modeling/inference_models/gooseai/class.py b/modeling/inference_models/gooseai/class.py index 1073f45f..934f15dd 100644 --- a/modeling/inference_models/gooseai/class.py +++ b/modeling/inference_models/gooseai/class.py @@ -19,7 +19,6 @@ model_backend_name = "GooseAI" class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") - self.source = "GooseAI" class model_backend(openai_gooseai_model_backend): @@ -28,6 +27,7 @@ class model_backend(openai_gooseai_model_backend): def __init__(self): super().__init__() self.url = "https://api.goose.ai/v1/engines" + self.source = "GooseAI" def is_valid(self, model_name, model_path, menu_path): return model_name == "GooseAI" \ No newline at end of file diff --git a/modeling/inference_models/openai/class.py b/modeling/inference_models/openai/class.py index 492a3fdb..cea644ea 100644 --- a/modeling/inference_models/openai/class.py +++ b/modeling/inference_models/openai/class.py @@ -19,7 +19,6 @@ model_backend_name = "OpenAI" class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") - self.source = "OpenAI" class model_backend(openai_gooseai_model_backend): @@ -28,6 +27,7 @@ class model_backend(openai_gooseai_model_backend): def __init__(self): super().__init__() self.url = "https://api.openai.com/v1/engines" + self.source = "OpenAI" def is_valid(self, model_name, model_path, menu_path): return model_name == "OAI" \ No newline at end of file diff --git a/modeling/inference_models/openai_gooseai.py b/modeling/inference_models/openai_gooseai.py index e4a027db..e4b9dfb8 100644 --- a/modeling/inference_models/openai_gooseai.py +++ b/modeling/inference_models/openai_gooseai.py @@ -31,11 +31,6 @@ class model_backend(InferenceModel): return model_name == "OAI" or model_name == "GooseAI" def get_requested_parameters(self, model_name, model_path, menu_path): - try: - print(self.source) - except: - print(vars(self)) - raise if os.path.exists("settings/{}.model_backend.settings".format(self.source)) and 'colaburl' not in vars(self): with open("settings/{}.model_backend.settings".format(self.source), "r") as f: self.key = json.load(f)['key'] From 128c77e0fde7deae7fa30e65cc4166eb46ba314d Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 19:01:11 -0400 Subject: [PATCH 25/68] Default model backend to huggingface if not present when loading a model through the command line --- aiserver.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/aiserver.py b/aiserver.py index 
d4a127f0..a8591dc3 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1365,7 +1365,7 @@ def general_startup(override_args=None): parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable") parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") - parser.add_argument("--model_backend", help="Specify the model backend you want to use") + parser.add_argument("--model_backend", default="Huggingface", help="Specify the model backend you want to use") parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input to the model loading process (leave blank to get required parameters)") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") @@ -1558,10 +1558,6 @@ def general_startup(override_args=None): if args.model: # At this point we have to try to load the model through the selected backend - if not args.model_backend: - logger.error("Didn't select a model backend. Please enter one through the --model_backend or remove the --model from the run command") - logger.error("Possible model backends are: {}".format(", ".join([x for x in model_backends]))) - exit() if args.model_backend not in model_backends: logger.error("Your selected model backend ({}) isn't in the model backends we know about ({})".format(args.model_backend, ", ".join([x for x in model_backends]))) exit() @@ -1576,11 +1572,11 @@ def general_startup(override_args=None): arg_parameters['use_gpu'] = True for parameter in parameters: - if parameter['default'] == "" or parameter['id'] not in arg_parameters: + if parameter['default'] == "" and parameter['id'] not in arg_parameters: mising_parameters.append(parameter['id']) ok_to_load = False elif parameter['id'] not in arg_parameters: - arg_parameters[parameter] = parameter['default'] + arg_parameters[parameter['id']] = parameter['default'] if not ok_to_load: logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) From 19559d5eef5999c48503852d02d45c1c7fcce7ec Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 19:15:25 -0400 Subject: [PATCH 26/68] Fix for colors in the classic UI --- static/custom.css | 74 ++++++++++++++++++++++++++++++++++++++++ templates/templates.html | 1 - 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/static/custom.css b/static/custom.css index ffa6f44f..412c7f1b 100644 --- a/static/custom.css +++ b/static/custom.css @@ -2330,4 +2330,78 @@ body.connected .popupfooter, .popupfooter.always-available { .popup .model_item .model_menu_selected { color: var(--popup_selected_color); background-color: var(--popup_selected_color_text); +} + +.settings_select { + color: var(--dropdown_text); + background: var(--dropdown_background); + margin-left: auto; + margin-right: 25px; +} + +.setting_value { + text-align: right; + grid-area: value; + font-size: calc(12px + var(--font_size_adjustment)); + padding: 2px; + padding-top: 0px; + background-color: inherit; + color: inherit; + border: none; + outline: none; +} + +.setting_value:focus { + color: var(--text_edit); +} + +.setting_container_model { + display: grid; + grid-template-areas: "label value" + "item item" + "minlabel maxlabel"; + grid-template-rows: 20px auto 20px; + grid-template-columns: auto 30px; + row-gap: 0.2em; + background-color: var(--setting_background); + color: var(--setting_text); + border-radius: var(--radius_settings_background); + padding: 2px; + margin: 2px; + width: calc(100%); +} + +.setting_container_model .setting_item{ + font-size: calc(0.93em + var(--font_size_adjustment)); + margin-left: 10px; +} + + +.setting_minlabel { + padding-top: 6px; + grid-area: minlabel; + overflow: hidden; + padding: 5px; + padding-top: 0px; + text-align: left; + font-size: calc(0.8em + var(--font_size_adjustment)); +} + +.setting_maxlabel { + padding-top: 6px; + grid-area: maxlabel; + overflow: hidden; + padding: 5px; + padding-top: 0px; + text-align: right; + font-size: calc(0.8em + var(--font_size_adjustment)); +} + +.setting_label { + display: flex; + grid-area: label; + overflow: hidden; + padding: 5px; + padding-right: 0px; + padding-top: 0px; } \ No newline at end of file diff --git a/templates/templates.html b/templates/templates.html index 49fa99f6..926bf854 100644 --- a/templates/templates.html +++ b/templates/templates.html @@ -1,5 +1,4 @@ -
From 513b8575e71d164fc82747009f8fd3391f4ceb28 Mon Sep 17 00:00:00 2001 From: ebolam Date: Sat, 20 May 2023 11:01:49 -0400 Subject: [PATCH 27/68] Fix for missing import Fix for model name being a path which caused save issues --- aiserver.py | 2 +- modeling/inference_models/hf.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index a8591dc3..38ffc3f6 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6165,7 +6165,7 @@ def UI_2_select_model(data): else: logger.debug("{} says invalid".format(model_backend)) - output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) + output.append({'label': path[1], 'name': path[1], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index eff3d1ce..318423d5 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -8,6 +8,7 @@ import koboldai_settings from logger import logger from modeling.inference_model import InferenceModel import torch +import gc class HFInferenceModel(InferenceModel): From 925cad2e2fa6c65b8ea37680d19fa69023cce9f5 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 14:50:13 -0400 Subject: [PATCH 28/68] Better compatibility with hf model backend --- modeling/inference_models/hf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 318423d5..b209d49f 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -1,4 +1,4 @@ -import os +import os, sys from typing import Optional from transformers import AutoConfig import warnings @@ -196,9 +196,10 @@ class HFInferenceModel(InferenceModel): except: pass if self.hf_torch: - breakmodel.breakmodel = True - breakmodel.gpu_blocks = [] - breakmodel.disk_blocks = 0 + if 'breakmodel' in sys.modules: + breakmodel.breakmodel = True + breakmodel.gpu_blocks = [] + breakmodel.disk_blocks = 0 def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults From dc20e6dde9152fd609ae06d362b05b9a0ac29bb5 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 15:04:33 -0400 Subject: [PATCH 29/68] Fix for unloading models --- modeling/inference_models/hf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index b209d49f..53c802b1 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -197,6 +197,7 @@ class HFInferenceModel(InferenceModel): pass if self.hf_torch: if 'breakmodel' in sys.modules: + import breakmodel breakmodel.breakmodel = True breakmodel.gpu_blocks = [] breakmodel.disk_blocks = 0 From ca770844b0d6002f07d5b347190be0b25e6faf3d Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 15:07:59 -0400 Subject: [PATCH 30/68] Fix for breakmodel --- modeling/inference_models/hf_torch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 5dd53bf8..47c37436 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -820,7 +820,7 @@ class HFTorchInferenceModel(HFInferenceModel): 
breakmodel.gpu_blocks = [0] * n_layers return - elif breakmodel.gpu_blocks != []: + elif breakmodel.gpu_blocks == []: logger.info("Breakmodel not specified, assuming GPU 0") breakmodel.gpu_blocks = [n_layers] n_layers = 0 From f1a16f260f4f22384ae882042860228134bf6222 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 16:10:41 -0400 Subject: [PATCH 31/68] Potential breakmodel fix --- modeling/inference_models/hf_torch.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 47c37436..5595edc7 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -824,6 +824,20 @@ class HFTorchInferenceModel(HFInferenceModel): logger.info("Breakmodel not specified, assuming GPU 0") breakmodel.gpu_blocks = [n_layers] n_layers = 0 + + else: + s = n_layers + for i in range(len(breakmodel.gpu_blocks)): + if breakmodel.gpu_blocks[i] <= -1: + breakmodel.gpu_blocks[i] = s + break + else: + s -= breakmodel.gpu_blocks[i] + assert sum(breakmodel.gpu_blocks) <= n_layers + n_layers -= sum(breakmodel.gpu_blocks) + if breakmodel.disk_blocks is not None: + assert breakmodel.disk_blocks <= n_layers + n_layers -= breakmodel.disk_blocks logger.init_ok("Final device configuration:", status="Info") self.breakmodel_device_list(n_layers, primary=breakmodel.primary_device) From 9e53bcf67684198bbbaeb3e67281c1641419f448 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 20:24:57 -0400 Subject: [PATCH 32/68] Fix for breakmodel loading to CPU when set to GPU --- modeling/inference_models/generic_hf_torch/class.py | 8 +++++--- modeling/inference_models/hf.py | 6 ++++-- modeling/inference_models/hf_torch.py | 3 +++ static/custom.css | 5 +++++ 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py index 4e2c8a5b..572337e2 100644 --- a/modeling/inference_models/generic_hf_torch/class.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -248,11 +248,12 @@ class model_backend(HFTorchInferenceModel): self.patch_embedding() + if utils.koboldai_vars.hascuda: - if utils.koboldai_vars.usegpu: + if self.usegpu: # Use just VRAM self.model = self.model.half().to(utils.koboldai_vars.gpu_device) - elif utils.koboldai_vars.breakmodel: + elif self.breakmodel: # Use both RAM and VRAM (breakmodel) if not self.lazy_load: self.breakmodel_device_config(self.model.config) @@ -267,7 +268,8 @@ class model_backend(HFTorchInferenceModel): self._move_to_devices() else: self.model = self.model.to("cpu").float() - + + self.model.kai_model = self utils.koboldai_vars.modeldim = self.get_hidden_size() diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 53c802b1..e801eab2 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -158,7 +158,7 @@ class HFInferenceModel(InferenceModel): layers.append(None) else: layers.append(parameters["{}_Layers".format(i)]) - self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + self.cpu_layers = int(parameters['CPU_Layers']) if 'CPU_Layers' in parameters else None if isinstance(self.cpu_layers, str): self.cpu_layers = int(self.cpu_layers) if self.cpu_layers.isnumeric() else 0 self.layers = layers @@ -167,9 +167,11 @@ class HFInferenceModel(InferenceModel): self.disk_layers = int(self.disk_layers) if self.disk_layers.isnumeric() else 0 breakmodel.gpu_blocks = 
layers breakmodel.disk_blocks = self.disk_layers - self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.usegpu = self.cpu_layers == 0 and breakmodel.disk_blocks == 0 and sum(self.layers)-self.layers[0] == 0 self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel + else: + self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id'] self.path = parameters['path'] if 'path' in parameters else None diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 5595edc7..c5560360 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -126,6 +126,7 @@ class HFTorchInferenceModel(HFInferenceModel): return "Unknown" def _post_load(m_self) -> None: + if not utils.koboldai_vars.model_type: utils.koboldai_vars.model_type = m_self.get_model_type() @@ -562,6 +563,7 @@ class HFTorchInferenceModel(HFInferenceModel): ) ) # print(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ", end="", flush=True) + #logger.debug(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ") model_dict[key] = model_dict[key].materialize( f, map_location="cpu" ) @@ -847,6 +849,7 @@ class HFTorchInferenceModel(HFInferenceModel): # If all layers are on the same device, use the old GPU generation mode while len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0: breakmodel.gpu_blocks.pop() + self.breakmodel = True if len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in ( -1, utils.num_layers(config), diff --git a/static/custom.css b/static/custom.css index 412c7f1b..968d73e4 100644 --- a/static/custom.css +++ b/static/custom.css @@ -2404,4 +2404,9 @@ body.connected .popupfooter, .popupfooter.always-available { padding: 5px; padding-right: 0px; padding-top: 0px; +} + +.input_error { + border: 5px solid red !important; + box-sizing: border-box !important; } \ No newline at end of file From 4c25d6fbbbfad67176056a6f5af1826c2c2eb24c Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 20:34:01 -0400 Subject: [PATCH 33/68] Fix for loading model multiple times loosing the gpu/cpu splits --- modeling/inference_models/hf.py | 6 ------ modeling/inference_models/hf_torch.py | 3 +++ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index e801eab2..b50ebf56 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -197,12 +197,6 @@ class HFInferenceModel(InferenceModel): torch.cuda.empty_cache() except: pass - if self.hf_torch: - if 'breakmodel' in sys.modules: - import breakmodel - breakmodel.breakmodel = True - breakmodel.gpu_blocks = [] - breakmodel.disk_blocks = 0 def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index c5560360..681d3ab1 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -788,6 +788,7 @@ class HFTorchInferenceModel(HFInferenceModel): if device_count < 2: primary = None logger.debug("n_layers: {}".format(n_layers)) + logger.debug("gpu blocks: 
{}".format(breakmodel.gpu_blocks)) gpu_blocks = breakmodel.gpu_blocks + ( device_count - len(breakmodel.gpu_blocks) ) * [0] @@ -818,6 +819,8 @@ class HFTorchInferenceModel(HFInferenceModel): n_layers = utils.num_layers(config) + logger.debug("gpu blocks before modification: {}".format(breakmodel.gpu_blocks)) + if utils.args.cpu: breakmodel.gpu_blocks = [0] * n_layers return From 48226191922a48024a75a531668d3638b1f71155 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 20:47:14 -0400 Subject: [PATCH 34/68] Fix for model backends that have no inputs not being able to load in the UI --- static/koboldai.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/static/koboldai.js b/static/koboldai.js index dabbcda9..c4b2e160 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1933,6 +1933,8 @@ function selected_model_info(sent_data) { document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); } + model_settings_checker() + } function update_gpu_layers() { From 5561cc1f220c0cf9d957bcbd3e535ad88502ab82 Mon Sep 17 00:00:00 2001 From: ebolam Date: Tue, 23 May 2023 08:33:19 -0400 Subject: [PATCH 35/68] Fix for GPU generation --- modeling/inference_models/hf_torch.py | 13 ++++++++- static/application.js | 42 +++++++++++++++------------ static/koboldai.js | 40 +++++++++++++------------ 3 files changed, 58 insertions(+), 37 deletions(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 681d3ab1..2f575e73 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -125,6 +125,17 @@ class HFTorchInferenceModel(HFInferenceModel): else: return "Unknown" + def get_auxilary_device(self): + """Get device auxilary tensors like inputs should be stored on.""" + + # NOTE: TPU isn't a torch device, so TPU stuff gets sent to CPU. 
+ if utils.koboldai_vars.hascuda and self.usegpu: + return utils.koboldai_vars.gpu_device + elif utils.koboldai_vars.hascuda and self.breakmodel: + import breakmodel + return breakmodel.primary_device + return "cpu" + def _post_load(m_self) -> None: if not utils.koboldai_vars.model_type: @@ -226,7 +237,7 @@ class HFTorchInferenceModel(HFInferenceModel): else: gen_in = prompt_tokens - device = utils.get_auxilary_device() + device = self.get_auxilary_device() gen_in = gen_in.to(device) additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else [] diff --git a/static/application.js b/static/application.js index ca445c5f..ca81f729 100644 --- a/static/application.js +++ b/static/application.js @@ -4012,16 +4012,18 @@ function model_settings_checker() { //get an object of all the input settings from the user data = {} settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; @@ -4259,6 +4261,8 @@ function selected_model_info(sent_data) { document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); } + model_settings_checker(); + } function getModelParameterCount(modelName) { @@ -4371,16 +4375,18 @@ function load_model() { //get an object of all the input settings from the user data = {} - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; diff --git a/static/koboldai.js b/static/koboldai.js index c4b2e160..f0a1f6f8 
100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1686,16 +1686,18 @@ function model_settings_checker() { //get an object of all the input settings from the user data = {} settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; @@ -1965,16 +1967,18 @@ function load_model() { //get an object of all the input settings from the user data = {} - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; From 7a8e4c39da3c1d30ddf3489945799b2695d9be86 Mon Sep 17 00:00:00 2001 From: ebolam Date: Tue, 23 May 2023 08:35:15 -0400 Subject: [PATCH 36/68] Fix for attention bias --- aiserver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 38ffc3f6..6276e514 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3693,7 +3693,8 @@ def calcsubmit(txt): bias += [1] * (i - top_index) bias[i] = b["multiplier"] - device = utils.get_auxilary_device() + + device = model.get_auxilary_device() attention_bias.attention_bias = torch.Tensor(bias).to(device) logger.info(f"Bias by {koboldai_vars.memory_attn_bias} -- {attention_bias.attention_bias}") logger.debug("Submit: experimental_features time {}s".format(time.time()-start_time)) From 839d56ebf2e7409705a109722bf55edd0fcee77c Mon Sep 17 00:00:00 2001 From: ebolam Date: Tue, 23 May 2023 19:25:01 -0400 Subject: [PATCH 37/68] Potential fix for gpt-neo and gpt-j --- modeling/inference_models/hf.py | 21 
+++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index b50ebf56..2417bffb 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -199,8 +199,9 @@ class HFInferenceModel(InferenceModel): pass def _post_load(self) -> None: + self.model_type = str(self.model_config.model_type) # These are model specific tokenizer overrides if a model has bad defaults - if utils.koboldai_vars.model_type == "llama": + if self.model_type == "llama": # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer self.tokenizer.add_bos_token = False @@ -284,23 +285,23 @@ class HFInferenceModel(InferenceModel): return result object.__setattr__(self.tokenizer, '__call__', call_wrapper.__get__(self.tokenizer)) - elif utils.koboldai_vars.model_type == "opt": + elif self.model_type == "opt": self.tokenizer._koboldai_header = self.tokenizer.encode("") self.tokenizer.add_bos_token = False self.tokenizer.add_prefix_space = False # Change newline behavior to match model quirks - if utils.koboldai_vars.model_type == "xglm": + if self.model_type == "xglm": # Default to newline mode if using XGLM utils.koboldai_vars.newlinemode = "s" - elif utils.koboldai_vars.model_type in ["opt", "bloom"]: + elif self.model_type in ["opt", "bloom"]: # Handle but don't convert newlines if using Fairseq models that have newlines trained in them utils.koboldai_vars.newlinemode = "ns" # Clean up tokens that cause issues if ( utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default - and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj") + and self.model_type not in ("gpt2", "gpt_neo", "gptj") ): utils.koboldai_vars.badwordsids = [ [v] @@ -357,15 +358,15 @@ class HFInferenceModel(InferenceModel): revision=utils.koboldai_vars.revision, cache_dir="cache", ) - utils.koboldai_vars.model_type = self.model_config.model_type + self.model_type = self.model_config.model_type except ValueError: - utils.koboldai_vars.model_type = { + self.model_type = { "NeoCustom": "gpt_neo", "GPT2Custom": "gpt2", - }.get(utils.koboldai_vars.model) + }.get(self.model) - if not utils.koboldai_vars.model_type: + if not self.model_type: logger.warning( "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)" ) - utils.koboldai_vars.model_type = "gpt_neo" \ No newline at end of file + self.model_type = "gpt_neo" \ No newline at end of file From 9bd445c2a8d24a20b04aa905486c367455286ff9 Mon Sep 17 00:00:00 2001 From: ebolam Date: Tue, 23 May 2023 20:33:55 -0400 Subject: [PATCH 38/68] gpt2 fixed --- modeling/inference_models/generic_hf_torch/class.py | 2 +- modeling/inference_models/hf.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py index 572337e2..bbd42096 100644 --- a/modeling/inference_models/generic_hf_torch/class.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -59,7 +59,7 @@ class model_backend(HFTorchInferenceModel): "low_cpu_mem_usage": True, } - if utils.koboldai_vars.model_type == "gpt2": + if self.model_type == "gpt2": # We must disable low_cpu_mem_usage and if using a GPT-2 model # because GPT-2 is not compatible with this feature yet. 
tf_kwargs.pop("low_cpu_mem_usage", None) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 2417bffb..7b8f356c 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -61,6 +61,7 @@ class HFInferenceModel(InferenceModel): else: self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None + layer_count = None if hasattr(self, "get_model_type") and self.get_model_type() == "gpt2" else layer_count #Skip layers if we're a GPT2 model as it doesn't support breakmodel if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self): with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f: @@ -143,15 +144,13 @@ class HFInferenceModel(InferenceModel): return requested_parameters def set_input_parameters(self, parameters): - if self.hf_torch: + if self.hf_torch and hasattr(self, "get_model_type") and self.get_model_type() != "gpt2": import breakmodel layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: gpu_count = torch.cuda.device_count() layers = [] - logger.info(parameters) for i in range(gpu_count): - logger.info(parameters["{}_Layers".format(i)]) if isinstance(parameters["{}_Layers".format(i)], str) and parameters["{}_Layers".format(i)].isnumeric(): layers.append(int(parameters["{}_Layers".format(i)])) elif isinstance(parameters["{}_Layers".format(i)], str): @@ -170,8 +169,13 @@ class HFInferenceModel(InferenceModel): self.usegpu = self.cpu_layers == 0 and breakmodel.disk_blocks == 0 and sum(self.layers)-self.layers[0] == 0 self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel + self.lazy_load = True + logger.debug("Model type: {}".format(self.model_type)) else: + logger.debug("Disabling breakmodel and lazyload") self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.breakmodel = False + self.lazy_load = False self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id'] self.path = parameters['path'] if 'path' in parameters else None @@ -199,6 +203,7 @@ class HFInferenceModel(InferenceModel): pass def _post_load(self) -> None: + utils.koboldai_vars.badwordsids = koboldai_settings.badwordsids_default self.model_type = str(self.model_config.model_type) # These are model specific tokenizer overrides if a model has bad defaults if self.model_type == "llama": From 935480a701c8cb1f672db15143af0cf6f6d006e9 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 08:45:22 -0400 Subject: [PATCH 39/68] Added bad 
words to the transmit list for easier debugging --- koboldai_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/koboldai_settings.py b/koboldai_settings.py index 5467fe29..29a82406 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -647,7 +647,7 @@ class settings(object): raise class model_settings(settings): - local_only_variables = ['badwordsids', 'apikey', 'default_preset'] + local_only_variables = ['apikey', 'default_preset'] no_save_variables = ['modelconfig', 'custmodpth', 'generated_tkns', 'loaded_layers', 'total_layers', 'total_download_chunks', 'downloaded_chunks', 'presets', 'default_preset', 'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition', From 9d708bc4246e77230eeaee43d75dff5c1d4f294b Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 08:56:52 -0400 Subject: [PATCH 40/68] Logging of environmental variables over-riding command line arguments --- aiserver.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aiserver.py b/aiserver.py index 6276e514..97472f81 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1405,6 +1405,7 @@ def general_startup(override_args=None): args = parser.parse_args(shlex.split(override_args)) elif(os.environ.get("KOBOLDAI_ARGS") is not None): import shlex + logger.info("Using environmental variables instead of command arguments: {}".format(os.environ["KOBOLDAI_ARGS"])) args = parser.parse_args(shlex.split(os.environ["KOBOLDAI_ARGS"])) else: args = parser.parse_args() From c61e2b676a5917072d665812849e4407632c1724 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 09:05:21 -0400 Subject: [PATCH 41/68] More environmental variable feedback --- aiserver.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aiserver.py b/aiserver.py index 97472f81..777b36d3 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1428,9 +1428,11 @@ def general_startup(override_args=None): for arg in temp: if arg == "path": if "model_path" in os.environ: + logger.info("Setting model path based on enviornmental variable: {}".format(os.environ["model_path"])) setattr(args, arg, os.environ["model_path"]) else: if arg in os.environ: + logger.info("Setting {} based on enviornmental variable: {}".format(arg, os.environ[arg])) if isinstance(getattr(args, arg), bool): if os.environ[arg].lower() == "true": setattr(args, arg, True) From 068173b24a3f0da9df8144db7a12052d814874cb Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 09:08:34 -0400 Subject: [PATCH 42/68] Potential BadWords fix --- modeling/inference_models/hf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 7b8f356c..032b8ec3 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -203,7 +203,7 @@ class HFInferenceModel(InferenceModel): pass def _post_load(self) -> None: - utils.koboldai_vars.badwordsids = koboldai_settings.badwordsids_default + self.badwordsids = koboldai_settings.badwordsids_default self.model_type = str(self.model_config.model_type) # These are model specific tokenizer overrides if a model has bad defaults if self.model_type == "llama": @@ -305,17 +305,17 @@ class HFInferenceModel(InferenceModel): # Clean up tokens that cause issues if ( - utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default + self.badwordsids == koboldai_settings.badwordsids_default and self.model_type not in ("gpt2", "gpt_neo", "gptj") ): - utils.koboldai_vars.badwordsids = [ + self.badwordsids 
= [ [v] for k, v in self.tokenizer.get_vocab().items() if any(c in str(k) for c in "[]") ] if utils.koboldai_vars.newlinemode == "n": - utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id]) + self.badwordsids.append([self.tokenizer.eos_token_id]) return super()._post_load() From 92f592ea203a8f758a73d982aa5fb96ee3670eed Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 11:48:25 -0400 Subject: [PATCH 43/68] Fix for model name not showing correctly on load in UI1 --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 777b36d3..42715de6 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1643,7 +1643,7 @@ def load_model(model_backend, initial_load=False): koboldai_vars.noai = False set_aibusy(True) if koboldai_vars.model != 'ReadOnly': - emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True) + emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(model_backends[model_backend].model_name if "model_name" in vars(model_backends[model_backend]) else model_backends[model_backend].id)}, broadcast=True) #Have to add a sleep so the server will send the emit for some reason time.sleep(0.1) From 1a1b79a16d2e5d7cc7e8865350de0e8afde24357 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 12:44:26 -0400 Subject: [PATCH 44/68] Change default for HF on non-breakmodel models to use GPU instead of CPU --- modeling/inference_models/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 032b8ec3..ee585321 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -133,7 +133,7 @@ class HFInferenceModel(InferenceModel): "unit": "bool", "label": "Use GPU", "id": "use_gpu", - "default": False, + "default": True, "tooltip": "Whether or not to use the GPU", "menu_path": "Layers", "extra_classes": "", From b116e22bca85f059976711063850bdbfc5430522 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 16:47:19 -0400 Subject: [PATCH 45/68] Fix for colab --- aiserver.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aiserver.py b/aiserver.py index 42715de6..b06aaa83 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1559,6 +1559,9 @@ def general_startup(override_args=None): socketio.start_background_task(socket_io_relay, koboldai_settings.queue, socketio) + if koboldai_vars.use_colab_tpu and args.model_backend == "Huggingface": + args.model_backend = "Huggingface MTJ" + if args.model: # At this point we have to try to load the model through the selected backend if args.model_backend not in model_backends: @@ -1593,6 +1596,7 @@ def general_startup(override_args=None): return args.model_backend else: return "Read Only" + From 5fe8c71b2ed9132ca591d3797d1deca6f8e8762e Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 18:55:31 -0400 Subject: [PATCH 46/68] TPU Fixes --- modeling/inference_models/hf_mtj/class.py | 3 ++- tpu_mtj_backend.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py index 4de3a1b2..876e950e 100644 --- a/modeling/inference_models/hf_mtj/class.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -186,6 +186,7 @@ class model_backend(HFInferenceModel): tpu_mtj_backend.load_model( utils.koboldai_vars.model, + self.model_type, hf_checkpoint=utils.koboldai_vars.model not in ("TPUMeshTransformerGPTJ", 
"TPUMeshTransformerGPTNeoX") and utils.koboldai_vars.use_colab_tpu, @@ -202,7 +203,7 @@ class model_backend(HFInferenceModel): if ( utils.koboldai_vars.badwordsids is koboldai_settings.badwordsids_default - and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj") + and self.model_type not in ("gpt2", "gpt_neo", "gptj") ): utils.koboldai_vars.badwordsids = [ [v] diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index 07261636..d5a4d1db 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -941,7 +941,7 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2): koboldai_vars.status_message = "" -def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: +def load_model(path: str, model_type: str, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: global thread_resources_env, seq, tokenizer, network, params, pad_token_id if kwargs.get("pad_token_id"): @@ -989,9 +989,9 @@ def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=Fa # Try to convert HF config.json to MTJ config if hf_checkpoint: - spec_path = os.path.join("maps", koboldai_vars.model_type + ".json") + spec_path = os.path.join("maps", model_type + ".json") if not os.path.isfile(spec_path): - raise NotImplementedError(f"Unsupported model type {repr(koboldai_vars.model_type)}") + raise NotImplementedError(f"Unsupported model type {repr(model_type)}") with open(spec_path) as f: lazy_load_spec = json.load(f) From 6620df535035f8717f402e06381de062cd81918f Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 19:03:49 -0400 Subject: [PATCH 47/68] debug info --- modeling/inference_models/hf_mtj/class.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py index 876e950e..91ddf03d 100644 --- a/modeling/inference_models/hf_mtj/class.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -184,6 +184,7 @@ class model_backend(HFInferenceModel): self.init_model_config() utils.koboldai_vars.allowsp = True + logger.info(self.model_type) tpu_mtj_backend.load_model( utils.koboldai_vars.model, self.model_type, From 703da112ee8b6b14ead182a157b9cc82e6493707 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 19:17:51 -0400 Subject: [PATCH 48/68] TPU Fix --- modeling/inference_models/hf_mtj/class.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py index 91ddf03d..00fbfec3 100644 --- a/modeling/inference_models/hf_mtj/class.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -186,9 +186,9 @@ class model_backend(HFInferenceModel): logger.info(self.model_type) tpu_mtj_backend.load_model( - utils.koboldai_vars.model, + self.model, self.model_type, - hf_checkpoint=utils.koboldai_vars.model + hf_checkpoint=self.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and utils.koboldai_vars.use_colab_tpu, socketio_queue=koboldai_settings.queue, From 6a627265754ef9b2cb2cfb20a476a8af1d383398 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 19:30:23 -0400 Subject: [PATCH 49/68] TPU Fix? 
--- aiserver.py | 2 +- modeling/inference_models/hf_mtj/class.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index b06aaa83..998441c8 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1548,7 +1548,7 @@ def general_startup(override_args=None): koboldai_vars.custmodpth = modpath elif args.model: logger.message(f"Welcome to KoboldAI!") - logger.message(f"You have selected the following Model: {koboldai_vars.model}") + logger.message(f"You have selected the following Model: {args.model}") if args.path: logger.message(f"You have selected the following path for your Model: {args.path}") koboldai_vars.custmodpth = args.path; diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py index 00fbfec3..bc31b3fa 100644 --- a/modeling/inference_models/hf_mtj/class.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -187,7 +187,6 @@ class model_backend(HFInferenceModel): logger.info(self.model_type) tpu_mtj_backend.load_model( self.model, - self.model_type, hf_checkpoint=self.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and utils.koboldai_vars.use_colab_tpu, From 54221942ef74c20ac209c1ce52576cc65bf961ae Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 19:43:32 -0400 Subject: [PATCH 50/68] TPU Fix --- aiserver.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aiserver.py b/aiserver.py index 998441c8..ae8fecb3 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1561,6 +1561,7 @@ def general_startup(override_args=None): if koboldai_vars.use_colab_tpu and args.model_backend == "Huggingface": args.model_backend = "Huggingface MTJ" + if args.model: # At this point we have to try to load the model through the selected backend @@ -1589,6 +1590,7 @@ def general_startup(override_args=None): logger.error("Missing: {}".format(", ".join(mising_parameters))) exit() arg_parameters['id'] = args.model + arg_parameters['model'] = args.model arg_parameters['model_path'] = args.path arg_parameters['menu_path'] = "" model_backends[args.model_backend].set_input_parameters(arg_parameters) From ea4e3c477c82cc2239ec1da8bac5e4de4410e91f Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 19:45:21 -0400 Subject: [PATCH 51/68] More debuging --- modeling/inference_models/hf_mtj/class.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py index bc31b3fa..13591425 100644 --- a/modeling/inference_models/hf_mtj/class.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -184,7 +184,8 @@ class model_backend(HFInferenceModel): self.init_model_config() utils.koboldai_vars.allowsp = True - logger.info(self.model_type) + logger.info(self.model) + logger.info(self.id) tpu_mtj_backend.load_model( self.model, hf_checkpoint=self.model From b0ed7da9dde714943632de5fd917de557fdf30b6 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 19:47:45 -0400 Subject: [PATCH 52/68] more tpu debugging --- aiserver.py | 1 + modeling/inference_models/hf_mtj/class.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index ae8fecb3..ec8d05a7 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1594,6 +1594,7 @@ def general_startup(override_args=None): arg_parameters['model_path'] = args.path arg_parameters['menu_path'] = "" model_backends[args.model_backend].set_input_parameters(arg_parameters) + logger.info(vars(model_backends[args.model_backend])) koboldai_vars.model = args.model return 
args.model_backend else: diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py index 13591425..5f19897f 100644 --- a/modeling/inference_models/hf_mtj/class.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -185,7 +185,6 @@ class model_backend(HFInferenceModel): utils.koboldai_vars.allowsp = True logger.info(self.model) - logger.info(self.id) tpu_mtj_backend.load_model( self.model, hf_checkpoint=self.model From c9523a340e526c76f669bb269f7ff53116bf25c7 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 19:50:08 -0400 Subject: [PATCH 53/68] TPU Fix --- aiserver.py | 1 - modeling/inference_models/hf_mtj/class.py | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/aiserver.py b/aiserver.py index ec8d05a7..ae8fecb3 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1594,7 +1594,6 @@ def general_startup(override_args=None): arg_parameters['model_path'] = args.path arg_parameters['menu_path'] = "" model_backends[args.model_backend].set_input_parameters(arg_parameters) - logger.info(vars(model_backends[args.model_backend])) koboldai_vars.model = args.model return args.model_backend else: diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py index 5f19897f..e029db9d 100644 --- a/modeling/inference_models/hf_mtj/class.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -150,7 +150,7 @@ class model_backend(HFInferenceModel): tpu_mtj_backend.socketio = utils.socketio - if utils.koboldai_vars.model == "TPUMeshTransformerGPTNeoX": + if self.model_name == "TPUMeshTransformerGPTNeoX": utils.koboldai_vars.badwordsids = utils.koboldai_vars.badwordsids_neox print( @@ -158,7 +158,7 @@ class model_backend(HFInferenceModel): Colors.PURPLE, Colors.END ) ) - if utils.koboldai_vars.model in ( + if self.model_name in ( "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX", ) and ( @@ -168,7 +168,7 @@ class model_backend(HFInferenceModel): raise FileNotFoundError( f"The specified model path {repr(utils.koboldai_vars.custmodpth)} is not the path to a valid folder" ) - if utils.koboldai_vars.model == "TPUMeshTransformerGPTNeoX": + if self.model_name == "TPUMeshTransformerGPTNeoX": tpu_mtj_backend.pad_token_id = 2 tpu_mtj_backend.koboldai_vars = utils.koboldai_vars @@ -184,10 +184,10 @@ class model_backend(HFInferenceModel): self.init_model_config() utils.koboldai_vars.allowsp = True - logger.info(self.model) + logger.info(self.model_name) tpu_mtj_backend.load_model( - self.model, - hf_checkpoint=self.model + self.model_name, + hf_checkpoint=self.model_name not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and utils.koboldai_vars.use_colab_tpu, socketio_queue=koboldai_settings.queue, From 1a7c2ddab0b582758456af292c439f177460df53 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 20:14:22 -0400 Subject: [PATCH 54/68] TPU Fix? 
--- tpu_mtj_backend.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index d5a4d1db..bf08f745 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -460,14 +460,14 @@ def sample_func(data, key, numseqs_aux, badwords, repetition_penalty, generated_ return carry class PenalizingCausalTransformer(CausalTransformer): - def __init__(self, config, **kwargs): + def __init__(self, badwordsids, config, **kwargs): # Initialize super().__init__(config, **kwargs) def generate_static(state, key, ctx, ctx_length, gen_length, numseqs_aux, sampler_options, soft_embeddings=None): compiling_callback() numseqs = numseqs_aux.shape[0] # These are the tokens that we don't want the AI to ever write - badwords = jnp.array(koboldai_vars.badwordsids).squeeze() + badwords = jnp.array(badwordsids).squeeze() @hk.transform def generate_sample(context, ctx_length): # Give the initial context to the transformer @@ -941,7 +941,9 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2): koboldai_vars.status_message = "" -def load_model(path: str, model_type: str, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: +import koboldai_settings + +def load_model(path: str, model_type: str, badwordsids=koboldai_settings.badwordsids_default driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: global thread_resources_env, seq, tokenizer, network, params, pad_token_id if kwargs.get("pad_token_id"): @@ -1119,12 +1121,12 @@ def load_model(path: str, model_type: str, driver_version="tpu_driver_20221109", global badwords # These are the tokens that we don't want the AI to ever write - badwords = jnp.array(koboldai_vars.badwordsids).squeeze() + badwords = jnp.array(badwordsids).squeeze() if not path.endswith("/"): path += "/" - network = PenalizingCausalTransformer(params, dematerialized=True) + network = PenalizingCausalTransformer(badwordsids, params, dematerialized=True) if not hf_checkpoint and koboldai_vars.model != "TPUMeshTransformerGPTNeoX": network.state = read_ckpt_lowmem(network.state, path, devices.shape[1]) From b5272ea607ad38e162b4625893ee491900305342 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 20:15:11 -0400 Subject: [PATCH 55/68] Whoops --- tpu_mtj_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index bf08f745..df37e0be 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -943,7 +943,7 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2): import koboldai_settings -def load_model(path: str, model_type: str, badwordsids=koboldai_settings.badwordsids_default driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: +def load_model(path: str, model_type: str, badwordsids=koboldai_settings.badwordsids_default, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: global thread_resources_env, seq, tokenizer, network, params, pad_token_id if kwargs.get("pad_token_id"): From adb77b86513f0037c2197c185af7a91553d36e04 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 25 May 2023 18:43:56 -0400 Subject: [PATCH 56/68] Fix for horde and multi-selected models --- aiserver.py | 1 + modeling/inference_models/horde/class.py | 2 +- 
static/application.js | 21 +++++++++++++++------ static/koboldai.js | 21 +++++++++++++++------ 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/aiserver.py b/aiserver.py index ae8fecb3..cfae94cd 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6199,6 +6199,7 @@ def UI_2_resubmit_model_info(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): + logger.debug("Loading model with user input of: {}".format(data)) model_backends[data['plugin']].set_input_parameters(data) load_model(data['plugin']) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) diff --git a/modeling/inference_models/horde/class.py b/modeling/inference_models/horde/class.py index 2c4c4bf5..38b1c5c6 100644 --- a/modeling/inference_models/horde/class.py +++ b/modeling/inference_models/horde/class.py @@ -1,6 +1,6 @@ from __future__ import annotations -import time +import time, json import torch import requests import numpy as np diff --git a/static/application.js b/static/application.js index ca81f729..8bc6c830 100644 --- a/static/application.js +++ b/static/application.js @@ -4378,12 +4378,21 @@ function load_model() { if (settings_area) { for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if ((element.tagName == "SELECT") && (element.multiple)) { + element_data = []; + for (var i=0, iLen=element.options.length; i Date: Thu, 25 May 2023 18:46:35 -0400 Subject: [PATCH 57/68] Added proper model name for horde --- modeling/inference_models/horde/class.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modeling/inference_models/horde/class.py b/modeling/inference_models/horde/class.py index 38b1c5c6..3b102b46 100644 --- a/modeling/inference_models/horde/class.py +++ b/modeling/inference_models/horde/class.py @@ -29,6 +29,7 @@ class model_backend(InferenceModel): self.url = "https://horde.koboldai.net" self.key = "0000000000" self.models = self.get_cluster_models() + self.model_name = "Horde" # Do not allow API to be served over the API From d2c95bc60f6f9926699493b6a3144f427b62e5e9 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 10:33:59 -0400 Subject: [PATCH 58/68] Fix for non-jailed menu path navigation --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index cfae94cd..e492cfcf 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6147,7 +6147,7 @@ def UI_2_select_model(data): emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) else: #Get load methods - if data['ismenu'] == 'false': + if 'ismenu' in data and data['ismenu'] == 'false': valid_loaders = {} if data['id'] in [item.name for sublist in model_menu for item in model_menu[sublist]]: #Here if we have a model id that's in our menu, we explicitly use that backend From 2c82e9c5e0fe0903f16291bcdb3816427a5af7f2 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 11:08:30 -0400 Subject: [PATCH 59/68] GooseAI Fixes --- modeling/inference_models/api/class.py | 2 +- 
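For reference before the diff below: PATCHes 54 and 55 above rework the TPU loader so badwordsids is passed in explicitly instead of being read from koboldai_vars inside the backend. A hypothetical call against that signature (after the comma fix in PATCH 55) looks roughly like this; the model path and keyword values are placeholders and do not correspond to a call that appears in the series.

    import koboldai_settings
    import tpu_mtj_backend
    from logger import logger

    tpu_mtj_backend.load_model(
        "models/gpt-j-6b",   # placeholder model path
        "gptj",              # model_type, used to pick maps/<model_type>.json
        badwordsids=koboldai_settings.badwordsids_default,
        hf_checkpoint=True,
        socketio_queue=koboldai_settings.queue,
        initial_load=False,
        logger=logger,
    )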
modeling/inference_models/basic_api/class.py | 2 +- modeling/inference_models/horde/class.py | 10 +++++++--- modeling/inference_models/openai_gooseai.py | 18 ++++++++++++------ modeling/inference_models/readonly/class.py | 2 +- 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/modeling/inference_models/api/class.py b/modeling/inference_models/api/class.py index 3d54edd9..b3129d5a 100644 --- a/modeling/inference_models/api/class.py +++ b/modeling/inference_models/api/class.py @@ -32,7 +32,7 @@ class model_backend(InferenceModel): def is_valid(self, model_name, model_path, menu_path): return model_name == "API" - def get_requested_parameters(self, model_name, model_path, menu_path): + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): if os.path.exists("settings/api.model_backend.settings") and 'base_url' not in vars(self): with open("settings/api.model_backend.settings", "r") as f: self.base_url = json.load(f)['base_url'] diff --git a/modeling/inference_models/basic_api/class.py b/modeling/inference_models/basic_api/class.py index 2094d34e..b492c039 100644 --- a/modeling/inference_models/basic_api/class.py +++ b/modeling/inference_models/basic_api/class.py @@ -33,7 +33,7 @@ class model_backend(InferenceModel): def is_valid(self, model_name, model_path, menu_path): return model_name == "Colab" - def get_requested_parameters(self, model_name, model_path, menu_path): + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): if os.path.exists("settings/api.model_backend.settings") and 'colaburl' not in vars(self): with open("settings/api.model_backend.settings", "r") as f: self.colaburl = json.load(f)['base_url'] diff --git a/modeling/inference_models/horde/class.py b/modeling/inference_models/horde/class.py index 3b102b46..2cc01708 100644 --- a/modeling/inference_models/horde/class.py +++ b/modeling/inference_models/horde/class.py @@ -39,19 +39,23 @@ class model_backend(InferenceModel): logger.debug("Horde Models: {}".format(self.models)) return model_name == "CLUSTER" or model_name in [x['value'] for x in self.models] - def get_requested_parameters(self, model_name, model_path, menu_path): + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): if os.path.exists("settings/api.model_backend.settings") and 'base_url' not in vars(self): with open("settings/horde.model_backend.settings", "r") as f: temp = json.load(f) self.base_url = temp['url'] self.key = temp['key'] + if 'key' in parameters: + self.key = parameters['key'] + if 'url' in parameters: + self.url = parameters['url'] requested_parameters = [] requested_parameters.extend([{ "uitype": "text", "unit": "text", "label": "URL", "id": "url", - "default": self.url, + "default": self.url if 'url' not in parameters else parameters['url'], "tooltip": "URL to the horde.", "menu_path": "", "check": {"value": "", 'check': "!="}, @@ -63,7 +67,7 @@ class model_backend(InferenceModel): "unit": "text", "label": "Key", "id": "key", - "default": self.key, + "default": self.key if 'key' not in parameters else parameters['key'], "check": {"value": "", 'check': "!="}, "tooltip": "User Key to use when connecting to Horde (0000000000 is anonymous).", "menu_path": "", diff --git a/modeling/inference_models/openai_gooseai.py b/modeling/inference_models/openai_gooseai.py index e4b9dfb8..0195f650 100644 --- a/modeling/inference_models/openai_gooseai.py +++ b/modeling/inference_models/openai_gooseai.py @@ -1,5 +1,5 @@ import torch -import requests 
+import requests,json import numpy as np from typing import List, Optional, Union import os @@ -30,10 +30,15 @@ class model_backend(InferenceModel): def is_valid(self, model_name, model_path, menu_path): return model_name == "OAI" or model_name == "GooseAI" - def get_requested_parameters(self, model_name, model_path, menu_path): + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): if os.path.exists("settings/{}.model_backend.settings".format(self.source)) and 'colaburl' not in vars(self): with open("settings/{}.model_backend.settings".format(self.source), "r") as f: - self.key = json.load(f)['key'] + try: + self.key = json.load(f)['key'] + except: + pass + if 'key' in parameters: + self.key = parameters['key'] self.source = model_name requested_parameters = [] requested_parameters.extend([{ @@ -66,7 +71,7 @@ class model_backend(InferenceModel): def set_input_parameters(self, parameters): self.key = parameters['key'].strip() - self.model = parameters['model'] + self.model_name = parameters['model'] def get_oai_models(self): if self.key == "": @@ -94,6 +99,7 @@ class model_backend(InferenceModel): logger.init_ok("OAI Engines", status="OK") + logger.debug("OAI Engines: {}".format(engines)) return engines else: # Something went wrong, print the message and quit since we can't initialize an engine @@ -134,7 +140,7 @@ class model_backend(InferenceModel): # Build request JSON data # GooseAI is a subntype of OAI. So to check if it's this type, we check the configname as a workaround # as the koboldai_vars.model will always be OAI - if "GooseAI" in utils.koboldai_vars.configname: + if self.source == "GooseAI": reqdata = { "prompt": decoded_prompt, "max_tokens": max_new, @@ -163,7 +169,7 @@ class model_backend(InferenceModel): } req = requests.post( - self.url, + "{}/{}/completions".format(self.url, self.model_name), json=reqdata, headers={ "Authorization": "Bearer " + self.key, diff --git a/modeling/inference_models/readonly/class.py b/modeling/inference_models/readonly/class.py index 92531af4..98573990 100644 --- a/modeling/inference_models/readonly/class.py +++ b/modeling/inference_models/readonly/class.py @@ -33,7 +33,7 @@ class model_backend(InferenceModel): def is_valid(self, model_name, model_path, menu_path): return model_name == "ReadOnly" - def get_requested_parameters(self, model_name, model_path, menu_path): + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): requested_parameters = [] return requested_parameters From 52f5d879061c7ce593fe05a417466d83425f0ad6 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 11:25:28 -0400 Subject: [PATCH 60/68] Fix horde tokenizer --- modeling/inference_models/horde/class.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/modeling/inference_models/horde/class.py b/modeling/inference_models/horde/class.py index 2cc01708..f7da6604 100644 --- a/modeling/inference_models/horde/class.py +++ b/modeling/inference_models/horde/class.py @@ -30,6 +30,7 @@ class model_backend(InferenceModel): self.key = "0000000000" self.models = self.get_cluster_models() self.model_name = "Horde" + self.model = [] # Do not allow API to be served over the API @@ -114,7 +115,7 @@ class model_backend(InferenceModel): engines = req.json() try: - engines = [{"text": "all", "value": "all"}] + [{"text": en["name"], "value": en["name"]} for en in engines] + engines = [{"text": "All", "value": "all"}] + [{"text": en["name"], "value": en["name"]} for en in engines] except: 
logger.error(engines) raise @@ -127,10 +128,14 @@ class model_backend(InferenceModel): return engines def _load(self, save_model: bool, initial_load: bool) -> None: + tokenizer_name = "gpt2" + if len(self.model) > 0: + if self.model[0] == "all" and len(self.model) > 1: + tokenizer_name = self.model[1] + else: + tokenizer_name = self.model[0] self.tokenizer = self._get_tokenizer( - self.model - #if len(self.model) > 0 - #else "gpt2", + tokenizer_name ) def _save_settings(self): From 0376ab5715a8283f05db91b4eede862bf84f216a Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 11:42:29 -0400 Subject: [PATCH 61/68] KoboldAI API model name fix --- modeling/inference_models/api/class.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modeling/inference_models/api/class.py b/modeling/inference_models/api/class.py index b3129d5a..64cfd2ab 100644 --- a/modeling/inference_models/api/class.py +++ b/modeling/inference_models/api/class.py @@ -28,6 +28,7 @@ class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() self.base_url = "" + self.model_name = "KoboldAI API" def is_valid(self, model_name, model_path, menu_path): return model_name == "API" From 51cea7eb9dfafaf45a3b58b56dd5df45d21dca99 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 12:43:17 -0400 Subject: [PATCH 62/68] Added ability to add labels that are based on validation data in model loading settings --- modeling/inference_models/hf.py | 12 ++++++++++++ static/application.js | 31 +++++++++++++++++++++++++++++-- static/custom.css | 1 + static/koboldai.css | 1 + static/koboldai.js | 29 ++++++++++++++++++++++++++++- 5 files changed, 71 insertions(+), 3 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index ee585321..7a21bca6 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -77,6 +77,18 @@ class HFInferenceModel(InferenceModel): break_values += [0] * (gpu_count - len(break_values)) if disk_blocks is not None: break_values += [int(disk_blocks)] + requested_parameters.append({ + "uitype": "Valid Display", + "unit": "text", + "label": "Current Allocated Layers: %1/{}".format(layer_count), #%1 will be the validation value + "id": "valid_layers", + "max": layer_count, + "step": 1, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) for i in range(gpu_count): requested_parameters.append({ "uitype": "slider", diff --git a/static/application.js b/static/application.js index 8bc6c830..11fba578 100644 --- a/static/application.js +++ b/static/application.js @@ -4080,6 +4080,25 @@ function model_settings_checker() { accept.classList.add("disabled"); accept.disabled = true; } + + + //We now have valid display boxes potentially. 
We'll go through them and update the display + for (const item of document.querySelectorAll(".model_settings_valid_display:not(#blank_model_settings_valid_display)")) { + check_value = 0 + missing_element = false; + for (const temp of item.check_data['sum']) { + if (document.getElementById(item.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(item.id.split("|")[0] +"|" + temp + "_value").value); + } else { + missing_element = true; + } + } + if (!missing_element) { + item.innerText = item.original_text.replace("%1", check_value); + } + + + } } function selected_model_info(sent_data) { @@ -4250,18 +4269,26 @@ function selected_model_info(sent_data) { new_setting.querySelector('#blank_model_settings_text').remove(); } + if (item['uitype'] == "Valid Display") { + new_setting = document.createElement("DIV"); + new_setting.classList.add("model_settings_valid_display"); + new_setting.id = loader + "|" + item['id'] + "_value"; + new_setting.innerText = item['label']; + new_setting.check_data = item['check']; + new_setting.original_text = item['label']; + } + model_area.append(new_setting); loadmodelsettings.append(model_area); } } //unhide the first plugin settings - console.log(document.getElementById("modelplugin").value + "_settings_area"); if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) { document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); } - model_settings_checker(); + model_settings_checker() } diff --git a/static/custom.css b/static/custom.css index 968d73e4..b8e3f455 100644 --- a/static/custom.css +++ b/static/custom.css @@ -2395,6 +2395,7 @@ body.connected .popupfooter, .popupfooter.always-available { padding-top: 0px; text-align: right; font-size: calc(0.8em + var(--font_size_adjustment)); + text-align: left; } .setting_label { diff --git a/static/koboldai.css b/static/koboldai.css index 85aea08a..3252c21a 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -386,6 +386,7 @@ border-top-right-radius: var(--tabs_rounding); padding-top: 0px; text-align: right; font-size: calc(0.8em + var(--font_size_adjustment)); + text-align: left; } .setting_label { diff --git a/static/koboldai.js b/static/koboldai.js index fc33a020..99383728 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1754,6 +1754,25 @@ function model_settings_checker() { accept.classList.add("disabled"); accept.disabled = true; } + + + //We now have valid display boxes potentially. 
We'll go through them and update the display + for (const item of document.querySelectorAll(".model_settings_valid_display:not(#blank_model_settings_valid_display)")) { + check_value = 0 + missing_element = false; + for (const temp of item.check_data['sum']) { + if (document.getElementById(item.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(item.id.split("|")[0] +"|" + temp + "_value").value); + } else { + missing_element = true; + } + } + if (!missing_element) { + item.innerText = item.original_text.replace("%1", check_value); + } + + + } } function selected_model_info(sent_data) { @@ -1924,13 +1943,21 @@ function selected_model_info(sent_data) { new_setting.querySelector('#blank_model_settings_text').remove(); } + if (item['uitype'] == "Valid Display") { + new_setting = document.createElement("DIV"); + new_setting.classList.add("model_settings_valid_display"); + new_setting.id = loader + "|" + item['id'] + "_value"; + new_setting.innerText = item['label']; + new_setting.check_data = item['check']; + new_setting.original_text = item['label']; + } + model_area.append(new_setting); loadmodelsettings.append(model_area); } } //unhide the first plugin settings - console.log(document.getElementById("modelplugin").value + "_settings_area"); if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) { document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); } From 64ef8ca7c29a7eedc19f194a2c3a3e6506c80a8c Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 14:04:26 -0400 Subject: [PATCH 63/68] Fix for UI1 not highlighting selected model --- static/custom.css | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/static/custom.css b/static/custom.css index b8e3f455..25aa7818 100644 --- a/static/custom.css +++ b/static/custom.css @@ -2410,4 +2410,9 @@ body.connected .popupfooter, .popupfooter.always-available { .input_error { border: 5px solid red !important; box-sizing: border-box !important; +} + +.popup .model_item.model_menu_selected { + color: var(--popup_selected_color); + background-color: var(--popup_selected_color_text); } \ No newline at end of file From acf5b40cd8907996a5365e2353dd3ca8c09ff134 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 19:38:37 -0400 Subject: [PATCH 64/68] Bug fix --- aiserver.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/aiserver.py b/aiserver.py index e492cfcf..9653fb25 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1579,11 +1579,12 @@ def general_startup(override_args=None): arg_parameters['use_gpu'] = True for parameter in parameters: - if parameter['default'] == "" and parameter['id'] not in arg_parameters: - mising_parameters.append(parameter['id']) - ok_to_load = False - elif parameter['id'] not in arg_parameters: - arg_parameters[parameter['id']] = parameter['default'] + if parameter['uitype'] != "Valid Display": + if parameter['default'] == "" and parameter['id'] not in arg_parameters: + mising_parameters.append(parameter['id']) + ok_to_load = False + elif parameter['id'] not in arg_parameters: + arg_parameters[parameter['id']] = parameter['default'] if not ok_to_load: logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) From 9723154bed0c442a7d0140c077f5c5edc7e2f73e Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 20:10:11 -0400 Subject: [PATCH 65/68] Fix for --path --- aiserver.py | 7 +++---- modeling/inference_models/hf.py | 4 +++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/aiserver.py b/aiserver.py index 9653fb25..4e02ef96 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1545,14 +1545,13 @@ def general_startup(override_args=None): if(modpath): # Save directory to koboldai_vars koboldai_vars.model = "NeoCustom" - koboldai_vars.custmodpth = modpath + args.path = modpath elif args.model: logger.message(f"Welcome to KoboldAI!") logger.message(f"You have selected the following Model: {args.model}") if args.path: logger.message(f"You have selected the following path for your Model: {args.path}") - koboldai_vars.custmodpth = args.path; - koboldai_vars.colaburl = args.path + "/request"; # Lets just use the same parameter to keep it simple + model_backends["KoboldAI Old Colab Method"].colaburl = args.path + "/request"; # Lets just use the same parameter to keep it simple #setup socketio relay queue koboldai_settings.queue = multiprocessing.Queue() @@ -1592,7 +1591,7 @@ def general_startup(override_args=None): exit() arg_parameters['id'] = args.model arg_parameters['model'] = args.model - arg_parameters['model_path'] = args.path + arg_parameters['path'] = args.path arg_parameters['menu_path'] = "" model_backends[args.model_backend].set_input_parameters(arg_parameters) koboldai_vars.model = args.model diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 7a21bca6..c7bfdee4 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -338,9 +338,11 @@ class HFInferenceModel(InferenceModel): Returns a string of the model's path locally, or None if it is not downloaded. If ignore_existance is true, it will always return a path. 
""" + if os.path.exists(self.path): + return self.path if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]: - model_path = utils.koboldai_vars.custmodpth + model_path = self.path assert model_path # Path can be absolute or relative to models directory From 9bc9021843adf78f5b670a6974a4643f18efa099 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 21:16:54 -0400 Subject: [PATCH 66/68] Added better help message for model_parameters in command line arguments --- aiserver.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/aiserver.py b/aiserver.py index 4e02ef96..406eb01d 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1366,7 +1366,7 @@ def general_startup(override_args=None): parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") parser.add_argument("--model_backend", default="Huggingface", help="Specify the model backend you want to use") - parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input to the model loading process (leave blank to get required parameters)") + parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input to the model loading process (set to help to get required parameters)") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") parser.add_argument("--sh_apikey", help="Specify the API key to use for txt2img from the Stable Horde. Get a key from https://horde.koboldai.net/register") @@ -1571,12 +1571,13 @@ def general_startup(override_args=None): parameters = model_backends[args.model_backend].get_requested_parameters(args.model, args.path, "") ok_to_load = True mising_parameters = [] - arg_parameters = json.loads(args.model_parameters.replace("'", "\"")) if args.model_parameters != "" else {} + arg_parameters = json.loads(args.model_parameters.replace("'", "\"")) if args.model_parameters != "" and args.model_parameters.lower() != "help" else {} #If we're on colab we'll set everything to GPU0 if args.colab and args.model_backend == 'Huggingface' and koboldai_vars.on_colab: arg_parameters['use_gpu'] = True + for parameter in parameters: if parameter['uitype'] != "Valid Display": if parameter['default'] == "" and parameter['id'] not in arg_parameters: @@ -1586,9 +1587,13 @@ def general_startup(override_args=None): arg_parameters[parameter['id']] = parameter['default'] if not ok_to_load: logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") - logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) + logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters if x['uitype'] != "Valid Display"]))) logger.error("Missing: {}".format(", ".join(mising_parameters))) exit() + if args.model_parameters.lower() == "help": + logger.error("Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") + logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters if x['uitype'] != "Valid Display"]))) + exit() arg_parameters['id'] = args.model arg_parameters['model'] = args.model arg_parameters['path'] = args.path From cce5c1932cd94d3c710db62d1ced8feac2b5d774 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 21:40:39 -0400 Subject: [PATCH 67/68] Fix for custom model names --- aiserver.py | 5 ++++- modeling/inference_models/generic_hf_torch/class.py | 2 +- modeling/inference_models/hf.py | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index 406eb01d..d9ed0088 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1034,7 +1034,7 @@ def getmodelname(): if(koboldai_vars.online_model != ''): return(f"{koboldai_vars.model}/{koboldai_vars.online_model}") if(koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")): - modelname = os.path.basename(os.path.normpath(koboldai_vars.custmodpth)) + modelname = os.path.basename(os.path.normpath(model.path)) return modelname else: modelname = koboldai_vars.model if koboldai_vars.model is not None else "Read Only" @@ -1687,6 +1687,9 @@ def load_model(model_backend, initial_load=False): model = model_backends[model_backend] model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) koboldai_vars.model = model.model_name if "model_name" in vars(model) else model.id #Should have model_name, but it could be set to id depending on how it's setup + if koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"): + koboldai_vars.model = os.path.basename(os.path.normpath(model.path)) + logger.info(koboldai_vars.model) logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py index bbd42096..fd4c2a1a 100644 --- a/modeling/inference_models/generic_hf_torch/class.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -41,7 +41,7 @@ class model_backend(HFTorchInferenceModel): if self.model_name == "NeoCustom": self.model_name = os.path.basename( - os.path.normpath(utils.koboldai_vars.custmodpth) + os.path.normpath(self.path) ) utils.koboldai_vars.model = self.model_name diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index c7bfdee4..5987a1ce 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -188,6 +188,7 @@ class HFInferenceModel(InferenceModel): self.usegpu = parameters['use_gpu'] if 'use_gpu' in 
parameters else None self.breakmodel = False self.lazy_load = False + logger.info(parameters) self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id'] self.path = parameters['path'] if 'path' in parameters else None From 47276c3424df73bd13fe7bcbe1c686b94319507c Mon Sep 17 00:00:00 2001 From: ebolam Date: Sat, 27 May 2023 08:49:21 -0400 Subject: [PATCH 68/68] Bug Fix --- modeling/inference_models/hf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 5987a1ce..4226d1b1 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -339,7 +339,8 @@ class HFInferenceModel(InferenceModel): Returns a string of the model's path locally, or None if it is not downloaded. If ignore_existance is true, it will always return a path. """ - if os.path.exists(self.path): + if self.path is not None: + if os.path.exists(self.path): return self.path if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]:
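The last hunk above only adds a None guard around the existing os.path.exists() call (the `return self.path` line stays as unchanged context). A minimal standalone sketch of the pattern it introduces — the function name and print examples are mine, not the repo's; in the patch the logic lives inside HFInferenceModel.get_local_model_path():

import os
from typing import Optional

def existing_path_or_none(path: Optional[str]) -> Optional[str]:
    # The None check has to come first: os.path.exists(None) raises TypeError,
    # which is presumably the crash this "Bug Fix" commit avoids when no --path
    # or custom model directory has been supplied.
    if path is not None:
        if os.path.exists(path):
            return path
    return None

print(existing_path_or_none(None))        # None, no exception raised
print(existing_path_or_none("./models"))  # "./models" if that directory exists, otherwise None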
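The `parameters = {}` argument threaded through every backend's get_requested_parameters() at the start of this series lets values that were already supplied (by a saved *.model_backend.settings file, a command-line JSON, or an earlier round-trip of the load-model UI) be fed back in, so the generated form fields default to them — this is what the horde backend does for `key` and `url`. A toy sketch of that pattern, shaped like the hunks above; DemoBackend and the placeholder URL are hypothetical, not the repo's model_backend class:

class DemoBackend:
    def __init__(self):
        self.key = "0000000000"             # anonymous default, as in the horde backend
        self.url = "https://horde.example"  # placeholder URL, for illustration only

    def get_requested_parameters(self, model_name, model_path, menu_path, parameters={}):
        # Values handed back in `parameters` win over whatever the instance already holds.
        if "key" in parameters:
            self.key = parameters["key"]
        if "url" in parameters:
            self.url = parameters["url"]
        return [
            {"uitype": "text", "id": "url",
             "default": self.url if "url" not in parameters else parameters["url"]},
            {"uitype": "text", "id": "key",
             "default": self.key if "key" not in parameters else parameters["key"]},
        ]

fields = DemoBackend().get_requested_parameters("CLUSTER", None, "", parameters={"key": "my-horde-key"})
print(fields[1]["default"])  # -> "my-horde-key"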
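PATCH 62/68 works in two halves: hf.py emits a "Valid Display" row whose check dict names the inputs to add up ({"sum": [...], "value": layer_count, "check": "="}), and model_settings_checker() in the JS sums the current values of those inputs and splices the total into the %1 placeholder of the label. A short Python restatement of that client-side step, for illustration only — function and variable names are mine, and the 32-layer, two-GPU split is an invented example:

from typing import Dict

def render_valid_display(label: str, check: Dict, current_values: Dict[str, int]) -> str:
    # Mirrors the JS loop over .model_settings_valid_display elements; only the
    # "sum" list is needed to build the label text shown to the user.
    total = sum(int(current_values.get(field, 0)) for field in check["sum"])
    return label.replace("%1", str(total))

check = {"sum": ["0_Layers", "1_Layers", "CPU_Layers", "Disk_Layers"], "value": 32, "check": "="}
values = {"0_Layers": 20, "1_Layers": 8, "CPU_Layers": 4, "Disk_Layers": 0}
print(render_valid_display("Current Allocated Layers: %1/32", check, values))
# -> Current Allocated Layers: 32/32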