Merge pull request #362 from ebolam/Model_Plugins

Implement modular model backends Phase 1
This commit is contained in:
henk717
2023-05-27 15:33:20 +02:00
committed by GitHub
24 changed files with 2991 additions and 1242 deletions

View File

@@ -56,6 +56,7 @@ import html
import argparse
import sys
import gc
import traceback
import lupa
@@ -167,6 +168,7 @@ class MenuFolder(MenuItem):
"size": "",
"isMenu": True,
"isDownloaded": False,
"isDirectory": False
}
class MenuModel(MenuItem):
@@ -177,11 +179,13 @@ class MenuModel(MenuItem):
vram_requirements: str = "",
model_type: MenuModelType = MenuModelType.HUGGINGFACE,
experimental: bool = False,
model_backend: str = "Huggingface",
) -> None:
super().__init__(label, name, experimental)
self.model_type = model_type
self.vram_requirements = vram_requirements
self.is_downloaded = is_model_downloaded(self.name)
self.model_backend = model_backend
def to_ui1(self) -> list:
return [
@@ -199,8 +203,28 @@ class MenuModel(MenuItem):
"size": self.vram_requirements,
"isMenu": False,
"isDownloaded": self.is_downloaded,
"isDirectory": False,
}
class MenuPath(MenuItem):
def to_ui1(self) -> list:
return [
self.label,
self.name,
"",
True,
]
def to_json(self) -> dict:
return {
"label": self.label,
"name": self.name,
"size": "",
"isMenu": True,
"isDownloaded": False,
"isDirectory": True,
"path": "./models"
}
# AI models Menu
# This is a dict of lists where the key is the menu name, and the list is the menu items.
@@ -208,9 +232,9 @@ class MenuModel(MenuItem):
# 3: the memory requirement for the model, 4: if the item is a menu or not (True/False)
model_menu = {
"mainmenu": [
MenuModel("Load a model from its directory", "NeoCustom"),
MenuModel("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"),
MenuFolder("Load custom model from Hugging Face", "customhuggingface"),
MenuPath("Load a model from its directory", "NeoCustom"),
MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"),
MenuModel("Load custom model from Hugging Face", "customhuggingface", ""),
MenuFolder("Adventure Models", "adventurelist"),
MenuFolder("Novel Models", "novellist"),
MenuFolder("Chat Models", "chatlist"),
@@ -224,7 +248,7 @@ model_menu = {
MenuFolder("Official RWKV-4", "rwkvlist"),
MenuFolder("Untuned GPT2", "gpt2list"),
MenuFolder("Online Services", "apilist"),
MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER),
MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER, model_backend="Read Only"),
],
'adventurelist': [
MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "64GB"),
@@ -361,12 +385,11 @@ model_menu = {
MenuFolder("Return to Main Menu", "mainmenu"),
],
'apilist': [
MenuModel("GooseAI API (requires API key)", "GooseAI", model_type=MenuModelType.ONLINE_API),
MenuModel("OpenAI API (requires API key)", "OAI", model_type=MenuModelType.ONLINE_API),
MenuModel("InferKit API (requires API key)", "InferKit", model_type=MenuModelType.ONLINE_API),
MenuModel("KoboldAI API", "API", model_type=MenuModelType.ONLINE_API),
MenuModel("Basic Model API", "Colab", model_type=MenuModelType.ONLINE_API),
MenuModel("KoboldAI Horde", "CLUSTER", model_type=MenuModelType.ONLINE_API),
MenuModel("GooseAI API (requires API key)", "GooseAI", model_type=MenuModelType.ONLINE_API, model_backend="GooseAI"),
MenuModel("OpenAI API (requires API key)", "OAI", model_type=MenuModelType.ONLINE_API, model_backend="OpenAI"),
MenuModel("KoboldAI API", "API", model_type=MenuModelType.ONLINE_API, model_backend="KoboldAI API"),
MenuModel("Basic Model API", "Colab", model_type=MenuModelType.ONLINE_API, model_backend="KoboldAI Old Colab Method"),
MenuModel("KoboldAI Horde", "CLUSTER", model_type=MenuModelType.ONLINE_API, model_backend="Horde"),
MenuFolder("Return to Main Menu", "mainmenu"),
]
}
@@ -599,6 +622,24 @@ utils.socketio = socketio
# Weird import position to steal koboldai_vars from utils
from modeling.patches import patch_transformers
#Load all of the model importers
import importlib
model_backend_code = {}
model_backends = {}
for module in os.listdir("./modeling/inference_models"):
if not os.path.isfile(os.path.join("./modeling/inference_models",module)) and module != '__pycache__':
try:
model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module))
model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend()
if 'disable' in vars(model_backends[model_backend_code[module].model_backend_name]):
if model_backends[model_backend_code[module].model_backend_name].disable:
del model_backends[model_backend_code[module].model_backend_name]
except Exception:
logger.error("Model Backend {} failed to load".format(module))
logger.error(traceback.format_exc())
logger.info("We loaded the following model backends: \n{}".format("\n".join([x for x in model_backends])))
old_socketio_on = socketio.on
def new_socketio_on(*a, **k):
@@ -614,10 +655,14 @@ def new_socketio_on(*a, **k):
socketio.on = new_socketio_on
def emit(*args, **kwargs):
try:
return _emit(*args, **kwargs)
except AttributeError:
return socketio.emit(*args, **kwargs)
if has_request_context():
try:
return _emit(*args, **kwargs)
except AttributeError:
return socketio.emit(*args, **kwargs)
else: #We're trying to send data outside of the http context. This won't work. Try the relay
if koboldai_settings.queue is not None:
koboldai_settings.queue.put([args[0], args[1], kwargs])
utils.emit = emit
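
When emit() runs outside an HTTP request context it cannot reach the client directly, so the [event, data, kwargs] triple is queued and replayed by the socket_io_relay background task started further below. A rough sketch of what that consumer has to do (not the actual implementation):

def socket_io_relay_sketch(queue, socketio):
    # Drain the multiprocessing queue and re-emit each queued event from a
    # context where socketio.emit is allowed to run.
    while True:
        event, data, kwargs = queue.get()
        socketio.emit(event, data, **kwargs)
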
#replacement for tpool.execute to maintain request contexts
@@ -905,6 +950,8 @@ def sendModelSelection(menu="mainmenu", folder="./models"):
)
def get_folder_path_info(base):
if base is None:
return [], []
if base == 'This PC':
breadcrumbs = [['This PC', 'This PC']]
paths = [["{}:\\".format(chr(i)), "{}:\\".format(chr(i))] for i in range(65, 91) if os.path.exists("{}:".format(chr(i)))]
@@ -987,7 +1034,7 @@ def getmodelname():
if(koboldai_vars.online_model != ''):
return(f"{koboldai_vars.model}/{koboldai_vars.online_model}")
if(koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
modelname = os.path.basename(os.path.normpath(koboldai_vars.custmodpth))
modelname = os.path.basename(os.path.normpath(model.path))
return modelname
else:
modelname = koboldai_vars.model if koboldai_vars.model is not None else "Read Only"
@@ -1318,16 +1365,14 @@ def general_startup(override_args=None):
parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable")
parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)")
parser.add_argument("--model", help="Specify the Model Type to skip the Menu")
parser.add_argument("--model_backend", default="Huggingface", help="Specify the model backend you want to use")
parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input to the model loading process (set to help to get required parameters)")
parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)")
parser.add_argument("--apikey", help="Specify the API key to use for online services")
parser.add_argument("--sh_apikey", help="Specify the API key to use for txt2img from the Stable Horde. Get a key from https://horde.koboldai.net/register")
parser.add_argument("--req_model", type=str, action='append', required=False, help="Which models which we allow to generate for us during cluster mode. Can be specified multiple times.")
parser.add_argument("--revision", help="Specify the model revision for huggingface models (can be a git branch/tag name or a git commit hash)")
parser.add_argument("--cpu", action='store_true', help="By default unattended launches are on the GPU use this option to force CPU usage.")
parser.add_argument("--breakmodel", action='store_true', help=argparse.SUPPRESS)
parser.add_argument("--breakmodel_layers", type=int, help=argparse.SUPPRESS)
parser.add_argument("--breakmodel_gpulayers", type=str, help="If using a model that supports hybrid generation, this is a comma-separated list that specifies how many layers to put on each GPU device. For example to put 8 layers on device 0, 9 layers on device 1 and 11 layers on device 2, use --breakmodel_gpulayers 8,9,11")
parser.add_argument("--breakmodel_disklayers", type=int, help="If using a model that supports hybrid generation, this is the number of layers to put in disk cache.")
parser.add_argument("--override_delete", action='store_true', help="Deleting stories from inside the browser is disabled if you are using --remote and enabled otherwise. Using this option will instead allow deleting stories if using --remote and prevent deleting stories otherwise.")
parser.add_argument("--override_rename", action='store_true', help="Renaming stories from inside the browser is disabled if you are using --remote and enabled otherwise. Using this option will instead allow renaming stories if using --remote and prevent renaming stories otherwise.")
parser.add_argument("--configname", help="Force a fixed configuration name to aid with config management.")
@@ -1360,6 +1405,7 @@ def general_startup(override_args=None):
args = parser.parse_args(shlex.split(override_args))
elif(os.environ.get("KOBOLDAI_ARGS") is not None):
import shlex
logger.info("Using environmental variables instead of command arguments: {}".format(os.environ["KOBOLDAI_ARGS"]))
args = parser.parse_args(shlex.split(os.environ["KOBOLDAI_ARGS"]))
else:
args = parser.parse_args()
@@ -1382,9 +1428,11 @@ def general_startup(override_args=None):
for arg in temp:
if arg == "path":
if "model_path" in os.environ:
logger.info("Setting model path based on enviornmental variable: {}".format(os.environ["model_path"]))
setattr(args, arg, os.environ["model_path"])
else:
if arg in os.environ:
logger.info("Setting {} based on enviornmental variable: {}".format(arg, os.environ[arg]))
if isinstance(getattr(args, arg), bool):
if os.environ[arg].lower() == "true":
setattr(args, arg, True)
@@ -1410,8 +1458,6 @@ def general_startup(override_args=None):
args.max_summary_length = int(args.max_summary_length)
if args.model:
koboldai_vars.model = args.model;
koboldai_vars.revision = args.revision
koboldai_settings.multi_story = args.multi_story
@@ -1436,7 +1482,7 @@ def general_startup(override_args=None):
koboldai_vars.quiet = True
if args.nobreakmodel:
koboldai_vars.nobreakmodel = True
model_backends['Huggingface'].nobreakmodel = True
if args.remote:
koboldai_vars.host = True;
@@ -1447,6 +1493,9 @@ def general_startup(override_args=None):
if args.localtunnel:
koboldai_vars.host = True;
if args.lowmem:
model_backends['Huggingface'].low_mem = True
if args.host != "Disabled":
# This means --host option was submitted without an argument
# Enable all LAN IPs (0.0.0.0/0)
@@ -1479,6 +1528,9 @@ def general_startup(override_args=None):
koboldai_vars.trust_remote_code = True
if args.cpu:
koboldai_vars.use_colab_tpu = False
koboldai_vars.hascuda = False
koboldai_vars.usegpu = False
model_backends['Huggingface'].nobreakmodel = True
koboldai_vars.smandelete = koboldai_vars.host == args.override_delete
koboldai_vars.smanrename = koboldai_vars.host == args.override_rename
@@ -1493,262 +1545,67 @@ def general_startup(override_args=None):
if(modpath):
# Save directory to koboldai_vars
koboldai_vars.model = "NeoCustom"
koboldai_vars.custmodpth = modpath
args.path = modpath
elif args.model:
logger.message(f"Welcome to KoboldAI!")
logger.message(f"You have selected the following Model: {koboldai_vars.model}")
logger.message(f"You have selected the following Model: {args.model}")
if args.path:
logger.message(f"You have selected the following path for your Model: {args.path}")
koboldai_vars.custmodpth = args.path;
koboldai_vars.colaburl = args.path + "/request"; # Let's just use the same parameter to keep it simple
model_backends["KoboldAI Old Colab Method"].colaburl = args.path + "/request"; # Let's just use the same parameter to keep it simple
#setup socketio relay queue
koboldai_settings.queue = multiprocessing.Queue()
socketio.start_background_task(socket_io_relay, koboldai_settings.queue, socketio)
#==================================================================#
# Load Model
#==================================================================#
@socketio.on("get_model_info")
def get_model_info(model, directory=""):
logger.info("Selected: {}, {}".format(model, directory))
# if the model is in the api list
disk_blocks = 0
key = False
breakmodel = False
gpu = False
layer_count = None
key_value = ""
break_values = []
url = False
default_url = None
models_on_url = False
multi_online_models = False
show_online_model_select=False
gpu_count = torch.cuda.device_count()
gpu_names = []
send_horde_models = False
show_custom_model_box = False
for i in range(gpu_count):
gpu_names.append(torch.cuda.get_device_name(i))
if model in ['Colab', 'API']:
url = True
elif model == 'CLUSTER':
models_on_url = True
show_online_model_select=True
url = True
key = True
default_url = koboldai_vars.horde_url
multi_online_models = True
key_value = koboldai_vars.horde_api_key
url = koboldai_vars.horde_url
if key_value:
send_horde_models = True
elif model in [x.name for x in model_menu['apilist']]:
show_online_model_select=True
if path.exists("settings/{}.v2_settings".format(model)):
with open("settings/{}.v2_settings".format(model), "r") as file:
# Check if API key exists
try:
js = json.load(file)
if("apikey" in js and js["apikey"] != ""):
# API key exists, grab it and close the file
key_value = js["apikey"]
elif 'oaiapikey' in js and js['oaiapikey'] != "":
key_value = js["oaiapikey"]
if model in ('GooseAI', 'OAI'):
get_oai_models({'model': model, 'key': key_value})
except json.decoder.JSONDecodeError:
print(":(")
pass
key = True
elif model == 'ReadOnly':
pass
#elif model == 'customhuggingface':
# show_custom_model_box = True
elif args.cpu:
pass
else:
layer_count = get_layer_count(model, directory=directory)
if layer_count is None:
breakmodel = False
gpu = True
else:
breakmodel = True
if model in ["NeoCustom", "GPT2Custom", "customhuggingface"]:
filename = "settings/{}.breakmodel".format(os.path.basename(os.path.normpath(directory)))
else:
filename = "settings/{}.breakmodel".format(model.replace("/", "_"))
if path.exists(filename):
with open(filename, "r") as file:
data = [x for x in file.read().split("\n")[:2] if x != '']
if len(data) < 2:
data.append("0")
break_values, disk_blocks = data
break_values = break_values.split(",")
else:
break_values = [layer_count]
break_values = [int(x) for x in break_values if x != '']
break_values += [0] * (gpu_count - len(break_values))
emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key, 'multi_online_models': multi_online_models, 'default_url': default_url,
'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel,
'disk_break_value': disk_blocks, 'accelerate': True,
'break_values': break_values, 'gpu_count': gpu_count,
'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url,
'show_custom_model_box': show_custom_model_box}, broadcast=True, room="UI_1")
emit('selected_model_info', {'key_value': key_value, 'key':key,
'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, 'multi_online_models': multi_online_models, 'default_url': default_url,
'disk_break_value': disk_blocks, 'disk_break': True,
'break_values': break_values, 'gpu_count': gpu_count,
'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, 'show_online_model_select': show_online_model_select,
'bit_8_available': koboldai_vars.bit_8_available if koboldai_vars.experimental_features else False,
'show_custom_model_box': show_custom_model_box})
if send_horde_models:
get_cluster_models({'key': key_value, 'url': default_url})
elif key_value != "" and model in [x.name for x in model_menu['apilist']] and model != 'CLUSTER':
get_oai_models(key_value)
if koboldai_vars.use_colab_tpu and args.model_backend == "Huggingface":
args.model_backend = "Huggingface MTJ"
def get_layer_count(model, directory=""):
if(model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]):
if(model == "GPT2Custom"):
with open(os.path.join(directory, "config.json"), "r") as f:
model_config = json.load(f)
# Get the model_type from the config or assume a model type if it isn't present
else:
if(directory):
model = directory
from transformers import AutoConfig
if(os.path.isdir(model.replace('/', '_'))):
model_config = AutoConfig.from_pretrained(model.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache")
elif(is_model_downloaded(model)):
model_config = AutoConfig.from_pretrained("models/{}".format(model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
elif(os.path.isdir(directory)):
model_config = AutoConfig.from_pretrained(directory, revision=koboldai_vars.revision, cache_dir="cache")
elif(os.path.isdir(koboldai_vars.custmodpth.replace('/', '_'))):
model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache")
else:
model_config = AutoConfig.from_pretrained(model, revision=koboldai_vars.revision, cache_dir="cache")
try:
if (model_config.model_type != 'gpt2' or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel:
return utils.num_layers(model_config)
else:
return None
except:
return None
else:
return None
@socketio.on('OAI_Key_Update')
def get_oai_models(data):
key = data['key']
model = data['model']
koboldai_vars.oaiapikey = key
if model == 'OAI':
url = "https://api.openai.com/v1/engines"
elif model == 'GooseAI':
url = "https://api.goose.ai/v1/engines"
else:
return
if args.model:
# At this point we have to try to load the model through the selected backend
if args.model_backend not in model_backends:
logger.error("Your selected model backend ({}) isn't in the model backends we know about ({})".format(args.model_backend, ", ".join([x for x in model_backends])))
exit()
#OK, we've been given a model to load and a backend to load it through. Now we need to get a list of parameters and make sure we get what we need to actually load it
parameters = model_backends[args.model_backend].get_requested_parameters(args.model, args.path, "")
ok_to_load = True
mising_parameters = []
arg_parameters = json.loads(args.model_parameters.replace("'", "\"")) if args.model_parameters != "" and args.model_parameters.lower() != "help" else {}
# Get list of models from OAI
logger.init("OAI Engines", status="Retrieving")
req = requests.get(
url,
headers = {
'Authorization': 'Bearer '+key
}
)
if(req.status_code == 200):
r = req.json()
engines = r["data"]
try:
engines = [[en["id"], "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")] for en in engines]
except:
logger.error(engines)
raise
#If we're on colab we'll set everything to GPU0
if args.colab and args.model_backend == 'Huggingface' and koboldai_vars.on_colab:
arg_parameters['use_gpu'] = True
online_model = ""
changed=False
#Save the key
if not path.exists("settings"):
# If the client settings file doesn't exist, create it
# Write API key to file
os.makedirs('settings', exist_ok=True)
if path.exists("settings/{}.v2_settings".format(model)):
with open("settings/{}.v2_settings".format(model), "r") as file:
js = json.load(file)
if 'online_model' in js:
online_model = js['online_model']
if "apikey" in js:
if js['apikey'] != key:
changed=True
else:
js = {}
changed=True
if changed:
with open("settings/{}.v2_settings".format(model), "w") as file:
js["apikey"] = key
file.write(json.dumps(js, indent=3))
logger.init_ok("OAI Engines", status="OK")
emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1")
emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2")
for parameter in parameters:
if parameter['uitype'] != "Valid Display":
if parameter['default'] == "" and parameter['id'] not in arg_parameters:
mising_parameters.append(parameter['id'])
ok_to_load = False
elif parameter['id'] not in arg_parameters:
arg_parameters[parameter['id']] = parameter['default']
if not ok_to_load:
logger.error("Your selected backend needs additional parameters to run. Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)")
logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters if x['uitype'] != "Valid Display"])))
logger.error("Missing: {}".format(", ".join(mising_parameters)))
exit()
if args.model_parameters.lower() == "help":
logger.error("Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)")
logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters if x['uitype'] != "Valid Display"])))
exit()
arg_parameters['id'] = args.model
arg_parameters['model'] = args.model
arg_parameters['path'] = args.path
arg_parameters['menu_path'] = ""
model_backends[args.model_backend].set_input_parameters(arg_parameters)
koboldai_vars.model = args.model
return args.model_backend
else:
# Something went wrong, print the message and quit since we can't initialize an engine
logger.init_err("OAI Engines", status="Failed")
logger.error(req.json())
emit('from_server', {'cmd': 'errmsg', 'data': req.json()})
@socketio.on("get_cluster_models")
def get_cluster_models(msg):
koboldai_vars.horde_api_key = msg['key'] or koboldai_vars.horde_api_key
url = msg['url'] or koboldai_vars.horde_url
koboldai_vars.horde_url = url
# Get list of models from public cluster
print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="")
try:
req = requests.get(f"{url}/api/v2/status/models?type=text")
except:
logger.init_err("KAI Horde Models", status="Failed")
logger.error("Provided KoboldAI Horde URL unreachable")
emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"})
return
if not req.ok:
# Something went wrong, print the message and quit since we can't initialize an engine
logger.init_err("KAI Horde Models", status="Failed")
logger.error(req.json())
emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1")
return
engines = req.json()
logger.debug(engines)
try:
engines = [[en["name"], en["name"]] for en in engines]
except:
logger.error(engines)
raise
logger.debug(engines)
online_model = ""
savesettings()
logger.init_ok("KAI Horde Models", status="OK")
emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1")
emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2")
def reset_model_settings():
koboldai_vars.reset_for_model_load()
return "Read Only"
def unload_model():
global model
@@ -1781,7 +1638,7 @@ def unload_model():
koboldai_vars.badwordsids = koboldai_settings.badwordsids_default
def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=False, online_model="", use_breakmodel_args=False, breakmodel_args_default_to_cpu=False, url=None, use_8_bit=False):
def load_model(model_backend, initial_load=False):
global model
global tokenizer
global model_config
@@ -1792,188 +1649,48 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
if initial_load:
use_breakmodel_args = True
reset_model_settings()
koboldai_vars.reset_model()
koboldai_vars.cluster_requested_models = [online_model] if isinstance(online_model, str) else online_model
if koboldai_vars.cluster_requested_models == [""]:
koboldai_vars.cluster_requested_models = []
koboldai_vars.noai = False
if not use_breakmodel_args:
set_aibusy(True)
if koboldai_vars.model != 'ReadOnly':
emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True)
#Have to add a sleep so the server will send the emit for some reason
time.sleep(0.1)
set_aibusy(True)
if koboldai_vars.model != 'ReadOnly':
emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(model_backends[model_backend].model_name if "model_name" in vars(model_backends[model_backend]) else model_backends[model_backend].id)}, broadcast=True)
#Have to add a sleep so the server will send the emit for some reason
time.sleep(0.1)
if gpu_layers is not None:
args.breakmodel_gpulayers = gpu_layers
elif use_breakmodel_args:
gpu_layers = args.breakmodel_gpulayers
if breakmodel_args_default_to_cpu and gpu_layers is None:
gpu_layers = args.breakmodel_gpulayers = []
if disk_layers is not None:
args.breakmodel_disklayers = int(disk_layers)
elif use_breakmodel_args:
disk_layers = args.breakmodel_disklayers
if breakmodel_args_default_to_cpu and disk_layers is None:
disk_layers = args.breakmodel_disklayers = 0
if 'model' in globals():
model.unload()
unload_model()
if online_model == "":
koboldai_vars.configname = getmodelname()
#Let's set the GooseAI or OpenAI server URLs if that's applicable
else:
koboldai_vars.online_model = online_model
# Swap OAI Server if GooseAI was selected
if koboldai_vars.model == "GooseAI":
koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
koboldai_vars.model = "OAI"
koboldai_vars.configname = f"GooseAI_{online_model.replace('/', '_')}"
elif koboldai_vars.model == "CLUSTER" and isinstance(online_model, list):
if len(online_model) != 1:
koboldai_vars.configname = koboldai_vars.model
else:
koboldai_vars.configname = f"{koboldai_vars.model}_{online_model[0].replace('/', '_')}"
else:
koboldai_vars.configname = f"{koboldai_vars.model}_{online_model.replace('/', '_')}"
if path.exists(get_config_filename()):
changed=False
with open(get_config_filename(), "r") as file:
# Check if API key exists
js = json.load(file)
if 'online_model' in js:
if js['online_model'] != online_model:
changed=True
js['online_model'] = online_model
else:
changed=True
js['online_model'] = online_model
if changed:
with open("settings/{}.v2_settings".format(koboldai_vars.model), "w") as file:
file.write(json.dumps(js, indent=3))
# Swap OAI Server if GooseAI was selected
if koboldai_vars.model == "GooseAI":
koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
koboldai_vars.model = "OAI"
args.configname = "GooseAI" + "/" + online_model
elif koboldai_vars.model != "CLUSTER":
args.configname = koboldai_vars.model + "/" + online_model
koboldai_vars.oaiurl = koboldai_vars.oaiengines + "/{0}/completions".format(online_model)
# If transformers model was selected & GPU available, ask to use CPU or GPU
if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
# loadmodelsettings()
# loadsettings()
logger.init("GPU support", status="Searching")
koboldai_vars.hascuda = torch.cuda.is_available() and not args.cpu
koboldai_vars.bmsupported = ((koboldai_vars.model_type != 'gpt2') or koboldai_vars.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel
if(args.breakmodel is not None and args.breakmodel):
logger.warning("--breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).")
if(args.breakmodel_layers is not None):
logger.warning("--breakmodel_layers is deprecated. Use --breakmodel_gpulayers instead (see --help for details).")
if(args.model and koboldai_vars.bmsupported and not args.breakmodel_gpulayers and not args.breakmodel_layers and (not args.breakmodel_disklayers)):
logger.warning("Model launched without the --breakmodel_gpulayers argument, defaulting to GPU only mode.")
koboldai_vars.bmsupported = False
if(not koboldai_vars.bmsupported and (args.breakmodel_gpulayers is not None or args.breakmodel_layers is not None or args.breakmodel_disklayers is not None)):
logger.warning("This model does not support hybrid generation. --breakmodel_gpulayers will be ignored.")
if(koboldai_vars.hascuda):
logger.init_ok("GPU support", status="Found")
else:
logger.init_warn("GPU support", status="Not Found")
if args.cpu:
koboldai_vars.usegpu = False
gpu_layers = None
disk_layers = None
koboldai_vars.breakmodel = False
elif koboldai_vars.hascuda:
if(koboldai_vars.bmsupported):
koboldai_vars.usegpu = False
koboldai_vars.breakmodel = True
else:
koboldai_vars.breakmodel = False
koboldai_vars.usegpu = use_gpu
#if koboldai_vars.hascuda:
# if(koboldai_vars.bmsupported):
# koboldai_vars.usegpu = False
# koboldai_vars.breakmodel = True
# else:
# koboldai_vars.breakmodel = False
# koboldai_vars.usegpu = use_gpu
else:
koboldai_vars.default_preset = koboldai_settings.default_preset
# Ask for API key if InferKit was selected
if koboldai_vars.model == "InferKit":
koboldai_vars.apikey = koboldai_vars.oaiapikey
# Swap OAI Server if GooseAI was selected
if koboldai_vars.model == "GooseAI":
koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
koboldai_vars.model = "OAI"
koboldai_vars.configname = "GooseAI"
# Ask for API key if OpenAI was selected
if koboldai_vars.model == "OAI" and not koboldai_vars.configname:
koboldai_vars.configname = "OAI"
if koboldai_vars.model == "ReadOnly":
koboldai_vars.noai = True
# TODO: InferKit
if koboldai_vars.model == "ReadOnly" or koboldai_vars.noai:
pass
elif koboldai_vars.model in ["Colab", "API", "CLUSTER", "OAI"]:
koboldai_vars.colaburl = url or koboldai_vars.colaburl
koboldai_vars.usegpu = False
koboldai_vars.breakmodel = False
if koboldai_vars.model == "Colab":
from modeling.inference_models.basic_api import BasicAPIInferenceModel
model = BasicAPIInferenceModel()
elif koboldai_vars.model == "API":
from modeling.inference_models.api import APIInferenceModel
model = APIInferenceModel(koboldai_vars.colaburl.replace("/request", ""))
elif koboldai_vars.model == "CLUSTER":
from modeling.inference_models.horde import HordeInferenceModel
model = HordeInferenceModel()
elif koboldai_vars.model == "OAI":
from modeling.inference_models.openai import OpenAIAPIInferenceModel
model = OpenAIAPIInferenceModel()
model.load(initial_load=initial_load)
# TODO: This check sucks, make a model object or something
elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai:
# HF Torch
logger.init("Transformers", status='Starting')
for m in ("GPTJModel", "XGLMModel"):
try:
globals()[m] = getattr(__import__("transformers"), m)
except:
pass
from modeling.inference_models.generic_hf_torch import GenericHFTorchInferenceModel
model = GenericHFTorchInferenceModel(
koboldai_vars.model,
lazy_load=koboldai_vars.lazy_load,
low_mem=args.lowmem
)
model.load(
save_model=not (args.colab or args.cacheonly) or args.savemodel,
initial_load=initial_load,
)
logger.info(f"Pipeline created: {koboldai_vars.model}")
else:
# TPU
from modeling.inference_models.hf_mtj import HFMTJInferenceModel
model = HFMTJInferenceModel(
koboldai_vars.model
)
model.load(
save_model=not (args.colab or args.cacheonly) or args.savemodel,
initial_load=initial_load,
)
model = model_backends[model_backend]
model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel)
koboldai_vars.model = model.model_name if "model_name" in vars(model) else model.id #Should have model_name, but it could be set to id depending on how it's setup
if koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"):
koboldai_vars.model = os.path.basename(os.path.normpath(model.path))
logger.info(koboldai_vars.model)
logger.debug("Model Type: {}".format(koboldai_vars.model_type))
# TODO: Convert everywhere to use model.tokenizer
if model:
@@ -3993,7 +3710,8 @@ def calcsubmit(txt):
bias += [1] * (i - top_index)
bias[i] = b["multiplier"]
device = utils.get_auxilary_device()
device = model.get_auxilary_device()
attention_bias.attention_bias = torch.Tensor(bias).to(device)
logger.info(f"Bias by {koboldai_vars.memory_attn_bias} -- {attention_bias.attention_bias}")
logger.debug("Submit: experimental_features time {}s".format(time.time()-start_time))
@@ -6422,7 +6140,9 @@ def UI_2_retry(data):
@socketio.on('load_model_button')
@logger.catch
def UI_2_load_model_button(data):
sendModelSelection()
emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":"mainmenu"}} for item in model_menu['mainmenu'] if item.should_show()]})
#==================================================================#
# Event triggered when user clicks on a model
@@ -6430,23 +6150,56 @@ def UI_2_load_model_button(data):
@socketio.on('select_model')
@logger.catch
def UI_2_select_model(data):
#We've selected a menu
if data['model'] in model_menu:
sendModelSelection(menu=data['model'])
#We've selected a custom line
elif data['menu'] in ("NeoCustom", "GPT2Custom"):
get_model_info(data['menu'], directory=data['display_name'])
#We've selected a custom menu folder
elif data['model'] in ("NeoCustom", "GPT2Custom") and 'path' in data:
sendModelSelection(menu=data['model'], folder=data['path'])
#We've selected a custom menu
elif data['model'] in ("NeoCustom", "GPT2Custom", "customhuggingface"):
sendModelSelection(menu=data['model'], folder="./models")
logger.debug("Clicked on model entry: {}".format(data))
if data["name"] in model_menu and data['ismenu'] == "true":
emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]})
else:
#We now have some model we want to potentially load.
#First we need to send the client the model parameters (layers, etc)
get_model_info(data['model'])
#Get load methods
if 'ismenu' in data and data['ismenu'] == 'false':
valid_loaders = {}
if data['id'] in [item.name for sublist in model_menu for item in model_menu[sublist]]:
#Here if we have a model id that's in our menu, we explicitly use that backend
for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]):
valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"])
emit("selected_model_info", {"model_backends": valid_loaders})
else:
#Here we have a model that's not in our menu structure (either a custom model or a custom path)
#so we'll just go through all the possible loaders
for model_backend in model_backends:
if model_backends[model_backend].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]):
valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"])
emit("selected_model_info", {"model_backends": valid_loaders})
else:
#Get directories
paths, breadcrumbs = get_folder_path_info(data['path'])
output = []
for path in paths:
valid=False
for model_backend in model_backends:
if model_backends[model_backend].is_valid(path[1], path[0], "Custom"):
logger.debug("{} says valid".format(model_backend))
valid=True
break
else:
logger.debug("{} says invalid".format(model_backend))
output.append({'label': path[1], 'name': path[1], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid})
emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs})
return
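
To make the new handler easier to follow, here is an illustrative sketch of the payloads it exchanges (all values invented): the client reports the clicked entry, and the server answers with one requested-parameter list per backend that claims the model.

clicked_entry = {                 # sent by the client on 'select_model'
    "name": "KoboldAI/GPT-NeoX-20B-Skein",
    "id": "KoboldAI/GPT-NeoX-20B-Skein",
    "ismenu": "false",
    "menu": "adventurelist",
}
server_answer = {                 # emitted back as 'selected_model_info'
    "model_backends": {
        "Huggingface": [
            # descriptor dicts from get_requested_parameters(), e.g. the layer
            # sliders defined in hf.py below
        ],
    },
}
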
#==================================================================#
# Event triggered when user changes a model parameter and it's set to resubmit
#==================================================================#
@socketio.on('resubmit_model_info')
@logger.catch
def UI_2_resubmit_model_info(data):
valid_loaders = {}
for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]):
valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"], parameters=data)
emit("selected_model_info", {"model_backends": valid_loaders})
#==================================================================#
# Event triggered when user loads a model
@@ -6454,26 +6207,10 @@ def UI_2_select_model(data):
@socketio.on('load_model')
@logger.catch
def UI_2_load_model(data):
if not os.path.exists("settings/"):
os.mkdir("settings")
changed = True
if os.path.exists("settings/" + data['model'].replace('/', '_') + ".breakmodel"):
with open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "r") as file:
file_data = file.read().split('\n')[:2]
if len(file_data) < 2:
file_data.append("0")
gpu_layers, disk_layers = file_data
if gpu_layers == data['gpu_layers'] and disk_layers == data['disk_layers']:
changed = False
if changed:
f = open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "w")
f.write("{}\n{}".format(data['gpu_layers'], data['disk_layers']))
f.close()
koboldai_vars.colaburl = data['url'] + "/request"
koboldai_vars.model = data['model']
koboldai_vars.custmodpth = data['path']
print("loading Model")
load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit'])
logger.debug("Loading model with user input of: {}".format(data))
model_backends[data['plugin']].set_input_parameters(data)
load_model(data['plugin'])
#load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit'])
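
For completeness, a sketch of what the 'load_model' payload might contain when the Huggingface backend was configured in the UI (values invented; the ids mirror the parameters that backend requested):

example_load_request = {
    "plugin": "Huggingface",               # chooses which backend to configure
    "id": "KoboldAI/GPT-NeoX-20B-Skein",   # model id echoed back by the UI
    "path": None,                          # set when loading from a local folder
    "0_Layers": 28,
    "CPU_Layers": 0,
    "Disk_Layers": 0,
}
# UI_2_load_model() passes this dict to set_input_parameters() of the chosen
# backend and then calls load_model("Huggingface").
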
#==================================================================#
# Event triggered when load story is clicked
@@ -8095,7 +7832,8 @@ def send_one_time_messages(data, wait_time=0):
# Test
#==================================================================#
def model_info():
if model_config is not None:
global model_config
if 'model_config' in globals() and model_config is not None:
if isinstance(model_config, dict):
if 'model_type' in model_config:
model_type = str(model_config['model_type'])
@@ -10982,10 +10720,8 @@ for schema in config_endpoint_schemas:
#==================================================================#
# Final startup commands to launch Flask app
#==================================================================#
def startup():
if koboldai_vars.model == "" or koboldai_vars.model is None:
koboldai_vars.model = "ReadOnly"
socketio.start_background_task(load_model, **{'initial_load':True})
def startup(command_line_backend):
socketio.start_background_task(load_model, *(command_line_backend,), **{'initial_load':True})
print("", end="", flush=True)
@@ -10994,7 +10730,7 @@ def run():
global app
global tpu_mtj_backend
general_startup()
command_line_backend = general_startup()
# Start flask & SocketIO
logger.init("Flask", status="Starting")
if koboldai_vars.host:
@@ -11044,7 +10780,7 @@ def run():
cloudflare = _run_cloudflared(port)
koboldai_vars.cloudflare_link = cloudflare
startup()
startup(command_line_backend)
if(args.localtunnel or args.ngrok or args.remote):
with open('cloudflare.log', 'w') as cloudflarelog:
@@ -11064,7 +10800,7 @@ def run():
else:
socketio.run(app, port=port)
else:
startup()
startup(command_line_backend)
if args.unblock:
if not args.no_ui:
try:
@@ -11092,13 +10828,13 @@ def run():
if __name__ == "__main__":
run()
else:
general_startup()
command_line_backend = general_startup()
# Start flask & SocketIO
logger.init("Flask", status="Starting")
Session(app)
logger.init_ok("Flask", status="OK")
patch_transformers()
startup()
startup(command_line_backend)
koboldai_settings.port = args.port if "port" in args and args.port is not None else 5000
print("{0}\nServer started in WSGI mode!{1}".format(colors.GREEN, colors.END), flush=True)

File diff suppressed because one or more lines are too long

View File

@@ -647,7 +647,7 @@ class settings(object):
raise
class model_settings(settings):
local_only_variables = ['badwordsids', 'apikey', 'default_preset']
local_only_variables = ['apikey', 'default_preset']
no_save_variables = ['modelconfig', 'custmodpth', 'generated_tkns',
'loaded_layers', 'total_layers', 'total_download_chunks', 'downloaded_chunks', 'presets', 'default_preset',
'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition',
@@ -710,7 +710,6 @@ class model_settings(settings):
self.modeldim = -1 # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B)
self.sampler_order = [6, 0, 1, 2, 3, 4, 5]
self.newlinemode = "n"
self.lazy_load = True # Whether or not to use torch_lazy_loader.py for transformers models in order to reduce CPU memory usage
self.presets = [] # Holder for presets
self.selected_preset = ""
self.uid_presets = []
@@ -1203,7 +1202,6 @@ class undefined_settings(settings):
super().__setattr__(name, value)
logger.error("{} just set {} to {} in koboldai_vars. That variable isn't defined!".format(inspect.stack()[1].function, name, value))
class system_settings(settings):
local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'comregex_ui',
@@ -1211,7 +1209,7 @@ class system_settings(settings):
'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui', 'trust_remote_code']
no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy',
'serverstarted', 'inference_config', 'image_pipeline', 'summarizer',
'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', 'on_colab',
'summary_tokenizer', 'use_colab_tpu', 'noai', 'disable_set_aibusy', 'cloudflare_link', 'tts_model',
'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states', 'comregex_ai', 'comregex_ui', 'git_repository', 'git_branch', 'trust_remote_code']
settings_name = "system"
@@ -1237,7 +1235,7 @@ class system_settings(settings):
self.corescript = "default.lua" # Filename of corescript to load
self.gpu_device = 0 # Which PyTorch device to use when using pure GPU generation
self.savedir = os.getcwd()+"\\stories"
self.hascuda = False # Whether torch has detected CUDA on the system
self.hascuda = torch.cuda.is_available() # Whether torch has detected CUDA on the system
self.usegpu = False # Whether to launch pipeline with GPU support
self.splist = []
self.spselect = "" # Temporary storage for soft prompt filename to load

View File

@@ -169,6 +169,18 @@ class InferenceModel:
]
self.tokenizer = None
self.capabilties = ModelCapabilities()
self.model_name = "Not Defined"
def is_valid(self, model_name, model_path, menu_path, vram):
return True
def requested_parameters(self, model_name, model_path, menu_path, vram):
return {}
def set_input_parameters(self, parameters):
for parameter in parameters:
setattr(self, parameter, parameters[parameter])
return
def load(self, save_model: bool = False, initial_load: bool = False) -> None:
"""User-facing load function. Do not override this; try `_load()` instead."""
@@ -176,12 +188,19 @@ class InferenceModel:
self._pre_load()
self._load(save_model=save_model, initial_load=initial_load)
self._post_load()
self._save_settings()
def unload(self):
return
def _pre_load(self) -> None:
"""Pre load hook. Called before `_load()`."""
def _post_load(self) -> None:
"""Post load hook. Called after `_load()`."""
def _save_settings(self) -> None:
"""Save settings hook. Called after `_post_load()`."""
def _load(self, save_model: bool, initial_load: bool) -> None:
"""Main load method. All logic related to loading the model onto the

View File

@@ -6,6 +6,7 @@ import torch
import requests
import numpy as np
from typing import List, Optional, Union
import os
import utils
from logger import logger
@@ -17,15 +18,42 @@ from modeling.inference_model import (
ModelCapabilities,
)
model_backend_name = "KoboldAI API"
class APIException(Exception):
"""To be used for errors when using the Kobold API as an interface."""
class APIInferenceModel(InferenceModel):
def __init__(self, base_url: str) -> None:
class model_backend(InferenceModel):
def __init__(self) -> None:
super().__init__()
self.base_url = base_url.rstrip("/")
self.base_url = ""
self.model_name = "KoboldAI API"
def is_valid(self, model_name, model_path, menu_path):
return model_name == "API"
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
if os.path.exists("settings/api.model_backend.settings") and 'base_url' not in vars(self):
with open("settings/api.model_backend.settings", "r") as f:
self.base_url = json.load(f)['base_url']
requested_parameters = []
requested_parameters.append({
"uitype": "text",
"unit": "text",
"label": "URL",
"id": "base_url",
"default": self.base_url,
"check": {"value": "", 'check': "!="},
"tooltip": "The URL of the KoboldAI API to connect to.",
"menu_path": "",
"extra_classes": "",
"refresh_model_inputs": False
})
return requested_parameters
def set_input_parameters(self, parameters):
self.base_url = parameters['base_url'].rstrip("/")
def _load(self, save_model: bool, initial_load: bool) -> None:
tokenizer_id = requests.get(f"{self.base_url}/api/v1/model").json()["result"]
@@ -35,6 +63,10 @@ class APIInferenceModel(InferenceModel):
# Do not allow API to be served over the API
self.capabilties = ModelCapabilities(api_host=False)
def _save_settings(self):
with open("settings/api.model_backend.settings", "w") as f:
json.dump({"base_url": self.base_url}, f, indent="")
def _raw_generate(
self,
prompt_tokens: Union[List[int], torch.Tensor],

View File

@@ -4,6 +4,7 @@ import torch
import requests
import numpy as np
from typing import List, Optional, Union
import os
import utils
from logger import logger
@@ -15,19 +16,54 @@ from modeling.inference_model import (
)
model_backend_name = "KoboldAI Old Colab Method"
class BasicAPIException(Exception):
"""To be used for errors when using the Basic API as an interface."""
class BasicAPIInferenceModel(InferenceModel):
class model_backend(InferenceModel):
def __init__(self) -> None:
super().__init__()
self.colaburl = ""
# Do not allow API to be served over the API
self.capabilties = ModelCapabilities(api_host=False)
def is_valid(self, model_name, model_path, menu_path):
return model_name == "Colab"
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
if os.path.exists("settings/api.model_backend.settings") and 'colaburl' not in vars(self):
with open("settings/api.model_backend.settings", "r") as f:
self.colaburl = json.load(f)['base_url']
requested_parameters = []
requested_parameters.append({
"uitype": "text",
"unit": "text",
"label": "URL",
"id": "colaburl",
"default": self.colaburl,
"check": {"value": "", 'check': "!="},
"tooltip": "The URL of the Colab KoboldAI API to connect to.",
"menu_path": "",
"extra_classes": "",
"refresh_model_inputs": False
})
return requested_parameters
def set_input_parameters(self, parameters):
self.colaburl = parameters['colaburl']
def _initialize_model(self):
return
def _load(self, save_model: bool, initial_load: bool) -> None:
self.tokenizer = self._get_tokenizer("EleutherAI/gpt-neo-2.7B")
def _save_settings(self):
with open("settings/basic_api.model_backend.settings", "w") as f:
json.dump({"colaburl": self.colaburl}, f, indent="")
def _raw_generate(
self,
@@ -68,7 +104,7 @@ class BasicAPIInferenceModel(InferenceModel):
}
# Create request
req = requests.post(utils.koboldai_vars.colaburl, json=reqdata)
req = requests.post(self.colaburl, json=reqdata)
if req.status_code != 200:
raise BasicAPIException(f"Bad status code {req.status_code}")

View File

@@ -22,8 +22,13 @@ except ModuleNotFoundError as e:
from modeling.inference_models.hf_torch import HFTorchInferenceModel
model_backend_name = "Huggingface"
class GenericHFTorchInferenceModel(HFTorchInferenceModel):
class model_backend(HFTorchInferenceModel):
def _initialize_model(self):
return
def _load(self, save_model: bool, initial_load: bool) -> None:
utils.koboldai_vars.allowsp = True
@@ -36,9 +41,9 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel):
if self.model_name == "NeoCustom":
self.model_name = os.path.basename(
os.path.normpath(utils.koboldai_vars.custmodpth)
os.path.normpath(self.path)
)
utils.koboldai_vars.model = self.model_name
utils.koboldai_vars.model = self.model_name
# If we specify a model and it's in the root directory, we need to move
# it to the models directory (legacy folder structure to new)
@@ -54,7 +59,7 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel):
"low_cpu_mem_usage": True,
}
if utils.koboldai_vars.model_type == "gpt2":
if self.model_type == "gpt2":
# We must disable low_cpu_mem_usage if using a GPT-2 model
# because GPT-2 is not compatible with this feature yet.
tf_kwargs.pop("low_cpu_mem_usage", None)
@@ -64,12 +69,14 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel):
# If we're using torch_lazy_loader, we need to get breakmodel config
# early so that it knows where to load the individual model tensors
logger.debug("lazy_load: {} hascuda: {} breakmodel: {} nobreakmode: {}".format(self.lazy_load, utils.koboldai_vars.hascuda, self.breakmodel, self.nobreakmodel))
if (
self.lazy_load
and utils.koboldai_vars.hascuda
and utils.koboldai_vars.breakmodel
and not utils.koboldai_vars.nobreakmodel
and self.breakmodel
and not self.nobreakmodel
):
logger.debug("loading breakmodel")
self.breakmodel_device_config(self.model_config)
if self.lazy_load:
@@ -241,11 +248,12 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel):
self.patch_embedding()
if utils.koboldai_vars.hascuda:
if utils.koboldai_vars.usegpu:
if self.usegpu:
# Use just VRAM
self.model = self.model.half().to(utils.koboldai_vars.gpu_device)
elif utils.koboldai_vars.breakmodel:
elif self.breakmodel:
# Use both RAM and VRAM (breakmodel)
if not self.lazy_load:
self.breakmodel_device_config(self.model.config)
@@ -260,6 +268,11 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel):
self._move_to_devices()
else:
self.model = self.model.to("cpu").float()
self.model.kai_model = self
utils.koboldai_vars.modeldim = self.get_hidden_size()
def _save_settings(self):
with open("settings/{}.generic_hf_torch.model_backend.settings".format(self.model_name.replace("/", "_")), "w") as f:
json.dump({"layers": self.layers if 'layers' in vars(self) else [], "disk_layers": self.disk_layers if 'disk_layers' in vars(self) else 0}, f, indent="")

View File

@@ -0,0 +1,33 @@
import torch
import requests
import numpy as np
from typing import List, Optional, Union
import os
import utils
from logger import logger
from modeling.inference_model import (
GenerationResult,
GenerationSettings,
InferenceModel,
)
from modeling.inference_models.openai_gooseai import model_backend as openai_gooseai_model_backend
model_backend_name = "GooseAI"
class OpenAIAPIError(Exception):
def __init__(self, error_type: str, error_message) -> None:
super().__init__(f"{error_type}: {error_message}")
class model_backend(openai_gooseai_model_backend):
"""InferenceModel for interfacing with OpenAI's generation API."""
def __init__(self):
super().__init__()
self.url = "https://api.goose.ai/v1/engines"
self.source = "GooseAI"
def is_valid(self, model_name, model_path, menu_path):
return model_name == "GooseAI"

View File

@@ -1,25 +1,225 @@
import os
import os, sys
from typing import Optional
from transformers import AutoConfig
import warnings
import utils
import json
import koboldai_settings
from logger import logger
from modeling.inference_model import InferenceModel
import torch
import gc
class HFInferenceModel(InferenceModel):
def __init__(self, model_name: str) -> None:
def __init__(self) -> None:
super().__init__()
self.model_config = None
self.model_name = model_name
#self.model_name = model_name
self.model = None
self.tokenizer = None
self.badwordsids = koboldai_settings.badwordsids_default
self.usegpu = False
def is_valid(self, model_name, model_path, menu_path):
try:
if model_path is not None and os.path.exists(model_path):
self.model_config = AutoConfig.from_pretrained(model_path)
elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))):
self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache")
else:
self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache")
return True
except:
return False
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
requested_parameters = []
if not self.hf_torch:
return []
if model_name == 'customhuggingface':
requested_parameters.append({
"uitype": "text",
"unit": "text",
"label": "Huggingface Model Name",
"id": "custom_model_name",
"default": parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else "",
"check": {"value": "", 'check': "!="},
"tooltip": "Model name from https://huggingface.co/",
"menu_path": "",
"refresh_model_inputs": True,
"extra_classes": ""
})
if model_name != 'customhuggingface' or "custom_model_name" in parameters:
model_name = parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else model_name
if model_path is not None and os.path.exists(model_path):
self.model_config = AutoConfig.from_pretrained(model_path)
elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))):
self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache")
else:
self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache")
layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None
layer_count = None if hasattr(self, "get_model_type") and self.get_model_type() == "gpt2" else layer_count #Skip layers if we're a GPT2 model as it doesn't support breakmodel
if layer_count is not None and layer_count >= 0 and not self.nobreakmodel:
if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
temp = json.load(f)
break_values = temp['layers'] if 'layers' in temp else [layer_count]
disk_blocks = temp['disk_layers'] if 'disk_layers' in temp else 0
else:
break_values = [layer_count]
disk_blocks = 0
break_values = [int(x) for x in break_values if x != '' and x is not None]
gpu_count = torch.cuda.device_count()
break_values += [0] * (gpu_count - len(break_values))
if disk_blocks is not None:
break_values += [int(disk_blocks)]
requested_parameters.append({
"uitype": "Valid Display",
"unit": "text",
"label": "Current Allocated Layers: %1/{}".format(layer_count), #%1 will be the validation value
"id": "valid_layers",
"max": layer_count,
"step": 1,
"check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
"menu_path": "Layers",
"extra_classes": "",
"refresh_model_inputs": False
})
for i in range(gpu_count):
requested_parameters.append({
"uitype": "slider",
"unit": "int",
"label": "{} Layers".format(torch.cuda.get_device_name(i)),
"id": "{}_Layers".format(i),
"min": 0,
"max": layer_count,
"step": 1,
"check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
"check_message": "The sum of assigned layers must equal {}".format(layer_count),
"default": break_values[i],
"tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)),
"menu_path": "Layers",
"extra_classes": "",
"refresh_model_inputs": False
})
requested_parameters.append({
"uitype": "slider",
"unit": "int",
"label": "CPU Layers",
"id": "CPU_Layers",
"min": 0,
"max": layer_count,
"step": 1,
"check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
"check_message": "The sum of assigned layers must equal {}".format(layer_count),
"default": layer_count - sum(break_values),
"tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.",
"menu_path": "Layers",
"extra_classes": "",
"refresh_model_inputs": False
})
if disk_blocks is not None:
requested_parameters.append({
"uitype": "slider",
"unit": "int",
"label": "Disk Layers",
"id": "Disk_Layers",
"min": 0,
"max": layer_count,
"step": 1,
"check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
"check_message": "The sum of assigned layers must equal {}".format(layer_count),
"default": disk_blocks,
"tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. Use as a last resort.",
"menu_path": "Layers",
"extra_classes": "",
"refresh_model_inputs": False
})
else:
requested_parameters.append({
"uitype": "toggle",
"unit": "bool",
"label": "Use GPU",
"id": "use_gpu",
"default": True,
"tooltip": "Whether or not to use the GPU",
"menu_path": "Layers",
"extra_classes": "",
"refresh_model_inputs": False
})
return requested_parameters
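# --- Hedged sketch (not part of the diff): how the "check" sum constraint built above is
# meant to behave. The id strings ("0_Layers", "CPU_Layers", "Disk_Layers") mirror the ones
# used in requested_parameters; validate_layer_split itself is a hypothetical helper written
# only for illustration, not a function that exists in this codebase.
def validate_layer_split(parameters, layer_count, gpu_count, has_disk_cache):
    ids = ["{}_Layers".format(i) for i in range(gpu_count)] + ["CPU_Layers"]
    if has_disk_cache:
        ids.append("Disk_Layers")
    # The UI only enables the load button when the assigned layers sum to the model's layer count.
    total = sum(int(parameters.get(key, 0) or 0) for key in ids)
    return total == layer_count

# Example: a 32-layer model split across one GPU, the CPU and the disk cache.
# validate_layer_split({"0_Layers": 24, "CPU_Layers": 6, "Disk_Layers": 2}, 32, 1, True) -> True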
def set_input_parameters(self, parameters):
if self.hf_torch and hasattr(self, "get_model_type") and self.get_model_type() != "gpt2":
import breakmodel
layer_count = (
    self.model_config["n_layer"] if isinstance(self.model_config, dict)
    else self.model_config.num_layers if hasattr(self.model_config, "num_layers")
    else self.model_config.n_layer if hasattr(self.model_config, "n_layer")
    else self.model_config.num_hidden_layers if hasattr(self.model_config, "num_hidden_layers")
    else None
)
if layer_count is not None and layer_count >= 0 and not self.nobreakmodel:
gpu_count = torch.cuda.device_count()
layers = []
for i in range(gpu_count):
if isinstance(parameters["{}_Layers".format(i)], str) and parameters["{}_Layers".format(i)].isnumeric():
layers.append(int(parameters["{}_Layers".format(i)]))
elif isinstance(parameters["{}_Layers".format(i)], str):
layers.append(None)
else:
layers.append(parameters["{}_Layers".format(i)])
self.cpu_layers = int(parameters['CPU_Layers']) if 'CPU_Layers' in parameters else None
if isinstance(self.cpu_layers, str):
self.cpu_layers = int(self.cpu_layers) if self.cpu_layers.isnumeric() else 0
self.layers = layers
self.disk_layers = parameters['Disk_Layers'] if 'Disk_Layers' in parameters else 0
if isinstance(self.disk_layers, str):
self.disk_layers = int(self.disk_layers) if self.disk_layers.isnumeric() else 0
breakmodel.gpu_blocks = layers
breakmodel.disk_blocks = self.disk_layers
self.usegpu = self.cpu_layers == 0 and breakmodel.disk_blocks == 0 and sum(self.layers)-self.layers[0] == 0
self.model_type = self.get_model_type()
self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel
self.lazy_load = True
logger.debug("Model type: {}".format(self.model_type))
else:
logger.debug("Disabling breakmodel and lazyload")
self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
self.breakmodel = False
self.lazy_load = False
logger.info(parameters)
self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id']
self.path = parameters['path'] if 'path' in parameters else None
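# --- Hedged sketch (not part of the diff): the fallback rule encoded in set_input_parameters
# above. all_on_primary_gpu is a hypothetical helper that restates the condition
# "cpu_layers == 0 and disk_blocks == 0 and sum(layers) - layers[0] == 0", i.e. breakmodel is
# skipped only when every layer sits on GPU 0.
def all_on_primary_gpu(layers, cpu_layers, disk_blocks):
    return cpu_layers == 0 and disk_blocks == 0 and sum(layers) - layers[0] == 0

# all_on_primary_gpu([32, 0], 0, 0) -> True   (plain single-GPU generation)
# all_on_primary_gpu([24, 8], 0, 0) -> False  (layers split across two GPUs, breakmodel needed)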
def unload(self):
if hasattr(self, 'model'):
self.model = None
if hasattr(self, 'tokenizer'):
self.tokenizer = None
if hasattr(self, 'model_config'):
self.model_config = None
with torch.no_grad():
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="torch.distributed.reduce_op is deprecated")
for tensor in gc.get_objects():
try:
if torch.is_tensor(tensor):
tensor.set_(torch.tensor((), device=tensor.device, dtype=tensor.dtype))
except:
pass
gc.collect()
try:
with torch.no_grad():
torch.cuda.empty_cache()
except:
pass
def _post_load(self) -> None:
self.badwordsids = koboldai_settings.badwordsids_default
self.model_type = str(self.model_config.model_type)
# These are model specific tokenizer overrides if a model has bad defaults
if utils.koboldai_vars.model_type == "llama":
if self.model_type == "llama":
# Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer
self.tokenizer.add_bos_token = False
@@ -103,32 +303,32 @@ class HFInferenceModel(InferenceModel):
return result
object.__setattr__(self.tokenizer, '__call__', call_wrapper.__get__(self.tokenizer))
elif utils.koboldai_vars.model_type == "opt":
elif self.model_type == "opt":
self.tokenizer._koboldai_header = self.tokenizer.encode("")
self.tokenizer.add_bos_token = False
self.tokenizer.add_prefix_space = False
# Change newline behavior to match model quirks
if utils.koboldai_vars.model_type == "xglm":
if self.model_type == "xglm":
# Default to </s> newline mode if using XGLM
utils.koboldai_vars.newlinemode = "s"
elif utils.koboldai_vars.model_type in ["opt", "bloom"]:
elif self.model_type in ["opt", "bloom"]:
# Handle </s> but don't convert newlines if using Fairseq models that have newlines trained in them
utils.koboldai_vars.newlinemode = "ns"
# Clean up tokens that cause issues
if (
utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default
and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj")
self.badwordsids == koboldai_settings.badwordsids_default
and self.model_type not in ("gpt2", "gpt_neo", "gptj")
):
utils.koboldai_vars.badwordsids = [
self.badwordsids = [
[v]
for k, v in self.tokenizer.get_vocab().items()
if any(c in str(k) for c in "[]")
]
if utils.koboldai_vars.newlinemode == "n":
utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id])
self.badwordsids.append([self.tokenizer.eos_token_id])
return super()._post_load()
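# --- Hedged sketch (not part of the diff): how the default bad-words list above is derived for
# models outside the GPT-2 family. Any vocabulary entry whose token string contains a square
# bracket is banned, and the EOS token is added when newline mode is "n". build_badwordsids is
# a hypothetical helper; the real logic lives inline in _post_load.
def build_badwordsids(tokenizer, newlinemode):
    ids = [
        [v]
        for k, v in tokenizer.get_vocab().items()
        if any(c in str(k) for c in "[]")
    ]
    if newlinemode == "n":
        ids.append([tokenizer.eos_token_id])
    return ids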
@@ -139,9 +339,12 @@ class HFInferenceModel(InferenceModel):
Returns a string of the model's path locally, or None if it is not downloaded.
If ignore_existance is true, it will always return a path.
"""
if self.path is not None:
if os.path.exists(self.path):
return self.path
if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]:
model_path = utils.koboldai_vars.custmodpth
model_path = self.path
assert model_path
# Path can be absolute or relative to models directory
@@ -158,7 +361,7 @@ class HFInferenceModel(InferenceModel):
return model_path
basename = utils.koboldai_vars.model.replace("/", "_")
basename = self.model_name.replace("/", "_")
if legacy:
ret = basename
else:
@@ -176,15 +379,15 @@ class HFInferenceModel(InferenceModel):
revision=utils.koboldai_vars.revision,
cache_dir="cache",
)
utils.koboldai_vars.model_type = self.model_config.model_type
self.model_type = self.model_config.model_type
except ValueError:
utils.koboldai_vars.model_type = {
self.model_type = {
"NeoCustom": "gpt_neo",
"GPT2Custom": "gpt2",
}.get(utils.koboldai_vars.model)
}.get(self.model)
if not utils.koboldai_vars.model_type:
if not self.model_type:
logger.warning(
"No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)"
)
utils.koboldai_vars.model_type = "gpt_neo"
self.model_type = "gpt_neo"

View File

@@ -19,18 +19,16 @@ from modeling.inference_model import (
from modeling.inference_models.hf import HFInferenceModel
from modeling.tokenizer import GenericTokenizer
# This file shouldn't be imported unless using the TPU
assert utils.koboldai_vars.use_colab_tpu
import tpu_mtj_backend
model_backend_name = "Huggingface MTJ"
class HFMTJInferenceModel(HFInferenceModel):
class model_backend(HFInferenceModel):
def __init__(
self,
model_name: str,
#model_name: str,
) -> None:
super().__init__(model_name)
super().__init__()
self.hf_torch = False
self.model_config = None
self.capabilties = ModelCapabilities(
embedding_manipulation=False,
@@ -39,8 +37,13 @@ class HFMTJInferenceModel(HFInferenceModel):
post_token_probs=False,
uses_tpu=True,
)
def is_valid(self, model_name, model_path, menu_path):
# This file shouldn't be imported unless using the TPU
return utils.koboldai_vars.use_colab_tpu and super().is_valid(model_name, model_path, menu_path)
def setup_mtj(self) -> None:
import tpu_mtj_backend
def mtj_warper_callback(scores) -> "np.array":
scores_shape = scores.shape
scores_list = scores.tolist()
@@ -147,7 +150,7 @@ class HFMTJInferenceModel(HFInferenceModel):
tpu_mtj_backend.socketio = utils.socketio
if utils.koboldai_vars.model == "TPUMeshTransformerGPTNeoX":
if self.model_name == "TPUMeshTransformerGPTNeoX":
utils.koboldai_vars.badwordsids = utils.koboldai_vars.badwordsids_neox
print(
@@ -155,7 +158,7 @@ class HFMTJInferenceModel(HFInferenceModel):
Colors.PURPLE, Colors.END
)
)
if utils.koboldai_vars.model in (
if self.model_name in (
"TPUMeshTransformerGPTJ",
"TPUMeshTransformerGPTNeoX",
) and (
@@ -165,7 +168,7 @@ class HFMTJInferenceModel(HFInferenceModel):
raise FileNotFoundError(
f"The specified model path {repr(utils.koboldai_vars.custmodpth)} is not the path to a valid folder"
)
if utils.koboldai_vars.model == "TPUMeshTransformerGPTNeoX":
if self.model_name == "TPUMeshTransformerGPTNeoX":
tpu_mtj_backend.pad_token_id = 2
tpu_mtj_backend.koboldai_vars = utils.koboldai_vars
@@ -176,13 +179,15 @@ class HFMTJInferenceModel(HFInferenceModel):
tpu_mtj_backend.settings_callback = mtj_settings_callback
def _load(self, save_model: bool, initial_load: bool) -> None:
import tpu_mtj_backend
self.setup_mtj()
self.init_model_config()
utils.koboldai_vars.allowsp = True
logger.info(self.model_name)
tpu_mtj_backend.load_model(
utils.koboldai_vars.model,
hf_checkpoint=utils.koboldai_vars.model
self.model_name,
hf_checkpoint=self.model_name
not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")
and utils.koboldai_vars.use_colab_tpu,
socketio_queue=koboldai_settings.queue,
@@ -198,7 +203,7 @@ class HFMTJInferenceModel(HFInferenceModel):
if (
utils.koboldai_vars.badwordsids is koboldai_settings.badwordsids_default
and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj")
and self.model_type not in ("gpt2", "gpt_neo", "gptj")
):
utils.koboldai_vars.badwordsids = [
[v]
@@ -207,6 +212,7 @@ class HFMTJInferenceModel(HFInferenceModel):
]
def get_soft_tokens(self) -> np.array:
import tpu_mtj_backend
soft_tokens = None
if utils.koboldai_vars.sp is None:
@@ -258,6 +264,7 @@ class HFMTJInferenceModel(HFInferenceModel):
seed: Optional[int] = None,
**kwargs,
) -> GenerationResult:
import tpu_mtj_backend
warpers.update_settings()
soft_tokens = self.get_soft_tokens()

View File

@@ -53,15 +53,12 @@ LOG_SAMPLER_NO_EFFECT = False
class HFTorchInferenceModel(HFInferenceModel):
def __init__(
self,
model_name: str,
lazy_load: bool,
low_mem: bool,
) -> None:
super().__init__(model_name)
self.lazy_load = lazy_load
self.low_mem = low_mem
def __init__(self) -> None:
super().__init__()
self.hf_torch = True
self.lazy_load = True
self.low_mem = False
self.nobreakmodel = False
self.post_token_hooks = [
PostTokenHooks.stream_tokens,
@@ -128,7 +125,19 @@ class HFTorchInferenceModel(HFInferenceModel):
else:
return "Unknown"
def get_auxilary_device(self):
"""Get device auxilary tensors like inputs should be stored on."""
# NOTE: TPU isn't a torch device, so TPU stuff gets sent to CPU.
if utils.koboldai_vars.hascuda and self.usegpu:
return utils.koboldai_vars.gpu_device
elif utils.koboldai_vars.hascuda and self.breakmodel:
import breakmodel
return breakmodel.primary_device
return "cpu"
def _post_load(m_self) -> None:
if not utils.koboldai_vars.model_type:
utils.koboldai_vars.model_type = m_self.get_model_type()
@@ -211,40 +220,6 @@ class HFTorchInferenceModel(HFInferenceModel):
new_sample.old_sample = transformers.GenerationMixin.sample
use_core_manipulations.sample = new_sample
# PEFT Loading. This MUST be done after all save_pretrained calls are
# finished on the main model.
if utils.args.peft:
from peft import PeftModel, PeftConfig
local_peft_dir = os.path.join(m_self.get_local_model_path(), "peft")
# Make PEFT dir if it doesn't exist
try:
os.makedirs(local_peft_dir)
except FileExistsError:
pass
peft_local_path = os.path.join(local_peft_dir, utils.args.peft.replace("/", "_"))
logger.debug(f"Loading PEFT '{utils.args.peft}', possible local path is '{peft_local_path}'.")
peft_installed_locally = True
possible_peft_locations = [peft_local_path, utils.args.peft]
for i, location in enumerate(possible_peft_locations):
try:
m_self.model = PeftModel.from_pretrained(m_self.model, location)
logger.debug(f"Loaded PEFT at '{location}'")
break
except ValueError:
peft_installed_locally = False
if i == len(possible_peft_locations) - 1:
raise RuntimeError(f"Unable to load PeftModel for given name '{utils.args.peft}'. Does it exist?")
except RuntimeError:
raise RuntimeError("Error while loading PeftModel. Are you using the correct model?")
if not peft_installed_locally:
logger.debug(f"PEFT not saved to models folder; saving to '{peft_local_path}'")
m_self.model.save_pretrained(peft_local_path)
return super()._post_load()
def _raw_generate(
@@ -262,7 +237,7 @@ class HFTorchInferenceModel(HFInferenceModel):
else:
gen_in = prompt_tokens
device = utils.get_auxilary_device()
device = self.get_auxilary_device()
gen_in = gen_in.to(device)
additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else []
@@ -272,19 +247,14 @@ class HFTorchInferenceModel(HFInferenceModel):
with torch.no_grad():
start_time = time.time()
# HEED & BEWARE: All arguments passed to self.model.generate MUST be
# kwargs; see https://github.com/huggingface/peft/issues/232. If they
# aren't, PeftModel will EXPLODE!!!! But nothing will happen without
# a PEFT loaded so it's sneaky.
genout = self.model.generate(
input_ids=gen_in,
gen_in,
do_sample=True,
max_length=min(
len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
),
repetition_penalty=1.0,
bad_words_ids=utils.koboldai_vars.badwordsids
bad_words_ids=self.badwordsids
+ additional_bad_words_ids,
use_cache=True,
num_return_sequences=batch_count,
@@ -304,7 +274,6 @@ class HFTorchInferenceModel(HFInferenceModel):
def _get_model(self, location: str, tf_kwargs: Dict):
tf_kwargs["revision"] = utils.koboldai_vars.revision
tf_kwargs["cache_dir"] = "cache"
tf_kwargs["trust_remote_code"] = utils.koboldai_vars.trust_remote_code
# If we have model hints for legacy model, use them rather than fall back.
try:
@@ -444,8 +413,6 @@ class HFTorchInferenceModel(HFInferenceModel):
if not self.lazy_load:
return
if utils.args.breakmodel_disklayers is not None:
breakmodel.disk_blocks = utils.args.breakmodel_disklayers
disk_blocks = breakmodel.disk_blocks
gpu_blocks = breakmodel.gpu_blocks
@@ -489,10 +456,10 @@ class HFTorchInferenceModel(HFInferenceModel):
):
device_map[key] = (
utils.koboldai_vars.gpu_device
if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu
if utils.koboldai_vars.hascuda and self.usegpu
else "cpu"
if not utils.koboldai_vars.hascuda
or not utils.koboldai_vars.breakmodel
or not self.breakmodel
else breakmodel.primary_device
)
else:
@@ -508,12 +475,12 @@ class HFTorchInferenceModel(HFInferenceModel):
)
device = (
utils.koboldai_vars.gpu_device
if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu
if utils.koboldai_vars.hascuda and self.usegpu
else "disk"
if layer < disk_blocks and layer < ram_blocks
else "cpu"
if not utils.koboldai_vars.hascuda
or not utils.koboldai_vars.breakmodel
or not self.breakmodel
else "shared"
if layer < ram_blocks
else bisect.bisect_right(
@@ -607,6 +574,7 @@ class HFTorchInferenceModel(HFInferenceModel):
)
)
# print(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ", end="", flush=True)
#logger.debug(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ")
model_dict[key] = model_dict[key].materialize(
f, map_location="cpu"
)
@@ -617,15 +585,15 @@ class HFTorchInferenceModel(HFInferenceModel):
and breakmodel.primary_device != "cpu"
and utils.koboldai_vars.hascuda
and (
utils.koboldai_vars.breakmodel
or utils.koboldai_vars.usegpu
self.breakmodel
or self.usegpu
)
and model_dict[key].dtype is torch.float32
):
model_dict[key] = model_dict[key].to(torch.float16)
if breakmodel.primary_device == "cpu" or (
not utils.koboldai_vars.usegpu
and not utils.koboldai_vars.breakmodel
not self.usegpu
and not self.breakmodel
and model_dict[key].dtype is torch.float16
):
model_dict[key] = model_dict[key].to(torch.float32)
@@ -663,14 +631,14 @@ class HFTorchInferenceModel(HFInferenceModel):
and breakmodel.primary_device != "cpu"
and utils.koboldai_vars.hascuda
and (
utils.koboldai_vars.breakmodel
or utils.koboldai_vars.usegpu
self.breakmodel
or self.usegpu
)
):
dtype = torch.float16
if breakmodel.primary_device == "cpu" or (
not utils.koboldai_vars.usegpu
and not utils.koboldai_vars.breakmodel
not self.usegpu
and not self.breakmodel
):
dtype = torch.float32
if (
@@ -726,16 +694,16 @@ class HFTorchInferenceModel(HFInferenceModel):
and breakmodel.primary_device != "cpu"
and utils.koboldai_vars.hascuda
and (
utils.koboldai_vars.breakmodel
or utils.koboldai_vars.usegpu
self.breakmodel
or self.usegpu
)
and model_dict[key].dtype is torch.float32
):
model_dict[key] = model_dict[key].to(torch.float16)
if breakmodel.primary_device == "cpu" or (
not utils.koboldai_vars.usegpu
and not utils.koboldai_vars.breakmodel
not self.usegpu
and not self.breakmodel
and model_dict[key].dtype is torch.float16
):
model_dict[key] = model_dict[key].to(torch.float32)
@@ -774,14 +742,14 @@ class HFTorchInferenceModel(HFInferenceModel):
and breakmodel.primary_device != "cpu"
and utils.koboldai_vars.hascuda
and (
utils.koboldai_vars.breakmodel
or utils.koboldai_vars.usegpu
self.breakmodel
or self.usegpu
)
):
dtype = torch.float16
if breakmodel.primary_device == "cpu" or (
not utils.koboldai_vars.usegpu
and not utils.koboldai_vars.breakmodel
not self.usegpu
and not self.breakmodel
):
dtype = torch.float32
if (
@@ -815,7 +783,7 @@ class HFTorchInferenceModel(HFInferenceModel):
if always_use or (
utils.koboldai_vars.hascuda
and self.low_mem
and (utils.koboldai_vars.usegpu or utils.koboldai_vars.breakmodel)
and (self.usegpu or self.breakmodel)
):
original_dtype = torch.get_default_dtype()
torch.set_default_dtype(torch.float16)
@@ -830,6 +798,8 @@ class HFTorchInferenceModel(HFInferenceModel):
device_count = torch.cuda.device_count()
if device_count < 2:
primary = None
logger.debug("n_layers: {}".format(n_layers))
logger.debug("gpu blocks: {}".format(breakmodel.gpu_blocks))
gpu_blocks = breakmodel.gpu_blocks + (
device_count - len(breakmodel.gpu_blocks)
) * [0]
@@ -860,155 +830,47 @@ class HFTorchInferenceModel(HFInferenceModel):
n_layers = utils.num_layers(config)
logger.debug("gpu blocks before modification: {}".format(breakmodel.gpu_blocks))
if utils.args.cpu:
breakmodel.gpu_blocks = [0] * n_layers
return
elif (
utils.args.breakmodel_gpulayers is not None
or utils.args.breakmodel_disklayers is not None
):
try:
if not utils.args.breakmodel_gpulayers:
breakmodel.gpu_blocks = []
else:
breakmodel.gpu_blocks = list(
map(int, utils.args.breakmodel_gpulayers.split(","))
)
assert len(breakmodel.gpu_blocks) <= torch.cuda.device_count()
s = n_layers
for i in range(len(breakmodel.gpu_blocks)):
if breakmodel.gpu_blocks[i] <= -1:
breakmodel.gpu_blocks[i] = s
break
else:
s -= breakmodel.gpu_blocks[i]
assert sum(breakmodel.gpu_blocks) <= n_layers
n_layers -= sum(breakmodel.gpu_blocks)
if utils.args.breakmodel_disklayers is not None:
assert utils.args.breakmodel_disklayers <= n_layers
breakmodel.disk_blocks = utils.args.breakmodel_disklayers
n_layers -= utils.args.breakmodel_disklayers
except:
logger.warning(
"--breakmodel_gpulayers is malformatted. Please use the --help option to see correct usage of --breakmodel_gpulayers. Defaulting to all layers on device 0."
)
breakmodel.gpu_blocks = [n_layers]
n_layers = 0
elif utils.args.breakmodel_layers is not None:
breakmodel.gpu_blocks = [
n_layers - max(0, min(n_layers, utils.args.breakmodel_layers))
]
n_layers -= sum(breakmodel.gpu_blocks)
elif utils.args.model is not None:
elif breakmodel.gpu_blocks == []:
logger.info("Breakmodel not specified, assuming GPU 0")
breakmodel.gpu_blocks = [n_layers]
n_layers = 0
else:
device_count = torch.cuda.device_count()
if device_count > 1:
print(
Colors.CYAN
+ "\nPlease select one of your GPUs to be your primary GPU."
)
print(
"VRAM usage in your primary GPU will be higher than for your other ones."
)
print("It is recommended you make your fastest GPU your primary GPU.")
self.breakmodel_device_list(n_layers)
while True:
primaryselect = input("device ID> ")
if (
primaryselect.isnumeric()
and 0 <= int(primaryselect) < device_count
):
breakmodel.primary_device = int(primaryselect)
break
else:
print(
f"{Colors.RED}Please enter an integer between 0 and {device_count-1}.{Colors.END}"
)
else:
breakmodel.primary_device = 0
print(
Colors.PURPLE
+ "\nIf you don't have enough VRAM to run the model on a single GPU"
)
print(
"you can split the model between your CPU and your GPU(s), or between"
)
print("multiple GPUs if you have more than one.")
print("By putting more 'layers' on a GPU or CPU, more computations will be")
print(
"done on that device and more VRAM or RAM will be required on that device"
)
print("(roughly proportional to number of layers).")
print(
"It should be noted that GPUs are orders of magnitude faster than the CPU."
)
print(
f"This model has{Colors.YELLOW} {n_layers} {Colors.PURPLE}layers.{Colors.END}\n"
)
for i in range(device_count):
self.breakmodel_device_list(
n_layers, primary=breakmodel.primary_device, selected=i
)
print(
f"{Colors.CYAN}\nHow many of the remaining{Colors.YELLOW} {n_layers} {Colors.CYAN}layers would you like to put into device {i}?\nYou can also enter -1 to allocate all remaining layers to this device.{Colors.END}\n"
)
while True:
layerselect = input("# of layers> ")
if (
layerselect.isnumeric() or layerselect.strip() == "-1"
) and -1 <= int(layerselect) <= n_layers:
layerselect = int(layerselect)
layerselect = n_layers if layerselect == -1 else layerselect
breakmodel.gpu_blocks.append(layerselect)
n_layers -= layerselect
break
else:
print(
f"{Colors.RED}Please enter an integer between -1 and {n_layers}.{Colors.END}"
)
if n_layers == 0:
s = n_layers
for i in range(len(breakmodel.gpu_blocks)):
if breakmodel.gpu_blocks[i] <= -1:
breakmodel.gpu_blocks[i] = s
break
if n_layers > 0:
self.breakmodel_device_list(
n_layers, primary=breakmodel.primary_device, selected=-1
)
print(
f"{Colors.CYAN}\nHow many of the remaining{Colors.YELLOW} {n_layers} {Colors.CYAN}layers would you like to put into the disk cache?\nYou can also enter -1 to allocate all remaining layers to this device.{Colors.END}\n"
)
while True:
layerselect = input("# of layers> ")
if (
layerselect.isnumeric() or layerselect.strip() == "-1"
) and -1 <= int(layerselect) <= n_layers:
layerselect = int(layerselect)
layerselect = n_layers if layerselect == -1 else layerselect
breakmodel.disk_blocks = layerselect
n_layers -= layerselect
break
else:
print(
f"{Colors.RED}Please enter an integer between -1 and {n_layers}.{Colors.END}"
)
else:
s -= breakmodel.gpu_blocks[i]
assert sum(breakmodel.gpu_blocks) <= n_layers
n_layers -= sum(breakmodel.gpu_blocks)
if breakmodel.disk_blocks is not None:
assert breakmodel.disk_blocks <= n_layers
n_layers -= breakmodel.disk_blocks
logger.init_ok("Final device configuration:", status="Info")
self.breakmodel_device_list(n_layers, primary=breakmodel.primary_device)
with open("settings/{}.breakmodel".format(self.model_name.replace("/", "_")), "w") as file:
file.write("{}\n{}".format(",".join(map(str, breakmodel.gpu_blocks)), breakmodel.disk_blocks))
# If all layers are on the same device, use the old GPU generation mode
while len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0:
breakmodel.gpu_blocks.pop()
self.breakmodel = True
if len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (
-1,
utils.num_layers(config),
):
utils.koboldai_vars.breakmodel = False
utils.koboldai_vars.usegpu = True
logger.debug("All layers on same GPU. Breakmodel disabled")
self.breakmodel = False
self.usegpu = True
utils.koboldai_vars.gpu_device = len(breakmodel.gpu_blocks) - 1
return
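# --- Hedged sketch (not part of the diff): reading back the "settings/<model>.breakmodel" file
# written above, which stores a single line of comma-separated GPU layer counts followed by the
# disk layer count on the next line. read_breakmodel_settings is a hypothetical helper written
# only for illustration.
def read_breakmodel_settings(path):
    with open(path, "r") as file:
        gpu_line, disk_line = file.read().split("\n")[:2]
    gpu_blocks = [int(x) for x in gpu_line.split(",") if x != ""]
    disk_blocks = int(disk_line) if disk_line.strip().isnumeric() else 0
    return gpu_blocks, disk_blocks

# A file containing "24,8\n0" yields ([24, 8], 0).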
@@ -1017,6 +879,6 @@ class HFTorchInferenceModel(HFInferenceModel):
import breakmodel
breakmodel.primary_device = "cpu"
utils.koboldai_vars.breakmodel = False
utils.koboldai_vars.usegpu = False
self.breakmodel = False
self.usegpu = False
return

View File

@@ -1,10 +1,11 @@
from __future__ import annotations
import time
import time, json
import torch
import requests
import numpy as np
from typing import List, Optional, Union
import os
import utils
from logger import logger
@@ -16,25 +17,131 @@ from modeling.inference_model import (
ModelCapabilities,
)
model_backend_name = "Horde"
class HordeException(Exception):
"""To be used for errors on server side of the Horde."""
class HordeInferenceModel(InferenceModel):
class model_backend(InferenceModel):
def __init__(self) -> None:
super().__init__()
self.url = "https://horde.koboldai.net"
self.key = "0000000000"
self.models = self.get_cluster_models()
self.model_name = "Horde"
self.model = []
# Do not allow API to be served over the API
self.capabilties = ModelCapabilities(api_host=False)
def is_valid(self, model_name, model_path, menu_path):
logger.debug("Horde Models: {}".format(self.models))
return model_name == "CLUSTER" or model_name in [x['value'] for x in self.models]
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
if os.path.exists("settings/api.model_backend.settings") and 'base_url' not in vars(self):
with open("settings/horde.model_backend.settings", "r") as f:
temp = json.load(f)
self.base_url = temp['url']
self.key = temp['key']
if 'key' in parameters:
self.key = parameters['key']
if 'url' in parameters:
self.url = parameters['url']
requested_parameters = []
requested_parameters.extend([{
"uitype": "text",
"unit": "text",
"label": "URL",
"id": "url",
"default": self.url if 'url' not in parameters else parameters['url'],
"tooltip": "URL to the horde.",
"menu_path": "",
"check": {"value": "", 'check': "!="},
"refresh_model_inputs": True,
"extra_classes": ""
},
{
"uitype": "text",
"unit": "text",
"label": "Key",
"id": "key",
"default": self.key if 'key' not in parameters else parameters['key'],
"check": {"value": "", 'check': "!="},
"tooltip": "User Key to use when connecting to Horde (0000000000 is anonymous).",
"menu_path": "",
"refresh_model_inputs": True,
"extra_classes": ""
},
{
"uitype": "dropdown",
"unit": "text",
"label": "Model",
"id": "model",
"default": model_name,
"check": {"value": "", 'check': "!="},
'multiple': True,
"tooltip": "Which model to use when running OpenAI/GooseAI.",
"menu_path": "",
"refresh_model_inputs": False,
"extra_classes": "",
'children': self.models,
}])
return requested_parameters
def set_input_parameters(self, parameters):
self.key = parameters['key'].strip()
self.model = parameters['model']
self.url = parameters['url']
def get_cluster_models(self):
# Get list of models from public cluster
try:
req = requests.get(f"{self.url}/api/v2/status/models?type=text")
except:
logger.init_err("KAI Horde Models", status="Failed")
logger.error("Provided KoboldAI Horde URL unreachable")
emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"})
return
if not req.ok:
# Something went wrong, print the message and quit since we can't initialize an engine
logger.init_err("KAI Horde Models", status="Failed")
logger.error(req.json())
emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1")
return
engines = req.json()
try:
engines = [{"text": "All", "value": "all"}] + [{"text": en["name"], "value": en["name"]} for en in engines]
except:
logger.error(engines)
raise
logger.debug(engines)
online_model = ""
logger.init_ok("KAI Horde Models", status="OK")
return engines
def _load(self, save_model: bool, initial_load: bool) -> None:
tokenizer_name = "gpt2"
if len(self.model) > 0:
if self.model[0] == "all" and len(self.model) > 1:
tokenizer_name = self.model[1]
else:
tokenizer_name = self.model[0]
self.tokenizer = self._get_tokenizer(
utils.koboldai_vars.cluster_requested_models[0]
if len(utils.koboldai_vars.cluster_requested_models) > 0
else "gpt2",
tokenizer_name
)
def _save_settings(self):
with open("settings/horde.model_backend.settings", "w") as f:
json.dump({"key": self.key, "url": self.url}, f, indent="")
def _raw_generate(
self,
prompt_tokens: Union[List[int], torch.Tensor],
@@ -80,14 +187,14 @@ class HordeInferenceModel(InferenceModel):
client_agent = "KoboldAI:2.0.0:koboldai.org"
cluster_headers = {
"apikey": utils.koboldai_vars.horde_api_key,
"apikey": self.key,
"Client-Agent": client_agent,
}
try:
# Create request
req = requests.post(
f"{utils.koboldai_vars.horde_url}/api/v2/generate/text/async",
f"{self.url}/api/v2/generate/text/async",
json=cluster_metadata,
headers=cluster_headers,
)
@@ -125,7 +232,7 @@ class HordeInferenceModel(InferenceModel):
while not finished:
try:
req = requests.get(
f"{utils.koboldai_vars.horde_url}/api/v2/generate/text/status/{request_id}",
f"{self.url}/api/v2/generate/text/status/{request_id}",
headers=cluster_agent_headers,
)
except requests.exceptions.ConnectionError:
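# --- Hedged sketch (not part of the diff): the submit-then-poll pattern used above against the
# Horde text API. The endpoint paths come from the code shown in this file; the helper name and
# the "id", "done" and "generations" response fields are assumptions made for illustration, not
# a documented contract.
import time
import requests

def horde_generate(url, key, payload, client_agent="KoboldAI:2.0.0:koboldai.org"):
    headers = {"apikey": key, "Client-Agent": client_agent}
    # Submit the generation request and remember the id the Horde assigns to it.
    submit = requests.post(f"{url}/api/v2/generate/text/async", json=payload, headers=headers)
    submit.raise_for_status()
    request_id = submit.json()["id"]
    # Poll the status endpoint until the cluster reports the request as finished.
    while True:
        status = requests.get(
            f"{url}/api/v2/generate/text/status/{request_id}",
            headers={"Client-Agent": client_agent},
        )
        data = status.json()
        if data.get("done"):
            return [gen["text"] for gen in data.get("generations", [])]
        time.sleep(1)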

View File

@@ -1,106 +0,0 @@
import torch
import requests
import numpy as np
from typing import List, Optional, Union
import utils
from logger import logger
from modeling.inference_model import (
GenerationResult,
GenerationSettings,
InferenceModel,
)
class OpenAIAPIError(Exception):
def __init__(self, error_type: str, error_message) -> None:
super().__init__(f"{error_type}: {error_message}")
class OpenAIAPIInferenceModel(InferenceModel):
"""InferenceModel for interfacing with OpenAI's generation API."""
def _load(self, save_model: bool, initial_load: bool) -> None:
self.tokenizer = self._get_tokenizer("gpt2")
def _raw_generate(
self,
prompt_tokens: Union[List[int], torch.Tensor],
max_new: int,
gen_settings: GenerationSettings,
single_line: bool = False,
batch_count: int = 1,
seed: Optional[int] = None,
**kwargs,
) -> GenerationResult:
if seed is not None:
logger.warning(
"Seed is unsupported on the OpenAIAPIInferenceModel. Seed will be ignored."
)
decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens))
# Store context in memory to use it for comparison with generated content
utils.koboldai_vars.lastctx = decoded_prompt
# Build request JSON data
# GooseAI is a subtype of OAI. So to check if it's this type, we check the configname as a workaround
# as the koboldai_vars.model will always be OAI
if "GooseAI" in utils.koboldai_vars.configname:
reqdata = {
"prompt": decoded_prompt,
"max_tokens": max_new,
"temperature": gen_settings.temp,
"top_a": gen_settings.top_a,
"top_p": gen_settings.top_p,
"top_k": gen_settings.top_k,
"tfs": gen_settings.tfs,
"typical_p": gen_settings.typical,
"repetition_penalty": gen_settings.rep_pen,
"repetition_penalty_slope": gen_settings.rep_pen_slope,
"repetition_penalty_range": gen_settings.rep_pen_range,
"n": batch_count,
# TODO: Implement streaming
"stream": False,
}
else:
reqdata = {
"prompt": decoded_prompt,
"max_tokens": max_new,
"temperature": gen_settings.temp,
"top_p": gen_settings.top_p,
"frequency_penalty": gen_settings.rep_pen,
"n": batch_count,
"stream": False,
}
req = requests.post(
utils.koboldai_vars.oaiurl,
json=reqdata,
headers={
"Authorization": "Bearer " + utils.koboldai_vars.oaiapikey,
"Content-Type": "application/json",
},
)
j = req.json()
if not req.ok:
# Send error message to web client
if "error" in j:
error_type = j["error"]["type"]
error_message = j["error"]["message"]
else:
error_type = "Unknown"
error_message = "Unknown"
raise OpenAIAPIError(error_type, error_message)
outputs = [out["text"] for out in j["choices"]]
return GenerationResult(
model=self,
out_batches=np.array([self.tokenizer.encode(x) for x in outputs]),
prompt=prompt_tokens,
is_whole_generation=True,
single_line=single_line,
)

View File

@@ -0,0 +1,33 @@
import torch
import requests
import numpy as np
from typing import List, Optional, Union
import os
import utils
from logger import logger
from modeling.inference_model import (
GenerationResult,
GenerationSettings,
InferenceModel,
)
from modeling.inference_models.openai_gooseai import model_backend as openai_gooseai_model_backend
model_backend_name = "OpenAI"
class OpenAIAPIError(Exception):
def __init__(self, error_type: str, error_message) -> None:
super().__init__(f"{error_type}: {error_message}")
class model_backend(openai_gooseai_model_backend):
"""InferenceModel for interfacing with OpenAI's generation API."""
def __init__(self):
super().__init__()
self.url = "https://api.openai.com/v1/engines"
self.source = "OpenAI"
def is_valid(self, model_name, model_path, menu_path):
return model_name == "OAI"

View File

@@ -0,0 +1,199 @@
import torch
import requests,json
import numpy as np
from typing import List, Optional, Union
import os
import utils
from logger import logger
from modeling.inference_model import (
GenerationResult,
GenerationSettings,
InferenceModel,
)
class OpenAIAPIError(Exception):
def __init__(self, error_type: str, error_message) -> None:
super().__init__(f"{error_type}: {error_message}")
class model_backend(InferenceModel):
"""InferenceModel for interfacing with OpenAI's generation API."""
def __init__(self):
super().__init__()
self.key = ""
self.url = "https://api.goose.ai/v1/engines"
def is_valid(self, model_name, model_path, menu_path):
return model_name == "OAI" or model_name == "GooseAI"
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
if os.path.exists("settings/{}.model_backend.settings".format(self.source)) and 'colaburl' not in vars(self):
with open("settings/{}.model_backend.settings".format(self.source), "r") as f:
try:
self.key = json.load(f)['key']
except:
pass
if 'key' in parameters:
self.key = parameters['key']
self.source = model_name
requested_parameters = []
requested_parameters.extend([{
"uitype": "text",
"unit": "text",
"label": "Key",
"id": "key",
"default": self.key,
"check": {"value": "", 'check': "!="},
"tooltip": "User Key to use when connecting to OpenAI/GooseAI.",
"menu_path": "",
"refresh_model_inputs": True,
"extra_classes": ""
},
{
"uitype": "dropdown",
"unit": "text",
"label": "Model",
"id": "model",
"default": "",
"check": {"value": "", 'check': "!="},
"tooltip": "Which model to use when running OpenAI/GooseAI.",
"menu_path": "",
"refresh_model_inputs": False,
"extra_classes": "",
'children': self.get_oai_models(),
}])
return requested_parameters
def set_input_parameters(self, parameters):
self.key = parameters['key'].strip()
self.model_name = parameters['model']
def get_oai_models(self):
if self.key == "":
return []
# Get list of models from OAI
logger.init("OAI Engines", status="Retrieving")
req = requests.get(
self.url,
headers = {
'Authorization': 'Bearer '+self.key
}
)
if(req.status_code == 200):
r = req.json()
engines = r["data"]
try:
engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines]
except:
logger.error(engines)
raise
online_model = ""
logger.init_ok("OAI Engines", status="OK")
logger.debug("OAI Engines: {}".format(engines))
return engines
else:
# Something went wrong, print the message and quit since we can't initialize an engine
logger.init_err("OAI Engines", status="Failed")
logger.error(req.json())
emit('from_server', {'cmd': 'errmsg', 'data': req.json()})
return []
def _load(self, save_model: bool, initial_load: bool) -> None:
self.tokenizer = self._get_tokenizer("gpt2")
def _save_settings(self):
with open("settings/{}.model_backend.settings".format(self.source), "w") as f:
json.dump({"key": self.key}, f, indent="")
def _raw_generate(
self,
prompt_tokens: Union[List[int], torch.Tensor],
max_new: int,
gen_settings: GenerationSettings,
single_line: bool = False,
batch_count: int = 1,
seed: Optional[int] = None,
**kwargs,
) -> GenerationResult:
if seed is not None:
logger.warning(
"Seed is unsupported on the OpenAIAPIInferenceModel. Seed will be ignored."
)
decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens))
# Store context in memory to use it for comparison with generated content
utils.koboldai_vars.lastctx = decoded_prompt
# Build request JSON data
# GooseAI is a subtype of OAI. So to check if it's this type, we check the configname as a workaround
# as the koboldai_vars.model will always be OAI
if self.source == "GooseAI":
reqdata = {
"prompt": decoded_prompt,
"max_tokens": max_new,
"temperature": gen_settings.temp,
"top_a": gen_settings.top_a,
"top_p": gen_settings.top_p,
"top_k": gen_settings.top_k,
"tfs": gen_settings.tfs,
"typical_p": gen_settings.typical,
"repetition_penalty": gen_settings.rep_pen,
"repetition_penalty_slope": gen_settings.rep_pen_slope,
"repetition_penalty_range": gen_settings.rep_pen_range,
"n": batch_count,
# TODO: Implement streaming
"stream": False,
}
else:
reqdata = {
"prompt": decoded_prompt,
"max_tokens": max_new,
"temperature": gen_settings.temp,
"top_p": gen_settings.top_p,
"frequency_penalty": gen_settings.rep_pen,
"n": batch_count,
"stream": False,
}
req = requests.post(
"{}/{}/completions".format(self.url, self.model_name),
json=reqdata,
headers={
"Authorization": "Bearer " + self.key,
"Content-Type": "application/json",
},
)
j = req.json()
if not req.ok:
# Send error message to web client
if "error" in j:
error_type = j["error"]["type"]
error_message = j["error"]["message"]
else:
error_type = "Unknown"
error_message = "Unknown"
raise OpenAIAPIError(error_type, error_message)
outputs = [out["text"] for out in j["choices"]]
return GenerationResult(
model=self,
out_batches=np.array([self.tokenizer.encode(x) for x in outputs]),
prompt=prompt_tokens,
is_whole_generation=True,
single_line=single_line,
)

View File

@@ -0,0 +1,78 @@
from __future__ import annotations
import torch
import requests
import numpy as np
from typing import List, Optional, Union
import utils
from logger import logger
from modeling.inference_model import (
GenerationResult,
GenerationSettings,
InferenceModel,
ModelCapabilities,
)
model_backend_name = "Read Only"
class BasicAPIException(Exception):
"""To be used for errors when using the Basic API as an interface."""
class model_backend(InferenceModel):
def __init__(self) -> None:
super().__init__()
# Do not allow API to be served over the API
self.capabilties = ModelCapabilities(api_host=False)
self.tokenizer = self._tokenizer()
self.model = None
self.model_name = "Read Only"
def is_valid(self, model_name, model_path, menu_path):
return model_name == "ReadOnly"
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
requested_parameters = []
return requested_parameters
def set_input_parameters(self, parameters):
return
def unload(self):
utils.koboldai_vars.noai = False
def _initialize_model(self):
return
class _tokenizer():
def __init__(self):
self._koboldai_header = []
def decode(self, _input):
return ""
def encode(self, input_text):
return []
def _load(self, save_model: bool = False, initial_load: bool = False) -> None:
self.tokenizer = self.tokenizer
self.model = None
utils.koboldai_vars.noai = True
def _raw_generate(
self,
prompt_tokens: Union[List[int], torch.Tensor],
max_new: int,
gen_settings: GenerationSettings,
single_line: bool = False,
batch_count: int = 1,
seed: Optional[int] = None,
**kwargs,
):
return GenerationResult(
model=self,
out_batches=np.array([]),
prompt=prompt_tokens,
is_whole_generation=True,
single_line=single_line,
)

View File

@@ -1,3 +1,5 @@
//=================================================================//
// VARIABLES
//=================================================================//
@@ -2333,6 +2335,8 @@ $(document).ready(function(){
socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);});
socket.on('popup_edit_file', function(data){popup_edit_file(data);});
socket.on('error_popup', function(data){error_popup(data);});
socket.on('open_model_load_menu', function(data){show_model_menu(data);});
socket.on('selected_model_info', function(data){selected_model_info(data);});
socket.on('from_server', function(msg) {
//console.log(msg);
@@ -3332,28 +3336,6 @@ $(document).ready(function(){
hideLoadPopup();
});
load_model_accept.on("click", function(ev) {
hideMessage();
var gpu_layers;
var message;
if($("#modellayers")[0].classList.contains('hidden')) {
gpu_layers = ","
} else {
gpu_layers = ""
for (let i=0; i < $("#gpu_count")[0].value; i++) {
gpu_layers += $("#gpu_layers"+i)[0].value + ",";
}
}
var disk_layers = $("#disk_layers").length > 0 ? $("#disk_layers")[0].value : 0;
models = getSelectedOptions(document.getElementById('oaimodel'));
if (models.length == 1) {
models = models[0];
}
message = {'cmd': 'load_model', 'use_gpu': $('#use_gpu')[0].checked, 'key': $('#modelkey')[0].value, 'gpu_layers': gpu_layers.slice(0, -1), 'disk_layers': disk_layers, 'url': $('#modelurl')[0].value, 'online_model': models};
socket.send(message);
loadmodelcontent.html("");
hideLoadModelPopup();
});
sp_close.on("click", function(ev) {
hideSPPopup();
@@ -3388,8 +3370,9 @@ $(document).ready(function(){
});
button_loadmodel.on("click", function(ev) {
showLoadModelPopup();
socket.send({'cmd': 'list_model', 'data': 'mainmenu'});
//showLoadModelPopup();
//socket.send({'cmd': 'list_model', 'data': 'mainmenu'});
socket.emit('load_model_button', {});
});
button_showmodel.on("click", function(ev) {
socket.send({'cmd': 'show_model', 'data': ''});
@@ -3836,3 +3819,713 @@ function show_message(data) {
document.getElementById('message-popup').classList.remove('hidden');
}
//-----------------------------------------------------Copy from UI2--------------------------------------------------------
function show_model_menu(data) {
console.log(data);
//clear out the loadmodelsettings
var loadmodelsettings = document.getElementById('loadmodelsettings')
while (loadmodelsettings.firstChild) {
loadmodelsettings.removeChild(loadmodelsettings.firstChild);
}
//Clear out plugin selector
var model_plugin = document.getElementById('modelplugin');
while (model_plugin.firstChild) {
model_plugin.removeChild(model_plugin.firstChild);
}
model_plugin.classList.add("hidden");
var accept = document.getElementById("btn_loadmodelaccept");
accept.disabled = false;
//clear out the breadcrumbs
var breadcrumbs = document.getElementById('loadmodellistbreadcrumbs')
while (breadcrumbs.firstChild) {
breadcrumbs.removeChild(breadcrumbs.firstChild);
}
//add breadcrumbs
if ('breadcrumbs' in data) {
for (item of data.breadcrumbs) {
var button = document.createElement("button");
button.classList.add("breadcrumbitem");
button.setAttribute("model", data.menu);
button.setAttribute("folder", item[0]);
button.textContent = item[1];
button.onclick = function () {
socket.emit('select_model', {'menu': "", 'name': this.getAttribute("model"), 'path': this.getAttribute("folder")});
};
breadcrumbs.append(button);
var span = document.createElement("span");
span.textContent = "\\";
breadcrumbs.append(span);
}
}
//clear out the items
var model_list = document.getElementById('loadmodellistcontent')
while (model_list.firstChild) {
model_list.removeChild(model_list.firstChild);
}
//add items
for (item of data.items) {
var list_item = document.createElement("span");
list_item.classList.add("model_item");
//create the folder icon
var folder_icon = document.createElement("span");
folder_icon.classList.add("material-icons-outlined");
folder_icon.classList.add("cursor");
let isModel = !(
item.isMenu ||
item.label === "Load a model from its directory" ||
item.label === "Load an old GPT-2 model (eg CloverEdition)"
);
folder_icon.textContent = isModel ? "psychology" : "folder";
list_item.append(folder_icon);
//create the actual item
var popup_item = document.createElement("span");
popup_item.classList.add("model");
for (const key in item) {
if (key == "name") {
popup_item.id = item[key];
}
popup_item.setAttribute(key, item[key]);
}
popup_item.onclick = function() {
var attributes = this.attributes;
var obj = {};
for (var i = 0, len = attributes.length; i < len; i++) {
obj[attributes[i].name] = attributes[i].value;
}
//put the model data on the accept button so we can send it to the server when you accept
var accept = document.getElementById("popup_accept");
selected_model_data = obj;
//send the data to the server so it can figure out what data we need from the user for the model
socket.emit('select_model', obj);
//clear out the selected item and select this one visually
for (const element of document.getElementsByClassName("model_menu_selected")) {
element.classList.remove("model_menu_selected");
}
this.closest(".model_item").classList.add("model_menu_selected");
}
//name text
var text = document.createElement("span");
text.style="grid-area: item;";
text.textContent = item.label;
popup_item.append(text);
//model size text
var text = document.createElement("span");
text.textContent = item.size;
text.style="grid-area: gpu_size;padding: 2px;";
popup_item.append(text);
(function() {
// Anon function to avoid unreasonable indentation
if (!isModel) return;
let parameterCount = getModelParameterCount(item.label);
if (!parameterCount) return;
let warningText = "";
if (parameterCount > 25_000_000_000) warningText = "This is a very high-end model and will likely not run without a specialized setup."; // 25B
if (parameterCount < 2_000_000_000) warningText = "This is a lower-end model and may perform poorly."; // 2B
if (parameterCount < 1_000_000_000) warningText = "This is a very low-end model and may perform incoherently."; // 1B
if (!warningText) return;
$e("span", list_item, {
classes: ["material-icons-outlined", "model-size-warning"],
innerText: "warning",
"style.grid-area": "warning_icon",
tooltip: warningText
});
})();
(function() {
// Anon function to avoid unreasonable indentation
if (!item.isDownloaded) return;
if (!isModel) return;
$e("span", list_item, {
classes: ["material-icons-outlined", "model-download-notification"],
innerText: "download_done",
"style.grid-area": "downloaded_icon",
tooltip: "This model is already downloaded."
});
})();
list_item.append(popup_item);
model_list.append(list_item);
}
openPopup("load-model");
}
function model_settings_checker() {
//get check value:
missing_element = false;
if (this.check_data != null) {
if ('sum' in this.check_data) {
check_value = 0
for (const temp of this.check_data['sum']) {
if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value);
} else {
missing_element = true;
}
}
} else {
check_value = this.value
}
if (this.check_data['check'] == "=") {
valid = (check_value == this.check_data['value']);
} else if (this.check_data['check'] == "!=") {
valid = (check_value != this.check_data['value']);
} else if (this.check_data['check'] == ">=") {
valid = (check_value >= this.check_data['value']);
} else if (this.check_data['check'] == "<=") {
valid = (check_value <= this.check_data['value']);
} else if (this.check_data['check'] == "<=") {
valid = (check_value > this.check_data['value']);
} else if (this.check_data['check'] == "<=") {
valid = (check_value < this.check_data['value']);
}
if (valid || missing_element) {
//if we are supposed to refresh when this value changes we'll resubmit
if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) {
//get an object of all the input settings from the user
data = {}
settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area");
if (settings_area) {
for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) {
var element_data = element.value;
if (element.getAttribute("data_type") == "int") {
element_data = parseInt(element_data);
} else if (element.getAttribute("data_type") == "float") {
element_data = parseFloat(element_data);
} else if (element.getAttribute("data_type") == "bool") {
element_data = (element_data == 'on');
}
data[element.id.split("|")[1].replace("_value", "")] = element_data;
}
}
data = {...data, ...selected_model_data};
data['plugin'] = document.getElementById("modelplugin").value;
socket.emit("resubmit_model_info", data);
}
if ('sum' in this.check_data) {
for (const temp of this.check_data['sum']) {
if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error');
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip");
}
}
} else {
this.closest(".setting_container_model").classList.remove('input_error');
this.closest(".setting_container_model").removeAttribute("tooltip");
}
} else {
if ('sum' in this.check_data) {
for (const temp of this.check_data['sum']) {
if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error');
if (this.check_data['check_message']) {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']);
} else {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip");
}
}
}
} else {
this.closest(".setting_container_model").classList.add('input_error');
if (this.check_data['check_message']) {
this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']);
} else {
this.closest(".setting_container_model").removeAttribute("tooltip");
}
}
}
}
var accept = document.getElementById("btn_loadmodelaccept");
ok_to_load = true;
for (const item of document.getElementsByClassName("input_error")) {
if (item.classList.contains("input_error") && !item.closest(".model_plugin_settings_area").classList.contains("hidden")) {
ok_to_load = false;
break;
}
}
if (ok_to_load) {
accept.classList.remove("disabled");
accept.disabled = false;
} else {
accept.classList.add("disabled");
accept.disabled = true;
}
//We now have valid display boxes potentially. We'll go through them and update the display
for (const item of document.querySelectorAll(".model_settings_valid_display:not(#blank_model_settings_valid_display)")) {
check_value = 0
missing_element = false;
for (const temp of item.check_data['sum']) {
if (document.getElementById(item.id.split("|")[0] +"|" + temp + "_value")) {
check_value += parseInt(document.getElementById(item.id.split("|")[0] +"|" + temp + "_value").value);
} else {
missing_element = true;
}
}
if (!missing_element) {
item.innerText = item.original_text.replace("%1", check_value);
}
}
}
function selected_model_info(sent_data) {
const data = sent_data['model_backends'];
//clear out the loadmodelsettings
var loadmodelsettings = document.getElementById('loadmodelsettings')
while (loadmodelsettings.firstChild) {
loadmodelsettings.removeChild(loadmodelsettings.firstChild);
}
//Clear out plugin selector
var model_plugin = document.getElementById('modelplugin');
while (model_plugin.firstChild) {
model_plugin.removeChild(model_plugin.firstChild);
}
var accept = document.getElementById("btn_loadmodelaccept");
accept.disabled = false;
modelplugin = document.getElementById("modelplugin");
modelplugin.classList.remove("hidden");
modelplugin.onchange = function () {
for (const area of document.getElementsByClassName("model_plugin_settings_area")) {
area.classList.add("hidden");
}
if (document.getElementById(this.value + "_settings_area")) {
document.getElementById(this.value + "_settings_area").classList.remove("hidden");
}
model_settings_checker()
}
//create the content
for (const [loader, items] of Object.entries(data)) {
model_area = document.createElement("DIV");
model_area.id = loader + "_settings_area";
model_area.classList.add("model_plugin_settings_area");
model_area.classList.add("hidden");
modelpluginoption = document.createElement("option");
modelpluginoption.innerText = loader;
modelpluginoption.value = loader;
modelplugin.append(modelpluginoption);
//create the user input for each requested input
for (item of items) {
let new_setting = document.getElementById('blank_model_settings').cloneNode(true);
new_setting.id = loader;
new_setting.classList.remove("hidden");
new_setting.querySelector('#blank_model_settings_label').innerText = item['label'];
new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']);
onchange_event = model_settings_checker;
if (item['uitype'] == "slider") {
var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number');
slider_number.value = item['default'];
slider_number.id = loader + "|" + item['id'] + "_value_text";
slider_number.onchange = function() { document.getElementById(this.id.replace("_text", "")).value = this.value;};
var slider = new_setting.querySelector('#blank_model_settings_slider');
slider.value = item['default'];
slider.min = item['min'];
slider.max = item['max'];
slider.setAttribute("data_type", item['unit']);
slider.id = loader + "|" + item['id'] + "_value";
if ('check' in item) {
slider.check_data = item['check'];
slider_number.check_data = item['check'];
} else {
slider.check_data = null;
slider_number.check_data = null;
}
slider.oninput = function() { document.getElementById(this.id+"_text").value = this.value;};
slider.onchange = onchange_event;
slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min'];
new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max'];
slider.noresubmit = true;
slider.onchange();
slider.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_slider').remove();
}
if (item['uitype'] == "toggle") {
toggle = document.createElement("input");
toggle.type='checkbox';
toggle.classList.add("setting_item_input");
toggle.classList.add("blank_model_settings_input");
toggle.classList.add("model_settings_input");
toggle.id = loader + "|" + item['id'] + "_value";
toggle.checked = item['default'];
toggle.onclick = onchange_event;
toggle.setAttribute("data_type", item['unit']);
toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if ('check' in item) {
toggle.check_data = item['check'];
} else {
toggle.check_data = null;
}
new_setting.querySelector('#blank_model_settings_toggle').append(toggle);
setTimeout(function() {
$('#'+loader + "\\|" + item['id'] + "_value").bootstrapToggle({size: "mini", onstyle: "success", toggle: "toggle"});
}, 200);
toggle.noresubmit = true;
toggle.onclick();
toggle.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_toggle').remove();
}
if (item['uitype'] == "dropdown") {
var select_element = new_setting.querySelector('#blank_model_settings_dropdown');
select_element.id = loader + "|" + item['id'] + "_value";
for (const dropdown_value of item['children']) {
new_option = document.createElement("option");
new_option.value = dropdown_value['value'];
new_option.innerText = dropdown_value['text'];
select_element.append(new_option);
}
select_element.value = item['default'];
select_element.setAttribute("data_type", item['unit']);
select_element.onchange = onchange_event;
select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if (('multiple' in item) && (item['multiple'])) {
select_element.multiple = true;
select_element.size = 10;
}
if ('check' in item) {
select_element.check_data = item['check'];
} else {
select_element.check_data = null;
}
select_element.noresubmit = true;
select_element.onchange();
select_element.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_dropdown').remove();
}
if (item['uitype'] == "password") {
var password_item = new_setting.querySelector('#blank_model_settings_password');
password_item.id = loader + "|" + item['id'] + "_value";
password_item.value = item['default'];
password_item.setAttribute("data_type", item['unit']);
password_item.onchange = onchange_event;
password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if ('check' in item) {
password_item.check_data = item['check'];
} else {
password_item.check_data = null;
}
password_item.noresubmit = true;
password_item.onchange();
password_item.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_password').remove();
}
if (item['uitype'] == "text") {
var text_item = new_setting.querySelector('#blank_model_settings_text');
text_item.id = loader + "|" + item['id'] + "_value";
text_item.value = item['default'];
text_item.onchange = onchange_event;
text_item.setAttribute("data_type", item['unit']);
text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if ('check' in item) {
text_item.check_data = item['check'];
} else {
text_item.check_data = null;
}
text_item.noresubmit = true;
text_item.onchange();
text_item.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_text').remove();
}
if (item['uitype'] == "Valid Display") {
new_setting = document.createElement("DIV");
new_setting.classList.add("model_settings_valid_display");
new_setting.id = loader + "|" + item['id'] + "_value";
new_setting.innerText = item['label'];
new_setting.check_data = item['check'];
new_setting.original_text = item['label'];
}
model_area.append(new_setting);
loadmodelsettings.append(model_area);
}
}
//unhide the first plugin settings
if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) {
document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden");
}
model_settings_checker()
}
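// Editor's note (illustrative, not from the original commit): each backend gets
// its own "<loader>_settings_area" div built above; the #modelplugin dropdown
// shows exactly one of them at a time, and model_settings_checker() then
// re-checks the visible area for input errors to enable or disable the Load button.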
function getModelParameterCount(modelName) {
if (!modelName) return null;
// The "T" and "K" may be a little optimistic...
let paramsString = modelName.toUpperCase().match(/[\d.]+[TBMK]/)
if (!paramsString) return null;
paramsString = paramsString[0];
let base = parseFloat(paramsString);
let multiplier = {T: 1_000_000_000_000, B: 1_000_000_000, M: 1_000_000, K: 1_000}[paramsString[paramsString.length - 1]];
return base * multiplier;
}
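// Editor's illustration (not part of the original commit): how the regex above
// reads a few typical names, using the multiplier table in getModelParameterCount.
//   getModelParameterCount("example-13B")  -> 13_000_000_000
//   getModelParameterCount("example-2.7B") -> 2_700_000_000
//   getModelParameterCount("gpt2")         -> null (no size suffix to match)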
function openPopup(id) {
closePopups();
const container = document.getElementById("popup-container");
container.classList.remove("hidden");
for (const popupWindow of container.children) {
popupWindow.classList.add("hidden");
}
const popup = document.getElementById(`${id}`);
popup.classList.remove("hidden");
// Sometimes we want to instantly focus on certain elements when a menu opens.
for (const noticeMee of popup.getElementsByClassName("focus-on-me")) {
noticeMee.focus();
break;
}
}
function closePopups() {
const container = document.getElementById("popup-container");
container.classList.add("hidden");
for (const popupWindow of container.children) {
popupWindow.classList.add("hidden");
}
}
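// Editor's note (illustrative, not from the original commit): popups are direct
// children of #popup-container, so openPopup("load-model") -- as used when the
// model menu arrives below -- unhides the container, hides every sibling popup,
// and reveals only the requested one.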
function $el(selector) {
// We do not preemptively fetch all elements upon execution (wall of consts)
// due to the layer of mental overhead it adds to debugging and reading
// code in general.
return document.querySelector(selector);
}
function $e(tag, parent, attributes, insertionLocation=null) {
// Small helper function for dynamic UI creation
let element = document.createElement(tag);
if (!attributes) attributes = {};
if ("classes" in attributes) {
if (!Array.isArray(attributes.classes)) throw Error("Classes was not array!");
for (const className of attributes.classes) {
element.classList.add(className);
}
delete attributes.classes;
}
for (const [attribute, value] of Object.entries(attributes)) {
if (attribute.includes(".")) {
let ref = element;
const parts = attribute.split(".");
for (const part of parts.slice(0, -1)) {
ref = ref[part];
}
ref[parts[parts.length - 1]] = value;
continue;
}
if (attribute in element) {
element[attribute] = value;
} else {
element.setAttribute(attribute, value);
}
}
if (!parent) return element;
if (insertionLocation && Object.keys(insertionLocation).length) {
let [placement, target] = Object.entries(insertionLocation)[0];
if (placement === "before") {
parent.insertBefore(element, target);
} else if (placement === "after") {
parent.insertBefore(element, target.nextSibling);
} else {
throw Error(`I have no clue what placement ${placement} is`);
}
} else {
parent.appendChild(element);
}
return element;
}
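// Editor's sketch (not part of the original commit; element and values are
// hypothetical): typical use of the $e helper above. "classes" goes through
// classList, dotted keys walk nested properties, known element properties are
// assigned directly, and anything else falls back to setAttribute.
// let chip = $e("span", document.body, {
//     classes: ["model_item"],
//     "style.display": "inline-block",
//     innerText: "example",
//     tooltip: "hover text",
// });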
function load_model() {
var accept = document.getElementById('btn_loadmodelaccept');
settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area");
//get an object of all the input settings from the user
data = {}
if (settings_area) {
for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) {
var element_data = element.value;
if ((element.tagName == "SELECT") && (element.multiple)) {
element_data = [];
for (var i=0, iLen=element.options.length; i<iLen; i++) {
if (element.options[i].selected) {
element_data.push(element.options[i].value);
}
}
} else {
if (element.getAttribute("data_type") == "int") {
element_data = parseInt(element_data);
} else if (element.getAttribute("data_type") == "float") {
element_data = parseFloat(element_data);
} else if (element.getAttribute("data_type") == "bool") {
element_data = (element_data == 'on');
}
}
data[element.id.split("|")[1].replace("_value", "")] = element_data;
}
}
data = {...data, ...selected_model_data};
data['plugin'] = document.getElementById("modelplugin").value;
socket.emit("load_model", data);
closePopups();
}
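// Editor's note (illustrative, not from the original commit): the payload
// emitted above is shaped roughly like the sketch below -- one key per visible
// backend setting (the part of the input id between "|" and "_value"), merged
// with the attributes of the clicked menu item stored in selected_model_data,
// plus the chosen backend under "plugin". The concrete names and values here
// are examples only.
// {
//     "plugin": "Huggingface",
//     "use_gpu": true,               // example backend-defined setting id
//     "name": "example-model-13B",   // example attribute from the menu item
//     "menu": "mainmenu"             // example attribute from the menu item
// }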
function initalizeTooltips() {
const tooltip = $e("span", document.body, {id: "tooltip-text", "style.display": "none"});
let tooltipTarget = null;
function alterTooltipState(target, specialClass=null) {
tooltipTarget = target;
tooltip.style.display = target ? "block" : "none";
tooltip.className = specialClass || "";
}
function registerElement(el) {
// el should have attribute "tooltip"
let text = el.getAttribute("tooltip");
el.addEventListener("mouseenter", function(event) {
if (!el.hasAttribute("tooltip")) return;
tooltip.innerText = text;
let specialClass = "tooltip-standard";
// Kinda lame
if (this.classList.contains("context-token")) specialClass = "tooltip-context-token";
alterTooltipState(el, specialClass);
});
el.addEventListener("mouseleave", function(event) {
alterTooltipState(null);
});
}
const xOffset = 10;
const yOffset = 15;
document.addEventListener("mousemove", function(event) {
if (!tooltipTarget) return;
let [x, y] = [event.x, event.y];
// X + the tooltip's width is the farthest point right we will display;
// let's account for it. If the tooltip would render outside the window,
// subtract accordingly.
let xOverflow = (x + tooltip.clientWidth) - window.innerWidth;
if (xOverflow > 0) x -= xOverflow;
if (xOverflow + xOffset < 0) x += xOffset;
// Same for Y!
let yOverflow = (y + tooltip.clientHeight) - window.innerHeight;
if (yOverflow > 0) y -= yOverflow;
if (yOverflow + yOffset < 0) y += yOffset;
tooltip.style.left = `${x}px`;
tooltip.style.top = `${y}px`;
});
// Initial scan
for (const element of document.querySelectorAll("[tooltip]")) {
registerElement(element);
}
// Use a MutationObserver to catch future tooltips
const observer = new MutationObserver(function(records, observer) {
for (const record of records) {
if (record.type === "attributes") {
// Sanity check
if (record.attributeName !== "tooltip") continue;
registerElement(record.target);
continue;
}
// If we remove the tooltip target, stop showing the tooltip. Maybe a little inefficient.
if (!document.body.contains(tooltipTarget)) alterTooltipState(null);
for (const node of record.addedNodes) {
if (node.nodeType !== 1) continue;
if (node.hasAttribute("tooltip")) registerElement(node);
// Register for descendants (Slow?)
for (const element of node.querySelectorAll("[tooltip]")) {
registerElement(element);
}
}
}
});
observer.observe(document.body, {
childList: true,
subtree: true,
attributeFilter: ["tooltip"],
});
}
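// Editor's note (illustrative, not from the original commit): any element that
// carries a "tooltip" attribute is registered here -- either during the initial
// scan or later through the MutationObserver -- so the dynamically built model
// settings only need setAttribute("tooltip", ...) to get hover help.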
// Must be done before any elements are made; we track their changes.
console.log(document.body);
initalizeTooltips();

View File

@@ -1728,4 +1728,691 @@ body.connected .popupfooter, .popupfooter.always-available {
.wientry > .input-token-usage {
bottom: 8px;
}
/*----------------------------------------------COPY FROM UI2-----------------------------------------------------------------------*/
:root {
/*General*/
--background: #252e3b;
--gamescreen_background: #111820;
--input_background: #111820;
--text: #e0e0e0;
--text_to_ai_color: #e0e0e0;
--text_edit: #9cc3ee;
--action_mode_input: #33E978;
--statusbar_color: #eedcb880;
--statusbar_text_color: #e0e0e0;
--scrollbar-color: #2f3b4bdb;
/*Buttons*/
/*General*/
--enabled_button_text: #e0e0e0;
--enabled_button_background_color: #2d3d52;
--enabled_button_border_color: #253446;
--disabled_button_text: #303030;
--disabled_button_background_color: #495762;
--disabled_button_border_color: #686c68;
/*Home Tab*/
--button_text: #e0e0e0;
--button_background: #283445;
/*Alternate Button*/
--alternate_button_text: #e0e0e0;
--alternate_button_background: #283445;
/*Buttons -> Icon Button*/
--icon_button_background:;
--icon_button_color:;
--icon_button_border_color:;
/*Context Menu*/
--context_menu_text:;
--context_menu_background:;
--context_menu_border:;
--context_menu_division:;
--context_menu_hover_text:;
--context_menu_hover_background:;
/*Sequence, AKA Gens Per Action*/
--sequence_area_background: #111820;
--sequence_background: #eedcb8;
--sequence_text: #e0e0e0;
/*Side Menus*/
--tab_color: #243047;
--flyout_background: #18222d;
--flyout_background_pinned: #18222d;
--setting_background: #273141;
--setting_text: #e0e0e0;
--sample_order_select_color: #1f2934;
--sample_order_select_color_text: #eedcb8;
--dropdown_text: #e0e0e0;
--dropdown_background: #212935;
--rangeslider_background_color: #1f2934;
--rangeslider_color: #1f2934;
--rangeslider_circle_color: #404d64;
--help_icon: #7c8389;
--tooltip_text: #e0e0e0;
--tooltip_background: #303c50;
--setting_category_help_text_color: #E0E0E0;
--setting_footer_border_color: #334552;
--setting_footer_text_color: #e0e0e0;
--setting_footer_background_color: #18222d;
/*Substitution Card*/
--substitution_card_input_border:;
--substitution_card_input_background:;
/*Palette Card*/
--palette_card_background: #273141;
--palette_card_text: #e0e0e0;
--palette_table_border: #607c90;
/*World Info*/
--wi_card_border_color: #334552;
--wi_card_border_color_to_ai: #eedcb880;
--wi_card_bg_color: #223040;
--wi_card_text_color: #e0e0e0;
--wi_card_tag_bg_color: #1d2835;
--wi_card_tag_text_color: #e0e0e0;
--wi_tag_color: #283445;
--wi_tag_text_color: #e0e0e0;
/*Popup*/
--popup_background_color: #1a2530;
--popup_title_bar_color: #283445;
--popup_title_bar_color_text: #e0e0e0;
--popup_item_color: #1a2530;
--popup_item_color_text: #e0e0e0;
--popup_hover_color: #1e2733;
--popup_hover_color_text: #e0e0e0;
--popup_selected_color: #242d3c;
--popup_selected_color_text: #eedcb8;
--popup_button_color: #283445;
--popup_button_color_text: #e0e0e0;
--popup_cancel_button_color: #25364a;
--popup_cancel_button_color_text: #e0e0e0;
--error: #19242c;
--error_text: #e0e0e0;
--error_title: #25364a;
--error_title_text: #e0e0e0;
/*Context Bar Colors*/
--context_colors_memory: #04325c;
--context_colors_authors_notes: #165a62;
--context_colors_world_info: #1864a3;
--context_colors_prompt: #868686;
--context_colors_game_text: #63710e;
--context_colors_submit: #ffffff00;
--context_colors_unused: #ffffff24;
--context_colors_soft_prompt: #141414;
--context_colors_genre: #2c5c88;
/*Parameters*/
--scrollbar-size: 6px;
--palette_card_shadow: 0;
--wi_card_shadow: 0;
--light_shadow_value: 0;
--left_menu_strong_shadow: 0;
--right_menu_light_shadow: 0;
--right_menu_strong_shadow: 0;
--context_menu_shadow: var(--wi_card_shadow);
--radius_inputbox: 2px;
--radius_unpinned_menu: 2px;
--radius_sequence: 5px;
--radius_settings_background: 2px;
--radius_button: 2px;
--radius_alternate_button: 2px;
--radius_item_popup: 2px;
--radius_wi_card: 5px;
--radius_palette_card: 5px;
--radius_settings_button: 2px;
--tabs_rounding: 2px;
--radius_context_menu: 2px;
--radius_context_menu_hover: 2px;
--radius_genre_tag: 2px;
--radius_tooltip: 2px;
/*----------------VARIABLES--------------------*/
--flyout_menu_closed_width: 0px;
--setting_menu_closed_width_no_pins_width: 0px;
--story_options_size: 30%;
--story_pinned_areas_left:"menuicon options gamescreen lefticon"
"menuicon theme theme lefticon"
"menuicon inputrow inputrow lefticon";
--story_pinned_areas_right:"menuicon gamescreen options lefticon"
"menuicon theme theme lefticon"
"menuicon inputrow inputrow lefticon";
--story_pinned_area_widths_left: 30px var(--story_options_size) auto 30px;
--story_pinned_area_widths_right: 30px auto var(--story_options_size) 30px;
--story_pinned_areas:var(--story_pinned_areas_left);
--story_pinned_area_widths:var(--story_pinned_area_widths_left);
--font_size_adjustment: 0px;
--game_screen_font_size_adjustment: 1;
}
/*---------------------------------- Popups -------------------------------------------------*/
@media only screen and (max-aspect-ratio: 7/5) {
.popup {
position: absolute;
top: 10vh;
left: 10%;
z-index: 999;
width: 80%;
height: 80vh;
border-radius: 15px;
box-shadow: var(--popup_shadow);
background-color: var(--popup_background_color);
display: flex;
flex-direction: column;
overflow: hidden;
}
}
@media only screen and (min-aspect-ratio: 7/5) {
.popup {
position: absolute;
top: 10vh;
left: 25%;
z-index: 999;
width: 50%;
height: 80vh;
border-radius: 15px;
box-shadow: var(--popup_shadow);
background-color: var(--popup_background_color);
display: flex;
flex-direction: column;
overflow: hidden;
}
}
.popup .title {
width: 100%;
background-color: var(--popup_title_bar_color);
color: var(--popup_title_bar_color_text);
text-align: center;
font-size: calc(1.3em + var(--font_size_adjustment));
}
.popup .action_button {
background-color: var(--popup_button_color);
color: var(--popup_button_color_text);
}
.popup .popup_list_area {
overflow-x: hidden;
overflow-y: scroll;
flex-grow: 1;
flex-shrink: 1;
flex-basis: auto;
color: var(--popup_item_color_text);
}
#modelspecifier, .popup .model_item {
width: 98%;
background-color: var(--popup_item_color);
color: var(--popup_item_color_text);
margin: 5px 0 5px 1%;
border-radius: var(--radius_item_popup);
padding: 2px;
display: grid;
grid-template-areas: "folder_icon delete_icon edit_icon rename_icon file gpu_size warning_icon downloaded_icon";
grid-template-columns: 30px 0px 0px 0px auto 50px 30px 30px;
}
.popup .model_item .folder_icon {
grid-area: folder_icon;
}
.popup .model_item .edit_icon {
grid-area: edit_icon;
}
.popup .model_item .rename_icon {
grid-area: rename_icon;
}
.popup .model_item .delete_icon {
grid-area: delete_icon;
}
.popup .model_item .model {
cursor: pointer;
grid-area: file;
}
.popup .header {
width: 98%;
background-color: var(--popup_item_color);
color: var(--popup_item_color_text);
margin: 5px 0 5px 1%;
padding: 2px;
}
.popup .item {
width: 98%;
background-color: var(--popup_item_color);
color: var(--popup_item_color_text);
margin: 5px 0 5px 1%;
border-radius: var(--radius_item_popup);
padding: 2px;
}
.popup .item:hover {
background-color: var(--popup_hover_color);
color: var(--popup_hover_color_text);
}
.popup .item.selected {
background: var(--popup_selected_color);
color: var(--popup_selected_color_text);
}
.popup .popup_load_cancel {
text-align: center;
vertical-align: bottom;
color: var(--popup_title_bar_color_text);
background-color: var(--popup_title_bar_color);
padding: 0 10px 0 10px;
}
.popup_load_cancel_button {
color: var(--popup_cancel_button_color_text);
border-color: var(--popup_cancel_button_color_text);
background-color: var(--popup_cancel_button_color);
vertical-align: bottom;
display: inline;
}
.table-header-container {
display: flex;
justify-content: space-between;
cursor: pointer;
}
.table-header-sort-icon {
margin-right: 10px;
margin-top: 2px;
}
.table-header-label {
margin-top: 4px;
}
#error_message.popup {
background-color: var(--error);
color: var(--error_text);
overflow: hidden;
}
#error_message .title {
width: 100%;
background-color: var(--error_title);
color: var(--error_title_text);
text-align: center;
font-size: calc(1.3em + var(--font_size_adjustment));
}
#error_message.popup .btn-primary {
background-color: var(--error);
color: var(--error_text);
border-color: var(--error_text);
}
#error_message .popup_load_cancel {
background-color: var(--error_title);
color: var(--error_title_text);
}
#error_message.popup .popup_list_area {
overflow-x: hidden;
overflow-y: scroll;
flex-grow: 1;
flex-shrink: 1;
flex-basis: auto;
background-color: var(--error);
color: var(--error_text);
}
.breadcrumbitem {
padding: 5px 10px 5px 10px;
color: #ffffff;
background-color: transparent;
border: none;
-moz-transition: background-color 0.25s ease-in;
-o-transition: background-color 0.25s ease-in;
-webkit-transition: background-color 0.25s ease-in;
transition: background-color 0.25s ease-in;
}
.breadcrumbitem:hover {
cursor: pointer;
background-color: #688f1f;
}
.loadmodelsettings {
overflow-y: auto;
max-height: 50%;
}
/*----------------------------- Model Load Popup ------------------------------------------*/
#specspan, .popup_list_area .model_item .model {
grid-area: file;
display: grid;
grid-template-areas: "item gpu_size";
grid-template-columns: auto 95px;
cursor: pointer;
}
#specspan {
grid-template-columns: auto 100px !important;
cursor: auto !important;
}
#model-spec-usage {
position: relative;
left: -20px;
}
.popup .model_item:hover {
background-color: var(--popup_hover_color);
color: var(--popup_hover_color_text);
}
.popup .model_item .selected {
background: var(--popup_selected_color);
color: var(--popup_selected_color_text);
}
.model_setting_container {
display: grid;
grid-template-areas: "label label"
"item item"
"minlabel maxlabel";
grid-template-rows: 20px 16px 19px;
grid-template-columns: auto 40px;
row-gap: 0.2em;
border: 1px;
margin: 2px;
}
.model_setting_minlabel {
grid-area: minlabel;
padding-top: 3px;
color: var(--popup_title_bar_color_text);
overflow: hidden;
text-align: left;
font-size: calc(0.8em + var(--font_size_adjustment));
}
.model_setting_maxlabel {
color: var(--popup_title_bar_color_text);
padding-top: 3px;
grid-area: maxlabel;
overflow: hidden;
text-align: right;
font-size: calc(0.8em + var(--font_size_adjustment));
}
.model_setting_label {
color: var(--popup_title_bar_color_text);
grid-area: label;
overflow: hidden;
text-align: left;
}
.model_setting_value {
color: var(--popup_title_bar_color_text);
text-align: left;
grid-area: label;
background-color: inherit;
color: inherit;
border: none;
outline: none;
}
.model_setting_value:focus {
color: var(--text_edit);
}
.model_setting_item {
grid-area: item;
overflow: hidden;
}
.model_setting_item_input {
width:95%;
}
@font-face {
font-family: 'Material Icons Outlined';
font-style: normal;
src: url(/static/MaterialIconsOutlined-Regular.otf) format('opentype');
}
.material-icons-outlined {
font-family: 'Material Icons Outlined';
font-weight: normal;
font-style: normal;
font-size: calc(24px + var(--font_size_adjustment)); /* Preferred icon size */
display: inline-block;
line-height: 1;
text-transform: none;
letter-spacing: normal;
word-wrap: normal;
white-space: nowrap;
direction: ltr;
/* Support for all WebKit browsers. */
-webkit-font-smoothing: antialiased;
/* Support for Safari and Chrome. */
text-rendering: optimizeLegibility;
/* Support for Firefox. */
-moz-osx-font-smoothing: grayscale;
/* Support for IE. */
font-feature-settings: 'liga';
}
.material-icons-outlined.cursor:hover{
filter: brightness(85%);
}
.setting_label .helpicon {
color: var(--help_icon);
cursor: help;
font-size: calc(14px + var(--font_size_adjustment)) !important;
flex: auto;
width: 15px;
align-self: flex-end;
line-height: inherit;
border-radius: inherit;
margin-right: inherit;
padding: inherit;
background: inherit;
border: inherit;
text-decoration: inherit;
}
#tooltip-text {
content: attr(tooltip);
position: fixed;
transition: opacity 0s linear 0.5s;
white-space: normal;
border-radius: var(--radius_tooltip);
opacity: 1;
padding: 7px;
color: var(--tooltip_text);
background-color: var(--tooltip_background);
pointer-events: none;
z-index: 9999999;
}
.tooltip-standard {
border: 1px ridge grey;
font-family: "Helvetica Neue",Helvetica,Arial,sans-serif;
width: min-content;
max-width: 25%;
/*margin-right: -3px;*/
}
.tooltip-context-token {
border: none;
font-family: monospace;
max-width: min-content;
}
/* Mobile tooltips */
@media (pointer: coarse), (hover: none) {
[tooltip]:after {
opacity: 0;
content: "";
}
[tooltip]:hover::after {
content: attr(tooltip);
position: fixed;
top: calc(var(--mouse-y) * 100vh);
left: calc(var(--mouse-x) * 100vw);
transform: translate(var(--tooltip_x), var(--tooltip-y));
transition: opacity 0s linear 0.5s;
opacity: 1;
padding: 0px 2px;
background-color: rgba(0, 0, 0, 0.6);
pointer-events: none;
z-index: 9999999;
}
}
.popup .model_item .model_menu_selected {
color: var(--popup_selected_color);
background-color: var(--popup_selected_color_text);
}
.settings_select {
color: var(--dropdown_text);
background: var(--dropdown_background);
margin-left: auto;
margin-right: 25px;
}
.setting_value {
text-align: right;
grid-area: value;
font-size: calc(12px + var(--font_size_adjustment));
padding: 2px;
padding-top: 0px;
background-color: inherit;
color: inherit;
border: none;
outline: none;
}
.setting_value:focus {
color: var(--text_edit);
}
.setting_container_model {
display: grid;
grid-template-areas: "label value"
"item item"
"minlabel maxlabel";
grid-template-rows: 20px auto 20px;
grid-template-columns: auto 30px;
row-gap: 0.2em;
background-color: var(--setting_background);
color: var(--setting_text);
border-radius: var(--radius_settings_background);
padding: 2px;
margin: 2px;
width: calc(100%);
}
.setting_container_model .setting_item{
font-size: calc(0.93em + var(--font_size_adjustment));
margin-left: 10px;
}
.setting_minlabel {
padding-top: 6px;
grid-area: minlabel;
overflow: hidden;
padding: 5px;
padding-top: 0px;
text-align: left;
font-size: calc(0.8em + var(--font_size_adjustment));
}
.setting_maxlabel {
padding-top: 6px;
grid-area: maxlabel;
overflow: hidden;
padding: 5px;
padding-top: 0px;
text-align: right;
font-size: calc(0.8em + var(--font_size_adjustment));
text-align: left;
}
.setting_label {
display: flex;
grid-area: label;
overflow: hidden;
padding: 5px;
padding-right: 0px;
padding-top: 0px;
}
.input_error {
border: 5px solid red !important;
box-sizing: border-box !important;
}
.popup .model_item.model_menu_selected {
color: var(--popup_selected_color);
background-color: var(--popup_selected_color_text);
}

View File

@@ -347,6 +347,28 @@ border-top-right-radius: var(--tabs_rounding);
}
.setting_container_model {
display: grid;
grid-template-areas: "label value"
"item item"
"minlabel maxlabel";
grid-template-rows: 20px auto 20px;
grid-template-columns: auto 30px;
row-gap: 0.2em;
background-color: var(--setting_background);
color: var(--setting_text);
border-radius: var(--radius_settings_background);
padding: 2px;
margin: 2px;
width: calc(100%);
}
.setting_container_model .setting_item{
font-size: calc(0.93em + var(--font_size_adjustment));
margin-left: 10px;
}
.setting_minlabel {
padding-top: 6px;
grid-area: minlabel;
@@ -364,6 +386,7 @@ border-top-right-radius: var(--tabs_rounding);
padding-top: 0px;
text-align: right;
font-size: calc(0.8em + var(--font_size_adjustment));
text-align: left;
}
.setting_label {
@@ -1990,6 +2013,11 @@ body {
grid-area: file;
}
.popup .model_item.model_menu_selected {
color: var(--popup_selected_color);
background-color: var(--popup_selected_color_text);
}
.popup .header {
width: 98%;
background-color: var(--popup_item_color);
@@ -2102,6 +2130,13 @@ body {
cursor: pointer;
background-color: #688f1f;
}
.loadmodelsettings {
overflow-y: auto;
max-height: 50%;
}
/*----------------------------- Model Load Popup ------------------------------------------*/
#specspan, .popup_list_area .model_item .model {
@@ -3370,6 +3405,23 @@ textarea {
}
}
@keyframes pulse-red {
0% {
transform: scale(0.95);
box-shadow: 0 0 0 0 rgba(255, 0, 0, 0.7);
}
70% {
transform: scale(1);
box-shadow: 0 0 0 10px rgba(255, 0, 0, 0);
}
100% {
transform: scale(0.95);
box-shadow: 0 0 0 0 rgba(255, 0, 0, 0);
}
}
@keyframes pulse-text {
0% {
filter: blur(3px);
@@ -3391,6 +3443,11 @@ textarea {
}
}
.input_error {
border: 5px solid red !important;
box-sizing: border-box !important;
}
.single_pulse {
animation: pulse-text 0.5s 1;
}
@@ -3495,7 +3552,7 @@ h2 .material-icons-outlined {
}
.horde_trigger[model_model="ReadOnly"],
.horde_trigger[model_model="Read Only"],
.horde_trigger[model_model="CLUSTER"] {
display: none;
}

View File

@@ -14,7 +14,8 @@ socket.on('load_popup', function(data){load_popup(data);});
socket.on('popup_items', function(data){popup_items(data);});
socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);});
socket.on('popup_edit_file', function(data){popup_edit_file(data);});
socket.on('show_model_menu', function(data){show_model_menu(data);});
//socket.on('show_model_menu', function(data){show_model_menu(data);});
socket.on('open_model_load_menu', function(data){show_model_menu(data);});
socket.on('selected_model_info', function(data){selected_model_info(data);});
socket.on('oai_engines', function(data){oai_engines(data);});
socket.on('buildload', function(data){buildload(data);});
@@ -81,6 +82,7 @@ const on_colab = $el("#on_colab").textContent == "true";
let story_id = -1;
var dirty_chunks = [];
var initial_socketio_connection_occured = false;
var selected_model_data;
// Each entry into this array should be an object that looks like:
// {class: "class", key: "key", func: callback}
@@ -1501,48 +1503,50 @@ function getModelParameterCount(modelName) {
}
function show_model_menu(data) {
//clear old options
document.getElementById("modelkey").classList.add("hidden");
document.getElementById("modelkey").value = "";
document.getElementById("modelurl").classList.add("hidden");
document.getElementById("use_gpu_div").classList.add("hidden");
document.getElementById("use_8_bit_div").classList.add("hidden");
document.getElementById("modellayers").classList.add("hidden");
document.getElementById("oaimodel").classList.add("hidden");
var model_layer_bars = document.getElementById('model_layer_bars');
while (model_layer_bars.firstChild) {
model_layer_bars.removeChild(model_layer_bars.firstChild);
//clear out the loadmodelsettings
var loadmodelsettings = document.getElementById('loadmodelsettings')
while (loadmodelsettings.firstChild) {
loadmodelsettings.removeChild(loadmodelsettings.firstChild);
}
//Clear out plugin selector
var model_plugin = document.getElementById('modelplugin');
while (model_plugin.firstChild) {
model_plugin.removeChild(model_plugin.firstChild);
}
model_plugin.classList.add("hidden");
var accept = document.getElementById("btn_loadmodelaccept");
accept.disabled = false;
//clear out the breadcrumbs
var breadcrumbs = document.getElementById('loadmodellistbreadcrumbs')
while (breadcrumbs.firstChild) {
breadcrumbs.removeChild(breadcrumbs.firstChild);
}
//add breadcrumbs
//console.log(data.breadcrumbs);
for (item of data.breadcrumbs) {
var button = document.createElement("button");
button.classList.add("breadcrumbitem");
button.setAttribute("model", data.menu);
button.setAttribute("folder", item[0]);
button.textContent = item[1];
button.onclick = function () {
socket.emit('select_model', {'menu': "", 'model': this.getAttribute("model"), 'path': this.getAttribute("folder")});
};
breadcrumbs.append(button);
var span = document.createElement("span");
span.textContent = "\\";
breadcrumbs.append(span);
}
//add breadcrumbs
if ('breadcrumbs' in data) {
for (item of data.breadcrumbs) {
var button = document.createElement("button");
button.classList.add("breadcrumbitem");
button.setAttribute("model", data.menu);
button.setAttribute("folder", item[0]);
button.textContent = item[1];
button.onclick = function () {
socket.emit('select_model', {'menu': "", 'name': this.getAttribute("model"), 'path': this.getAttribute("folder")});
};
breadcrumbs.append(button);
var span = document.createElement("span");
span.textContent = "\\";
breadcrumbs.append(span);
}
}
//clear out the items
var model_list = document.getElementById('loadmodellistcontent')
while (model_list.firstChild) {
model_list.removeChild(model_list.firstChild);
}
//add items
for (item of data.data) {
for (item of data.items) {
var list_item = document.createElement("span");
list_item.classList.add("model_item");
@@ -1564,10 +1568,33 @@ function show_model_menu(data) {
//create the actual item
var popup_item = document.createElement("span");
popup_item.classList.add("model");
popup_item.setAttribute("display_name", item.label);
popup_item.id = item.name;
for (const key in item) {
if (key == "name") {
popup_item.id = item[key];
}
popup_item.setAttribute(key, item[key]);
}
popup_item.onclick = function() {
var attributes = this.attributes;
var obj = {};
for (var i = 0, len = attributes.length; i < len; i++) {
obj[attributes[i].name] = attributes[i].value;
}
//remember the selected model data so we can send it to the server when you accept
var accept = document.getElementById("popup_accept");
selected_model_data = obj;
//send the data to the server so it can figure out what data we need from the user for the model
socket.emit('select_model', obj);
//clear out the selected item and select this one visually
for (const element of document.getElementsByClassName("model_menu_selected")) {
element.classList.remove("model_menu_selected");
}
this.closest(".model_item").classList.add("model_menu_selected");
}
popup_item.setAttribute("Menu", data.menu)
//name text
var text = document.createElement("span");
text.style="grid-area: item;";
@@ -1615,241 +1642,327 @@ function show_model_menu(data) {
});
})();
popup_item.onclick = function () {
var accept = document.getElementById("btn_loadmodelaccept");
accept.classList.add("disabled");
socket.emit("select_model", {"model": this.id, "menu": this.getAttribute("Menu"), "display_name": this.getAttribute("display_name")});
var model_list = document.getElementById('loadmodellistcontent').getElementsByClassName("selected");
for (model of model_list) {
model.classList.remove("selected");
}
this.classList.add("selected");
accept.setAttribute("selected_model", this.id);
accept.setAttribute("menu", this.getAttribute("Menu"));
accept.setAttribute("display_name", this.getAttribute("display_name"));
};
list_item.append(popup_item);
model_list.append(list_item);
}
var accept = document.getElementById("btn_loadmodelaccept");
accept.disabled = true;
//finally, if they selected the custom hugging face menu we show the input box
if (data['menu'] == "customhuggingface") {
document.getElementById("custommodelname").classList.remove("hidden");
} else {
document.getElementById("custommodelname").classList.add("hidden");
}
// detect if we are in a model selection screen and show the reference
var refelement = document.getElementById("modelspecifier");
var check = document.getElementById("mainmenu");
if (check) {
refelement.classList.remove("hidden");
} else {
refelement.classList.add("hidden");
}
openPopup("load-model");
}
function selected_model_info(data) {
function model_settings_checker() {
//get check value:
missing_element = false;
if (this.check_data != null) {
if ('sum' in this.check_data) {
check_value = 0
for (const temp of this.check_data['sum']) {
if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value);
} else {
missing_element = true;
}
}
} else {
check_value = this.value
}
if (this.check_data['check'] == "=") {
valid = (check_value == this.check_data['value']);
} else if (this.check_data['check'] == "!=") {
valid = (check_value != this.check_data['value']);
} else if (this.check_data['check'] == ">=") {
valid = (check_value >= this.check_data['value']);
} else if (this.check_data['check'] == "<=") {
valid = (check_value <= this.check_data['value']);
} else if (this.check_data['check'] == ">") {
valid = (check_value > this.check_data['value']);
} else if (this.check_data['check'] == "<") {
valid = (check_value < this.check_data['value']);
}
if (valid || missing_element) {
//if we are supposed to refresh when this value changes we'll resubmit
if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) {
//get an object of all the input settings from the user
data = {}
settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area");
if (settings_area) {
for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) {
var element_data = element.value;
if (element.getAttribute("data_type") == "int") {
element_data = parseInt(element_data);
} else if (element.getAttribute("data_type") == "float") {
element_data = parseFloat(element_data);
} else if (element.getAttribute("data_type") == "bool") {
element_data = (element_data == 'on');
}
data[element.id.split("|")[1].replace("_value", "")] = element_data;
}
}
data = {...data, ...selected_model_data};
data['plugin'] = document.getElementById("modelplugin").value;
socket.emit("resubmit_model_info", data);
}
if ('sum' in this.check_data) {
for (const temp of this.check_data['sum']) {
if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error');
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip");
}
}
} else {
this.closest(".setting_container_model").classList.remove('input_error');
this.closest(".setting_container_model").removeAttribute("tooltip");
}
} else {
if ('sum' in this.check_data) {
for (const temp of this.check_data['sum']) {
if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error');
if (this.check_data['check_message']) {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']);
} else {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip");
}
}
}
} else {
this.closest(".setting_container_model").classList.add('input_error');
if (this.check_data['check_message']) {
this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']);
} else {
this.closest(".setting_container_model").removeAttribute("tooltip");
}
}
}
}
var accept = document.getElementById("btn_loadmodelaccept");
//hide or unhide key
if (data.key) {
document.getElementById("modelkey").classList.remove("hidden");
document.getElementById("modelkey").value = data.key_value;
} else {
document.getElementById("modelkey").classList.add("hidden");
document.getElementById("modelkey").value = "";
}
//hide or unhide URL
if (data.url) {
document.getElementById("modelurl").classList.remove("hidden");
} else {
document.getElementById("modelurl").classList.add("hidden");
}
//hide or unhide 8 bit mode
if (data.bit_8_available) {
document.getElementById("use_8_bit_div").classList.remove("hidden");
} else {
document.getElementById("use_8_bit_div").classList.add("hidden");
document.getElementById("use_8_bit").checked = false;
}
//default URL loading
if (data.default_url != null) {
document.getElementById("modelurl").value = data.default_url;
}
//change model loading on url if needed
if (data.models_on_url) {
document.getElementById("modelurl").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': document.getElementById("modelkey").value, 'url': this.value});};
document.getElementById("modelkey").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value, 'url': document.getElementById("modelurl").value});};
} else {
document.getElementById("modelkey").ochange = function () {socket.emit('OAI_Key_Update', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value});};
document.getElementById("modelurl").ochange = null;
}
//show model select for APIs
if (data.show_online_model_select) {
document.getElementById("oaimodel").classList.remove("hidden");
} else {
document.getElementById("oaimodel").classList.add("hidden");
}
//Multiple Model Select?
if (data.multi_online_models) {
document.getElementById("oaimodel").setAttribute("multiple", "");
document.getElementById("oaimodel").options[0].textContent = "All"
} else {
document.getElementById("oaimodel").removeAttribute("multiple");
document.getElementById("oaimodel").options[0].textContent = "Select Model(s)"
}
//hide or unhide the use gpu checkbox
if (data.gpu) {
document.getElementById("use_gpu_div").classList.remove("hidden");
} else {
document.getElementById("use_gpu_div").classList.add("hidden");
}
//setup breakmodel
if (data.breakmodel) {
document.getElementById("modellayers").classList.remove("hidden");
//setup model layer count
document.getElementById("gpu_layers_current").textContent = data.break_values.reduce((a, b) => a + b, 0);
document.getElementById("gpu_layers_max").textContent = data.layer_count;
document.getElementById("gpu_count").value = data.gpu_count;
//create the gpu load bars
var model_layer_bars = document.getElementById('model_layer_bars');
while (model_layer_bars.firstChild) {
model_layer_bars.removeChild(model_layer_bars.firstChild);
ok_to_load = true;
for (const item of document.getElementsByClassName("input_error")) {
if (item.classList.contains("input_error") && !item.closest(".model_plugin_settings_area").classList.contains("hidden")) {
ok_to_load = false;
break;
}
//Add the bars
for (let i = 0; i < data.gpu_names.length; i++) {
var div = document.createElement("div");
div.classList.add("model_setting_container");
//build GPU text
var span = document.createElement("span");
span.classList.add("model_setting_label");
span.textContent = "GPU " + i + " " + data.gpu_names[i] + ": "
//build layer count box
var input = document.createElement("input");
input.classList.add("model_setting_value");
input.classList.add("setting_value");
input.inputmode = "numeric";
input.id = "gpu_layers_box_"+i;
input.value = data.break_values[i];
input.onblur = function () {
document.getElementById(this.id.replace("_box", "")).value = this.value;
update_gpu_layers();
}
span.append(input);
div.append(span);
//build layer count slider
var input = document.createElement("input");
input.classList.add("model_setting_item");
input.type = "range";
input.min = 0;
input.max = data.layer_count;
input.step = 1;
input.value = data.break_values[i];
input.id = "gpu_layers_" + i;
input.onchange = function () {
document.getElementById(this.id.replace("gpu_layers", "gpu_layers_box")).value = this.value;
update_gpu_layers();
}
div.append(input);
//build slider bar #s
//min
var span = document.createElement("span");
span.classList.add("model_setting_minlabel");
var span2 = document.createElement("span");
span2.style="top: -4px; position: relative;";
span2.textContent = 0;
span.append(span2);
div.append(span);
//max
var span = document.createElement("span");
span.classList.add("model_setting_maxlabel");
var span2 = document.createElement("span");
span2.style="top: -4px; position: relative;";
span2.textContent = data.layer_count;
span.append(span2);
div.append(span);
model_layer_bars.append(div);
}
//add the disk layers
if (data.disk_break) {
var div = document.createElement("div");
div.classList.add("model_setting_container");
//build GPU text
var span = document.createElement("span");
span.classList.add("model_setting_label");
span.textContent = "Disk cache: "
//build layer count box
var input = document.createElement("input");
input.classList.add("model_setting_value");
input.classList.add("setting_value");
input.inputmode = "numeric";
input.id = "disk_layers_box";
input.value = data.disk_break_value;
input.onblur = function () {
document.getElementById(this.id.replace("_box", "")).value = this.value;
update_gpu_layers();
}
span.append(input);
div.append(span);
//build layer count slider
var input = document.createElement("input");
input.classList.add("model_setting_item");
input.type = "range";
input.min = 0;
input.max = data.layer_count;
input.step = 1;
input.value = data.disk_break_value;
input.id = "disk_layers";
input.onchange = function () {
document.getElementById(this.id+"_box").value = this.value;
update_gpu_layers();
}
div.append(input);
//build slider bar #s
//min
var span = document.createElement("span");
span.classList.add("model_setting_minlabel");
var span2 = document.createElement("span");
span2.style="top: -4px; position: relative;";
span2.textContent = 0;
span.append(span2);
div.append(span);
//max
var span = document.createElement("span");
span.classList.add("model_setting_maxlabel");
var span2 = document.createElement("span");
span2.style="top: -4px; position: relative;";
span2.textContent = data.layer_count;
span.append(span2);
div.append(span);
}
model_layer_bars.append(div);
update_gpu_layers();
} else {
document.getElementById("modellayers").classList.add("hidden");
}
if (ok_to_load) {
accept.classList.remove("disabled");
accept.disabled = false;
} else {
accept.classList.add("disabled");
accept.disabled = true;
}
//We now have valid display boxes potentially. We'll go through them and update the display
for (const item of document.querySelectorAll(".model_settings_valid_display:not(#blank_model_settings_valid_display)")) {
check_value = 0
missing_element = false;
for (const temp of item.check_data['sum']) {
if (document.getElementById(item.id.split("|")[0] +"|" + temp + "_value")) {
check_value += parseInt(document.getElementById(item.id.split("|")[0] +"|" + temp + "_value").value);
} else {
missing_element = true;
}
}
if (!missing_element) {
item.innerText = item.original_text.replace("%1", check_value);
}
}
}
function selected_model_info(sent_data) {
const data = sent_data['model_backends'];
//clear out the loadmodelsettings
var loadmodelsettings = document.getElementById('loadmodelsettings')
while (loadmodelsettings.firstChild) {
loadmodelsettings.removeChild(loadmodelsettings.firstChild);
}
//Clear out plugin selector
var model_plugin = document.getElementById('modelplugin');
while (model_plugin.firstChild) {
model_plugin.removeChild(model_plugin.firstChild);
}
var accept = document.getElementById("btn_loadmodelaccept");
accept.disabled = false;
modelplugin = document.getElementById("modelplugin");
modelplugin.classList.remove("hidden");
modelplugin.onchange = function () {
for (const area of document.getElementsByClassName("model_plugin_settings_area")) {
area.classList.add("hidden");
}
if (document.getElementById(this.value + "_settings_area")) {
document.getElementById(this.value + "_settings_area").classList.remove("hidden");
}
model_settings_checker()
}
//create the content
for (const [loader, items] of Object.entries(data)) {
model_area = document.createElement("DIV");
model_area.id = loader + "_settings_area";
model_area.classList.add("model_plugin_settings_area");
model_area.classList.add("hidden");
modelpluginoption = document.createElement("option");
modelpluginoption.innerText = loader;
modelpluginoption.value = loader;
modelplugin.append(modelpluginoption);
//create the user input for each requested input
for (item of items) {
let new_setting = document.getElementById('blank_model_settings').cloneNode(true);
new_setting.id = loader;
new_setting.classList.remove("hidden");
new_setting.querySelector('#blank_model_settings_label').innerText = item['label'];
new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']);
onchange_event = model_settings_checker;
if (item['uitype'] == "slider") {
var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number');
slider_number.value = item['default'];
slider_number.id = loader + "|" + item['id'] + "_value_text";
slider_number.onchange = function() { document.getElementById(this.id.replace("_text", "")).value = this.value;};
var slider = new_setting.querySelector('#blank_model_settings_slider');
slider.value = item['default'];
slider.min = item['min'];
slider.max = item['max'];
slider.setAttribute("data_type", item['unit']);
slider.id = loader + "|" + item['id'] + "_value";
if ('check' in item) {
slider.check_data = item['check'];
slider_number.check_data = item['check'];
} else {
slider.check_data = null;
slider_number.check_data = null;
}
slider.oninput = function() { document.getElementById(this.id+"_text").value = this.value;};
slider.onchange = onchange_event;
slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min'];
new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max'];
slider.noresubmit = true;
slider.onchange();
slider.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_slider').remove();
}
if (item['uitype'] == "toggle") {
toggle = document.createElement("input");
toggle.type='checkbox';
toggle.classList.add("setting_item_input");
toggle.classList.add("blank_model_settings_input");
toggle.classList.add("model_settings_input");
toggle.id = loader + "|" + item['id'] + "_value";
toggle.checked = item['default'];
toggle.onclick = onchange_event;
toggle.setAttribute("data_type", item['unit']);
toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if ('check' in item) {
toggle.check_data = item['check'];
} else {
toggle.check_data = null;
}
new_setting.querySelector('#blank_model_settings_toggle').append(toggle);
setTimeout(function() {
$('#'+loader + "\\|" + item['id'] + "_value").bootstrapToggle({size: "mini", onstyle: "success", toggle: "toggle"});
}, 200);
toggle.noresubmit = true;
toggle.onclick();
toggle.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_toggle').remove();
}
if (item['uitype'] == "dropdown") {
var select_element = new_setting.querySelector('#blank_model_settings_dropdown');
select_element.id = loader + "|" + item['id'] + "_value";
for (const dropdown_value of item['children']) {
new_option = document.createElement("option");
new_option.value = dropdown_value['value'];
new_option.innerText = dropdown_value['text'];
select_element.append(new_option);
}
select_element.value = item['default'];
select_element.setAttribute("data_type", item['unit']);
select_element.onchange = onchange_event;
select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if (('multiple' in item) && (item['multiple'])) {
select_element.multiple = true;
select_element.size = 10;
}
if ('check' in item) {
select_element.check_data = item['check'];
} else {
select_element.check_data = null;
}
select_element.noresubmit = true;
select_element.onchange();
select_element.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_dropdown').remove();
}
if (item['uitype'] == "password") {
var password_item = new_setting.querySelector('#blank_model_settings_password');
password_item.id = loader + "|" + item['id'] + "_value";
password_item.value = item['default'];
password_item.setAttribute("data_type", item['unit']);
password_item.onchange = onchange_event;
password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if ('check' in item) {
password_item.check_data = item['check'];
} else {
password_item.check_data = null;
}
password_item.noresubmit = true;
password_item.onchange();
password_item.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_password').remove();
}
if (item['uitype'] == "text") {
var text_item = new_setting.querySelector('#blank_model_settings_text');
text_item.id = loader + "|" + item['id'] + "_value";
text_item.value = item['default'];
text_item.onchange = onchange_event;
text_item.setAttribute("data_type", item['unit']);
text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if ('check' in item) {
text_item.check_data = item['check'];
} else {
text_item.check_data = null;
}
text_item.noresubmit = true;
text_item.onchange();
text_item.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_text').remove();
}
if (item['uitype'] == "Valid Display") {
new_setting = document.createElement("DIV");
new_setting.classList.add("model_settings_valid_display");
new_setting.id = loader + "|" + item['id'] + "_value";
new_setting.innerText = item['label'];
new_setting.check_data = item['check'];
new_setting.original_text = item['label'];
}
model_area.append(new_setting);
loadmodelsettings.append(model_area);
}
}
//unhide the first plugin settings
if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) {
document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden");
}
model_settings_checker()
}
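// Editor's sketch (illustrative, not from the original commit): the
// sent_data['model_backends'] object consumed above maps each backend name to a
// list of input descriptors. The field names follow the code above; the backend
// name, ids, and values below are made-up examples.
// {
//     "Huggingface": [
//         {"uitype": "slider", "id": "example_layers", "label": "Example Layers",
//          "unit": "int", "min": 0, "max": 32, "default": 0,
//          "refresh_model_inputs": false, "tooltip": "Example help text",
//          "check": {"sum": ["example_layers"], "check": "<=", "value": 32,
//                    "check_message": "Too many layers assigned"}},
//         {"uitype": "toggle", "id": "use_gpu", "label": "Use GPU", "unit": "bool",
//          "default": true, "refresh_model_inputs": false, "tooltip": "Example help text"}
//     ]
// }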
@@ -1877,42 +1990,37 @@ function update_gpu_layers() {
function load_model() {
var accept = document.getElementById('btn_loadmodelaccept');
gpu_layers = []
disk_layers = 0;
if (!(document.getElementById("modellayers").classList.contains("hidden"))) {
for (let i=0; i < document.getElementById("gpu_count").value; i++) {
gpu_layers.push(document.getElementById("gpu_layers_"+i).value);
}
if (document.getElementById("disk_layers")) {
disk_layers = document.getElementById("disk_layers").value;
settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area");
//get an object of all the input settings from the user
data = {}
if (settings_area) {
for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) {
var element_data = element.value;
if ((element.tagName == "SELECT") && (element.multiple)) {
element_data = [];
for (var i=0, iLen=element.options.length; i<iLen; i++) {
if (element.options[i].selected) {
element_data.push(element.options[i].value);
}
}
} else {
if (element.getAttribute("data_type") == "int") {
element_data = parseInt(element_data);
} else if (element.getAttribute("data_type") == "float") {
element_data = parseFloat(element_data);
} else if (element.getAttribute("data_type") == "bool") {
element_data = (element_data == 'on');
}
}
data[element.id.split("|")[1].replace("_value", "")] = element_data;
}
}
//Need to do different stuff with custom models
if ((accept.getAttribute('menu') == 'GPT2Custom') || (accept.getAttribute('menu') == 'NeoCustom')) {
var model = document.getElementById("btn_loadmodelaccept").getAttribute("menu");
var path = document.getElementById("btn_loadmodelaccept").getAttribute("display_name");
} else {
var model = document.getElementById("btn_loadmodelaccept").getAttribute("selected_model");
var path = "";
}
data = {...data, ...selected_model_data};
let selected_models = [];
for (item of document.getElementById("oaimodel").selectedOptions) {
selected_models.push(item.value);
}
if (selected_models.length == 1 && selected_models[0] == '') {
selected_models = [];
} else if (selected_models.length == 1) {
selected_models = selected_models[0];
}
data['plugin'] = document.getElementById("modelplugin").value;
message = {'model': model, 'path': path, 'use_gpu': document.getElementById("use_gpu").checked,
'key': document.getElementById('modelkey').value, 'gpu_layers': gpu_layers.join(),
'disk_layers': disk_layers, 'url': document.getElementById("modelurl").value,
'online_model': selected_models,
'use_8_bit': document.getElementById('use_8_bit').checked};
socket.emit("load_model", message);
socket.emit("load_model", data);
closePopups();
}

View File

@@ -18,7 +18,7 @@
<script src="static/bootstrap.min.js"></script>
<script src="static/bootstrap-toggle.min.js"></script>
<script src="static/rangy-core.min.js"></script>
<script src="static/application.js?ver=1.18.1f"></script>
<script defer src="static/application.js?ver=1.18.1f"></script>
<script src="static/favicon.js"></script>
</head>
<body>
@@ -283,59 +283,7 @@
</div>
</div>
</div>
<div class="popupcontainer hidden" id="loadmodelcontainer">
<div class="loadpopup">
<div class="popuptitlebar">
<div class="popuptitletext">Select A Model To Load</div>
</div>
<div id="loadmodellistbreadcrumbs">
</div>
<div id="loadmodellistcontent" style="overflow: auto; height: 300px;">
</div>
<div class="popupfooter">
<input class="form-control hidden" type="text" placeholder="Enter the URL of the server (For example a trycloudflare link)" id="modelurl" onchange="check_enable_model_load()">
<input class="form-control hidden" type="text" placeholder="key" id="modelkey" onblur="socket.send({'cmd': 'OAI_Key_Update', 'key': $('#modelkey')[0].value});">
<input class="form-control hidden" type="text" placeholder="Model Path or Hugging Face Name" id="custommodelname" menu="" onblur="socket.send({'cmd': 'selectmodel', 'data': $(this).attr('menu'), 'path_modelname': $('#custommodelname')[0].value});">
</div>
<div class="popupfooter">
<select class="form-control hidden" id="oaimodel"><option value="">Select Model(s)</option></select>
</div>
<div class="popupfooter hidden" id=modellayers>
<div class='settingitem' style="width:100%">
<div class='settinglabel'>
<div class="justifyleft">
GPU/Disk Layers
<span class="helpicon">?
<span class="helptext">Number of layers to assign to GPUs and to disk cache. Remaining layers will be put into CPU RAM.</span>
</span>
</div>
<div class="justifyright" id="gpu_layers_current">0</div>
</div>
<div id=model_layer_bars style="color: white">
</div>
<input type=hidden id='gpu_count' value=0/>
<div class="settingminmax">
<div class="justifyleft">
0
</div>
<div class="justifyright" id="gpu_layers_max">
24
</div>
</div>
</div>
</div>
<div class="popupfooter">
<button type="button" class="btn btn-primary" id="btn_loadmodelaccept">Load</button>
<button type="button" class="btn btn-primary" id="btn_loadmodelclose">Cancel</button>
<div class="box flex-push-right hidden" id=use_gpu_div>
<input type="checkbox" data-toggle="toggle" data-onstyle="success" id="use_gpu" checked>
<div class="box-label">Use GPU</div>
</div>
</div>
</div>
</div>
<div class="popupcontainer hidden" id="spcontainer">
<div id="sppopup">
<div class="popuptitlebar">
@@ -513,6 +461,12 @@
</div>
</div>
</div>
<!------------- Pop-Ups ------------------------------->
{% include 'popups.html' %}
<!------------- Templates ------------------------------->
<div class="hidden">
{% include 'templates.html' %}
</div>
</body>
</html>

View File

@@ -46,35 +46,11 @@
<div id="model-spec-usage">Usage (VRAM)</div>
</span>
</span>
<div id="loadmodellistbreadcrumbs">
</div>
<div id="loadmodellistbreadcrumbs"></div>
<div id="loadmodellistcontent" class="popup_list_area"></div>
<div id="loadmodelplugin" class="popup_load_cancel"><select id="modelplugin" class="settings_select hidden"></select></div>
<div id="loadmodelsettings" class="popup_load_cancel loadmodelsettings"></div>
<div class="popup_load_cancel">
<div>
<input class="hidden fullwidth" type="text" placeholder="key" id="modelkey" onchange="socket.emit('OAI_Key_Update', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value});">
<input class="hidden fullwidth" type="text" placeholder="Enter the URL of the server (For example a trycloudflare link)" id="modelurl" onchange="check_enable_model_load()">
<input class="hidden fullwidth" type="text" placeholder="Hugging Face Model Name" id="custommodelname" menu="" onblur="socket.emit('get_model_info', this.value);
document.getElementById('btn_loadmodelaccept').setAttribute('selected_model', this.value);
">
<select class="hidden fullwidth settings_select" id="oaimodel"><option value="">Select OAI Model</option></select>
</div>
<div class="hidden" id=modellayers>
<div class="justifyleft">
GPU/Disk Layers<span class="material-icons-outlined helpicon" tooltip="Number of layers to assign to GPUs and to disk cache. Remaining layers will be put into CPU RAM.">help_icon</span>
</div>
<div class="justifyright"><span id="gpu_layers_current">0</span>/<span id="gpu_layers_max">0</span></div>
<div id=model_layer_bars style="color: white"></div>
<input type=hidden id='gpu_count' value=0/>
</div>
<div class="box flex-push-right hidden" id=use_gpu_div>
<input type="checkbox" data-toggle="toggle" data-onstyle="success" id="use_gpu" checked>
<div class="box-label">Use GPU</div>
</div>
<div class="box flex-push-right hidden" id=use_8_bit_div onclick="set_8_bit_mode()">
<input type="checkbox" data-toggle="toggle" data-onstyle="success" id="use_8_bit" checked>
<div class="box-label">Use 8 bit mode</div>
</div>
<button type="button" class="btn popup_load_cancel_button action_button disabled" onclick="load_model()" id="btn_loadmodelaccept" disabled>Load</button>
<button type="button" class="btn popup_load_cancel_button" onclick='closePopups();' id="btn_loadmodelclose">Cancel</button>
</div>

View File

@@ -1,5 +1,4 @@
<!---------------- World Info Card ---------------------->
<link href="static/koboldai.css" rel="stylesheet">
<div draggable="true" class="world_info_card" id="world_info_">
<div class="world_info_title_area">
<div>
@@ -154,3 +153,21 @@
</div>
</div>
</div>
<!---------------- Model Settings ---------------------->
<div id="blank_model_settings" class="setting_container_model">
<span class="setting_label">
<span id="blank_model_settings_label">:&nbsp;</span><span id="blank_model_settings_tooltip" class="helpicon material-icons-outlined" style="text-align: left;" tooltip="">help_icon</span>
</span>
<input autocomplete="off" class="setting_value" id="blank_model_settings_value_slider_number">
<span class="setting_item">
<input type="range" id="blank_model_settings_slider" class="setting_item_input blank_model_settings_input model_settings_input">
<!--<input type=checkbox id="blank_model_settings_toggle" class="setting_item_input blank_model_settings_input model_settings_input" data-size="mini" data-onstyle="success" data-toggle="toggle">-->
<span id="blank_model_settings_toggle"></span>
<select id="blank_model_settings_dropdown" class="settings_select blank_model_settings_input model_settings_input"></select>
<input type=password id="blank_model_settings_password" class="settings_select blank_model_settings_input model_settings_input">
<input id="blank_model_settings_text" class="settings_select blank_model_settings_input model_settings_input">
</span>
<span class="setting_minlabel"><span style="position: relative;" id="blank_model_settings_min_label"></span></span>
<span class="setting_maxlabel"><span style="position: relative;" id="blank_model_settings_max_label"></span></span>
</span>
</div>
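The blank_model_settings block above is a client-side template: for each configuration field a backend reports, the UI can clone this block and reveal only the matching input (slider, toggle, dropdown, password, or free text), using the min/max label and tooltip slots for the rest. The sketch below shows the kind of field descriptors a backend might hand to this template; the function name and every dictionary key are assumptions for illustration, not the schema this PR defines.

# Hypothetical field descriptors; names and keys are illustrative only.
def example_requested_parameters():
    return [
        {
            "uitype": "slider",    # would map to blank_model_settings_slider
            "label": "GPU Layers",
            "id": "gpu_layers",
            "min": 0,
            "max": 24,
            "default": 0,
            "tooltip": "Number of layers to place on the GPU.",
        },
        {
            "uitype": "password",  # would map to blank_model_settings_password
            "label": "API Key",
            "id": "api_key",
            "default": "",
            "tooltip": "Key for the online service, if one is required.",
        },
    ]

for field in example_requested_parameters():
    print(field["uitype"], field["label"])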

View File

@@ -460,14 +460,14 @@ def sample_func(data, key, numseqs_aux, badwords, repetition_penalty, generated_
return carry
class PenalizingCausalTransformer(CausalTransformer):
def __init__(self, config, **kwargs):
def __init__(self, badwordsids, config, **kwargs):
# Initialize
super().__init__(config, **kwargs)
def generate_static(state, key, ctx, ctx_length, gen_length, numseqs_aux, sampler_options, soft_embeddings=None):
compiling_callback()
numseqs = numseqs_aux.shape[0]
# These are the tokens that we don't want the AI to ever write
badwords = jnp.array(koboldai_vars.badwordsids).squeeze()
badwords = jnp.array(badwordsids).squeeze()
@hk.transform
def generate_sample(context, ctx_length):
# Give the initial context to the transformer
@@ -941,7 +941,9 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2):
koboldai_vars.status_message = ""
def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None:
import koboldai_settings
def load_model(path: str, model_type: str, badwordsids=koboldai_settings.badwordsids_default, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None:
global thread_resources_env, seq, tokenizer, network, params, pad_token_id
if kwargs.get("pad_token_id"):
@@ -989,9 +991,9 @@ def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=Fa
# Try to convert HF config.json to MTJ config
if hf_checkpoint:
spec_path = os.path.join("maps", koboldai_vars.model_type + ".json")
spec_path = os.path.join("maps", model_type + ".json")
if not os.path.isfile(spec_path):
raise NotImplementedError(f"Unsupported model type {repr(koboldai_vars.model_type)}")
raise NotImplementedError(f"Unsupported model type {repr(model_type)}")
with open(spec_path) as f:
lazy_load_spec = json.load(f)
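With the hunks above, the TPU backend's load_model no longer reads the model type or the banned-token list from koboldai_vars: both are passed in explicitly, badwordsids defaulting to koboldai_settings.badwordsids_default, and the same list is what PenalizingCausalTransformer now receives in its constructor. A sketch of the resulting call shape follows; the path and model_type values are hypothetical examples, and actually running it requires a TPU runtime and a real checkpoint.

import koboldai_settings
import tpu_mtj_backend

# Hypothetical invocation: "models/gpt-j-6b" and "gptj" are example values, not
# taken from the PR. model_type selects maps/<model_type>.json when
# hf_checkpoint=True; badwordsids replaces the old koboldai_vars lookup.
tpu_mtj_backend.load_model(
    "models/gpt-j-6b",
    model_type="gptj",
    badwordsids=koboldai_settings.badwordsids_default,
    hf_checkpoint=True,
)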
@@ -1119,12 +1121,12 @@ def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=Fa
global badwords
# These are the tokens that we don't want the AI to ever write
badwords = jnp.array(koboldai_vars.badwordsids).squeeze()
badwords = jnp.array(badwordsids).squeeze()
if not path.endswith("/"):
path += "/"
network = PenalizingCausalTransformer(params, dematerialized=True)
network = PenalizingCausalTransformer(badwordsids, params, dematerialized=True)
if not hf_checkpoint and koboldai_vars.model != "TPUMeshTransformerGPTNeoX":
network.state = read_ckpt_lowmem(network.state, path, devices.shape[1])