Merge commit 'refs/pull/110/head' of https://github.com/ebolam/KoboldAI into UI2
Changed file: aiserver.py (316 lines)
@@ -221,6 +221,7 @@ model_menu = {
        ["InferKit API (requires API key)", "InferKit", "", False],
        # ["KoboldAI Server API (Old Google Colab)", "Colab", "", False],
        ["KoboldAI API", "API", "", False],
        ["KoboldAI Horde", "CLUSTER", "", False],
        ["Return to Main Menu", "mainmenu", "", True],
    ]
}
@@ -467,6 +468,18 @@ api_v1 = KoboldAPISpec(
    tags=tags,
)

# Returns the expected config filename for the current setup.
# If the model_name is specified, it returns what the settings file would be for that model
def get_config_filename(model_name = None):
    if model_name:
        return(f"settings/{model_name.replace('/', '_')}.settings")
    elif args.configname:
        return(f"settings/{args.configname.replace('/', '_')}.settings")
    elif vars.configname != '':
        return(f"settings/{vars.configname.replace('/', '_')}.settings")
    else:
        print(f"Empty configfile name sent back. Defaulting to ReadOnly")
        return(f"settings/ReadOnly.settings")
#==================================================================#
# Function to get model selection at startup
#==================================================================#
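For illustration only (not part of the diff): the new helper above flattens slashes in model names so each model gets its own file under settings/. A minimal standalone sketch of the same mapping, with an example model name:

def settings_path_for(model_name):
    # Slashes in Hugging Face-style names are replaced so the file lands directly in settings/
    return f"settings/{model_name.replace('/', '_')}.settings"

print(settings_path_for("KoboldAI/fairseq-dense-13B"))  # -> settings/KoboldAI_fairseq-dense-13B.settings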
@@ -578,9 +591,8 @@ def check_if_dir_is_model(path):
# Return Model Name
#==================================================================#
def getmodelname():
    if(args.configname):
        modelname = args.configname
        return modelname
    if(vars.online_model != ''):
        return(f"{vars.model}/{vars.online_model}")
    if(koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
        modelname = os.path.basename(os.path.normpath(koboldai_vars.custmodpth))
        return modelname
@@ -996,6 +1008,8 @@ def general_startup(override_args=None):
    parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)")
    parser.add_argument("--model", help="Specify the Model Type to skip the Menu")
    parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)")
    parser.add_argument("--apikey", help="Specify the API key to use for online services")
    parser.add_argument("--req_model", type=str, action='append', required=False, help="Which models we allow to generate for us during cluster mode. Can be specified multiple times.")
    parser.add_argument("--revision", help="Specify the model revision for huggingface models (can be a git branch/tag name or a git commit hash)")
    parser.add_argument("--cpu", action='store_true', help="By default unattended launches are on the GPU use this option to force CPU usage.")
    parser.add_argument("--breakmodel", action='store_true', help=argparse.SUPPRESS)
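Taken together with --model, the new flags allow an unattended Horde/cluster launch, for example (illustrative placeholder values, not from the diff): python aiserver.py --model CLUSTER --apikey <your-horde-key> --req_model KoboldAI/fairseq-dense-13B --req_model KoboldAI/OPT-13B-Nerys-v2. Per its help text, --req_model may be repeated once per allowed model.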
@@ -1058,6 +1072,11 @@ def general_startup(override_args=None):
        koboldai_vars.model = args.model;
        koboldai_vars.revision = args.revision

    if args.apikey:
        vars.apikey = args.apikey
    if args.req_model:
        vars.cluster_requested_models = args.req_model

    if args.colab:
        args.remote = True;
        args.override_rename = True;
@@ -1156,12 +1175,30 @@ def get_model_info(model, directory=""):
    key_value = ""
    break_values = []
    url = False
    default_url = None
    models_on_url = False
    multi_online_models = False
    gpu_count = torch.cuda.device_count()
    gpu_names = []
    for i in range(gpu_count):
        gpu_names.append(torch.cuda.get_device_name(i))
    if model in ['Colab', 'API']:
        url = True
    elif model == 'CLUSTER':
        models_on_url = True
        url = True
        key = True
        default_url = 'https://koboldai.net'
        multi_online_models = True
        if path.exists(get_config_filename(model)):
            with open(get_config_filename(model), "r") as file:
                # Check if API key exists
                js = json.load(file)
                if("apikey" in js and js["apikey"] != ""):
                    # API key exists, grab it and close the file
                    key_value = js["apikey"]
                elif 'oaiapikey' in js and js['oaiapikey'] != "":
                    key_value = js["oaiapikey"]
    elif model in [x[1] for x in model_menu['apilist']]:
        if path.exists("settings/{}.v2_settings".format(model)):
            with open("settings/{}.v2_settings".format(model), "r") as file:
@@ -1202,13 +1239,13 @@ def get_model_info(model, directory=""):
            break_values = [layer_count]
    break_values = [int(x) for x in break_values if x != '']
    break_values += [0] * (gpu_count - len(break_values))
    emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key,
    emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key, 'multi_online_models': multi_online_models, 'default_url': default_url,
                        'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel,
                        'disk_break_value': disk_blocks, 'accelerate': utils.HAS_ACCELERATE,
                        'break_values': break_values, 'gpu_count': gpu_count,
                        'url': url, 'gpu_names': gpu_names}, broadcast=True, room="UI_1")
    emit('selected_model_info', {'key_value': key_value, 'key':key,
                        'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel,
                        'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, 'multi_online_models': multi_online_models, 'default_url': default_url,
                        'disk_break_value': disk_blocks, 'disk_break': utils.HAS_ACCELERATE,
                        'break_values': break_values, 'gpu_count': gpu_count,
                        'url': url, 'gpu_names': gpu_names}, broadcast=False, room="UI_2")
@@ -1216,7 +1253,7 @@ def get_model_info(model, directory=""):


def get_layer_count(model, directory=""):
    if(model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]):
    if(model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]):
        if(model == "GPT2Custom"):
            with open(os.path.join(directory, "config.json"), "r") as f:
                model_config = json.load(f)
@@ -1283,6 +1320,8 @@ def get_oai_models(data):
            if "apikey" in js:
                if js['apikey'] != key:
                    changed=True
            else:
                changed=True
        if changed:
            with open("settings/{}.v2_settings".format(model), "w") as file:
                js["apikey"] = key
@@ -1296,6 +1335,55 @@ def get_oai_models(data):
        print(req.json())
        emit('from_server', {'cmd': 'errmsg', 'data': req.json()})

def get_cluster_models(msg):
    vars.oaiapikey = msg['key']
    vars.apikey = vars.oaiapikey
    url = msg['url']


    # Get list of models from public cluster
    print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="")
    req = requests.get("{}/models".format(url))
    if(req.status_code == 200):
        engines = req.json()
        print(engines)
        try:
            engines = [[en, en] for en in engines]
        except:
            print(engines)
            raise
        print(engines)

        online_model = ""
        changed=False

        #Save the key
        if not path.exists("settings"):
            # If the client settings file doesn't exist, create it
            # Write API key to file
            os.makedirs('settings', exist_ok=True)
        if path.exists(get_config_filename(vars.model_selected)):
            with open(get_config_filename(vars.model_selected), "r") as file:
                js = json.load(file)
                if 'online_model' in js:
                    online_model = js['online_model']
                if "apikey" in js:
                    if js['apikey'] != vars.oaiapikey:
                        changed=True
        else:
            changed=True
        if changed:
            js={}
            with open(get_config_filename(vars.model_selected), "w") as file:
                js["apikey"] = vars.oaiapikey
                file.write(json.dumps(js, indent=3))

        emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True)
    else:
        # Something went wrong, print the message and quit since we can't initialize an engine
        print("{0}ERROR!{1}".format(colors.RED, colors.END))
        print(req.json())
        emit('from_server', {'cmd': 'errmsg', 'data': req.json()})

# Function to patch transformers to use our soft prompt
def patch_causallm(model):
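For illustration only (not part of the diff): the model list above comes from a plain GET against the cluster's /models endpoint, and each name is duplicated into a [value, label] pair for the UI dropdown. A minimal sketch, assuming the default cluster URL that get_model_info() sets for CLUSTER:

import requests

cluster_url = "https://koboldai.net"  # default_url for CLUSTER in get_model_info()
resp = requests.get("{}/models".format(cluster_url))
if resp.status_code == 200:
    engines = [[name, name] for name in resp.json()]  # [value, label] pairs for the dropdown
    print(engines)
else:
    print("Could not retrieve model list:", resp.status_code)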
@@ -1808,6 +1896,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
    model = None
    generator = None
    model_config = None
    vars.online_model = ''
    with torch.no_grad():
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", message="torch.distributed.reduce_op is deprecated")
@@ -1827,9 +1916,24 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
    #Reload our badwords
    koboldai_vars.badwordsids = koboldai_settings.badwordsids_default

    if online_model == "":
        vars.configname = vars.model.replace('/', '_')
    #Let's set the GooseAI or OpenAI server URLs if that's applicable
    if online_model != "":
        if path.exists("settings/{}.v2_settings".format(koboldai_vars.model)):
    else:
        koboldai_vars.online_model = online_model
        # Swap OAI Server if GooseAI was selected
        if(koboldai_vars.model == "GooseAI"):
            koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
            koboldai_vars.model = "OAI"
            koboldai_vars.configname = f"GooseAI_{online_model.replace('/', '_')}"
        elif(koboldai_vars.model == "CLUSTER") and type(online_model) is list:
            if len(online_model) != 1:
                koboldai_vars.configname = koboldai_vars.model
            else:
                koboldai_vars.configname = f"{koboldai_vars.model}_{online_model[0].replace('/', '_')}"
        else:
            koboldai_vars.configname = f"{koboldai_vars.model}_{online_model.replace('/', '_')}"
        if path.exists(get_config_filename()):
            changed=False
            with open("settings/{}.v2_settings".format(koboldai_vars.model), "r") as file:
                # Check if API key exists
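For illustration only (not part of the diff): the branch above picks a per-model config name for online backends. A standalone restatement of that logic with placeholder model names:

def derive_configname(model, online_model):
    # GooseAI is folded into the OAI backend but keeps its own settings name
    if model == "GooseAI":
        return f"GooseAI_{online_model.replace('/', '_')}"
    # A Horde request may target several models; only a single selection gets its own name
    if model == "CLUSTER" and isinstance(online_model, list):
        if len(online_model) != 1:
            return model
        return f"{model}_{online_model[0].replace('/', '_')}"
    return f"{model}_{online_model.replace('/', '_')}"

print(derive_configname("CLUSTER", ["KoboldAI/fairseq-dense-13B"]))  # CLUSTER_KoboldAI_fairseq-dense-13B
print(derive_configname("CLUSTER", ["model-a", "model-b"]))          # CLUSTER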
@@ -1844,18 +1948,19 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
        if changed:
            with open("settings/{}.v2_settings".format(koboldai_vars.model), "w") as file:
                file.write(json.dumps(js, indent=3))

        # Swap OAI Server if GooseAI was selected
        if(koboldai_vars.model == "GooseAI"):
            koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
            koboldai_vars.model = "OAI"
            args.configname = "GooseAI" + "/" + online_model
        else:
        elif vars.model != "CLUSTER":
            args.configname = koboldai_vars.model + "/" + online_model
        koboldai_vars.oaiurl = koboldai_vars.oaiengines + "/{0}/completions".format(online_model)


    # If transformers model was selected & GPU available, ask to use CPU or GPU
    if(koboldai_vars.model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
    if(koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
        koboldai_vars.allowsp = True
        # Test for GPU support

@@ -1894,7 +1999,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
        print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)")
        koboldai_vars.model_type = "gpt_neo"

    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
        loadmodelsettings()
        loadsettings()
        print(2)
@@ -1937,18 +2042,18 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
    if(koboldai_vars.model == "GooseAI"):
        koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
        koboldai_vars.model = "OAI"
        args.configname = "GooseAI"
        vars.configname = "GooseAI"

    # Ask for API key if OpenAI was selected
    if(koboldai_vars.model == "OAI"):
        if not args.configname:
            args.configname = "OAI"
        if not vars.configname:
            vars.configname = "OAI"

    if(koboldai_vars.model == "ReadOnly"):
        koboldai_vars.noai = True

    # Start transformers and create pipeline
    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
        if(not koboldai_vars.noai):
            print("{0}Initializing transformers, please wait...{1}".format(colors.PURPLE, colors.END))
            for m in ("GPTJModel", "XGLMModel"):
@@ -2406,7 +2511,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
        }

    # If we're running Colab or OAI, we still need a tokenizer.
    if(koboldai_vars.model in ("Colab", "API")):
    if(koboldai_vars.model in ("Colab", "API", "CLUSTER")):
        from transformers import GPT2TokenizerFast
        tokenizer = GPT2TokenizerFast.from_pretrained("EleutherAI/gpt-neo-2.7B", revision=koboldai_vars.revision, cache_dir="cache")
        loadsettings()
@@ -3097,7 +3202,7 @@ def lua_set_chunk(k, v):
def lua_get_modeltype():
    if(koboldai_vars.noai):
        return "readonly"
    if(koboldai_vars.model in ("Colab", "API", "OAI", "InferKit")):
    if(koboldai_vars.model in ("Colab", "API", "CLUSTER", "OAI", "InferKit")):
        return "api"
    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and (koboldai_vars.model in ("GPT2Custom", "NeoCustom") or koboldai_vars.model_type in ("gpt2", "gpt_neo", "gptj"))):
        hidden_size = get_hidden_size_from_model(model)
@@ -3126,7 +3231,7 @@ def lua_get_modeltype():
def lua_get_modelbackend():
    if(koboldai_vars.noai):
        return "readonly"
    if(koboldai_vars.model in ("Colab", "API", "OAI", "InferKit")):
    if(koboldai_vars.model in ("Colab", "API", "CLUSTER", "OAI", "InferKit")):
        return "api"
    if(koboldai_vars.use_colab_tpu or koboldai_vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
        return "mtj"
@@ -3564,6 +3669,8 @@ def get_message(msg):
    elif(msg['cmd'] == 'list_model'):
        sendModelSelection(menu=msg['data'])
    elif(msg['cmd'] == 'load_model'):
        print(msg)
        print(vars.model_selected)
        if not os.path.exists("settings/"):
            os.mkdir("settings")
        changed = True
@@ -3587,6 +3694,14 @@ def get_message(msg):
                f.close()
        koboldai_vars.colaburl = msg['url'] + "/request"
        koboldai_vars.model = koboldai_vars.model_selected
        if vars.model == "CLUSTER":
            if type(msg['online_model']) is not list:
                if msg['online_model'] == '':
                    vars.cluster_requested_models = []
                else:
                    vars.cluster_requested_models = [msg['online_model']]
            else:
                vars.cluster_requested_models = msg['online_model']
        load_model(use_gpu=msg['use_gpu'], gpu_layers=msg['gpu_layers'], disk_layers=msg['disk_layers'], online_model=msg['online_model'])
    elif(msg['cmd'] == 'show_model'):
        print("Model Name: {}".format(getmodelname()))
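For illustration only (not part of the diff): the block above normalises the UI's 'online_model' field, which may arrive as an empty string, a single name, or a list, into the list of requested Horde models. The same logic in isolation, with example values:

def requested_models(online_model):
    if not isinstance(online_model, list):
        return [] if online_model == '' else [online_model]
    return online_model

print(requested_models(''))                          # []
print(requested_models('KoboldAI/OPT-13B-Nerys-v2')) # ['KoboldAI/OPT-13B-Nerys-v2']
print(requested_models(['model-a', 'model-b']))      # ['model-a', 'model-b']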
@@ -3650,6 +3765,8 @@ def get_message(msg):
            print(colors.RED + "WARNING!!: Someone maliciously attempted to delete " + msg['data'] + " the attempt has been blocked.")
    elif(msg['cmd'] == 'OAI_Key_Update'):
        get_oai_models({'model': koboldai_vars.model, 'key': msg['key']})
    elif(msg['cmd'] == 'Cluster_Key_Update'):
        get_cluster_models(msg)
    elif(msg['cmd'] == 'loadselect'):
        koboldai_vars.loadselect = msg["data"]
    elif(msg['cmd'] == 'spselect'):
@@ -3845,7 +3962,7 @@ def check_for_backend_compilation():
            break
    koboldai_vars.checking = False

def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, disable_recentrng=False, no_generate=False):
def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, disable_recentrng=False, no_generate=False, ignore_aibusy=False):
    # Ignore new submissions if the AI is currently busy
    if(koboldai_vars.aibusy):
        return
@@ -3853,11 +3970,19 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False,
    while(True):
        set_aibusy(1)
        koboldai_vars.actions.clear_unused_options()
        if(koboldai_vars.model == "API"):
        if(koboldai_vars.model in ["API","CLUSTER"]):
            global tokenizer
            tokenizer_id = requests.get(
                koboldai_vars.colaburl[:-8] + "/api/v1/model",
            ).json()["result"]
            if koboldai_vars.model == "API":
                tokenizer_id = requests.get(
                    koboldai_vars.colaburl[:-8] + "/api/v1/model",
                ).json()["result"]
            elif len(koboldai_vars.cluster_requested_models) >= 1:
                # If the player has requested one or more models, we use the first one for the tokenizer
                tokenizer_id = koboldai_vars.cluster_requested_models[0]
                # The cluster can return any number of possible models for each gen, but this happens after this step
                # So at this point, this is unknown
            else:
                tokenizer_id = ""
            if tokenizer_id != koboldai_vars.api_tokenizer_id:
                try:
                    if(os.path.isdir(tokenizer_id)):
@@ -4089,6 +4214,8 @@ def apiactionsubmit(data, use_memory=False, use_world_info=False, use_story=Fals
        raise NotImplementedError("API generation is not supported in old Colab API mode.")
    elif(koboldai_vars.model == "API"):
        raise NotImplementedError("API generation is not supported in API mode.")
    elif(koboldai_vars.model == "CLUSTER"):
        raise NotImplementedError("API generation is not supported in API mode.")
    elif(koboldai_vars.model == "OAI"):
        raise NotImplementedError("API generation is not supported in OpenAI/GooseAI mode.")
    elif(koboldai_vars.model == "ReadOnly"):
@@ -4148,7 +4275,7 @@ def apiactionsubmit(data, use_memory=False, use_world_info=False, use_story=Fals
    minimum = len(tokens) + 1
    maximum = len(tokens) + koboldai_vars.genamt

    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["Colab", "API", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["Colab", "API", "CLUSTER", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
        genout = apiactionsubmit_generate(tokens, minimum, maximum)
    elif(koboldai_vars.use_colab_tpu or koboldai_vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
        genout = apiactionsubmit_tpumtjgenerate(tokens, minimum, maximum)
@@ -4278,8 +4405,8 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,

    if(actionlen == 0):
        # First/Prompt action
        tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns
        assert len(tokens) <= koboldai_vars.max_length - lnsp - koboldai_vars.genamt - budget_deduction
        tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns
        assert len(tokens) <= koboldai_vars.max_length - lnsp - vars.genamt - budget_deduction
        ln = len(tokens) + lnsp
        return tokens, ln+1, ln+koboldai_vars.genamt
    else:
@@ -4327,13 +4454,12 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
        # Did we get to add the A.N.? If not, do it here
        if(anotetxt != ""):
            if((not anoteadded) or forceanote):
                # header, mem, wi, anote, prompt, actions
                tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns + tokens
                tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns + tokens
            else:
                tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + prompttkns + tokens
                tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + prompttkns + tokens
        else:
            # Prepend Memory, WI, and Prompt before action tokens
            tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + prompttkns + tokens
            tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + prompttkns + tokens

        # Send completed bundle to generator
        assert len(tokens) <= koboldai_vars.max_length - lnsp - koboldai_vars.genamt - budget_deduction
@@ -4360,23 +4486,27 @@ def calcsubmit(txt):
        else:
            subtxt, min, max = calcsubmitbudget(actionlen, winfo, mem, anotetxt, koboldai_vars.actions, submission=txt)
        if(actionlen == 0):
            if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["Colab", "API", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
            if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["Colab", "API", "CLUSTER", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
                generate(subtxt, min, max, found_entries=found_entries)
            elif(koboldai_vars.model == "Colab"):
                sendtocolab(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.model == "API"):
                sendtoapi(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.model == "CLUSTER"):
                sendtocluster(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.model == "OAI"):
                oairequest(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.use_colab_tpu or koboldai_vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
                tpumtjgenerate(subtxt, min, max, found_entries=found_entries)
        else:
            if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["Colab", "API", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
            if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["Colab", "API", "CLUSTER", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
                generate(subtxt, min, max, found_entries=found_entries)
            elif(koboldai_vars.model == "Colab"):
                sendtocolab(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.model == "API"):
                sendtoapi(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.model == "CLUSTER"):
                sendtocluster(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.model == "OAI"):
                oairequest(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.use_colab_tpu or koboldai_vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
@@ -4828,18 +4958,102 @@ def sendtoapi(txt, min, max):
    if(len(genout) == 1):
        genresult(genout[0])
    else:
        adjusted_genout = []
        for item in genout:
            adjusted_genout.append({"generated_text": item})
        # Convert torch output format to transformers
        seqs = []
        for seq in genout:
        for seq in adjusted_genout:
            seqs.append({"generated_text": seq})
        if(koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
            genresult(genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
            genresult(adjusted_genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
        else:
            genselect(genout)
            genselect(adjusted_genout)

    set_aibusy(0)
    return

#==================================================================#
#  Send transformers-style request to KoboldAI Cluster
#==================================================================#
def sendtocluster(txt, min, max):
    # Log request to console
    if not vars.quiet:
        print("{0}Tokens:{1}, Txt:{2}{3}".format(colors.YELLOW, min-1, txt, colors.END))

    # Store context in memory to use it for comparison with generated content
    vars.lastctx = txt

    # Build request JSON data
    reqdata = {
        'max_length': max - min + 1,
        'max_context_length': vars.max_length,
        'rep_pen': vars.rep_pen,
        'rep_pen_slope': vars.rep_pen_slope,
        'rep_pen_range': vars.rep_pen_range,
        'temperature': vars.temp,
        'top_p': vars.top_p,
        'top_k': vars.top_k,
        'top_a': vars.top_a,
        'tfs': vars.tfs,
        'typical': vars.typical,
        'n': vars.numseqs,
    }
    cluster_metadata = {
        'prompt': txt,
        'params': reqdata,
        'username': vars.apikey,
        'models': vars.cluster_requested_models,
    }

    # Create request
    req = requests.post(
        vars.colaburl[:-8] + "/generate/sync",
        json=cluster_metadata,
    )
    js = req.json()
    if(req.status_code == 503):
        errmsg = "KoboldAI API Error: No available KoboldAI servers found in cluster to fulfil this request using the selected models and requested lengths."
        print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
        set_aibusy(0)
        return
    if(req.status_code != 200):
        errmsg = "KoboldAI API Error: Failed to get a reply from the server. Please check the console."
        print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
        set_aibusy(0)
        return
    genout = js

    for i in range(vars.numseqs):
        vars.lua_koboldbridge.outputs[i+1] = genout[i]

    execute_outmod()
    if(vars.lua_koboldbridge.regeneration_required):
        vars.lua_koboldbridge.regeneration_required = False
        genout = []
        for i in range(vars.numseqs):
            genout.append(vars.lua_koboldbridge.outputs[i+1])
            assert type(genout[-1]) is str

    if(len(genout) == 1):
        genresult(genout[0])
    else:
        adjusted_genout = []
        for item in genout:
            adjusted_genout.append({"generated_text": item})
        # Convert torch output format to transformers
        seqs = []
        for seq in adjusted_genout:
            seqs.append({"generated_text": seq})
        if(vars.lua_koboldbridge.restart_sequence is not None and vars.lua_koboldbridge.restart_sequence > 0):
            genresult(adjusted_genout[vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
        else:
            genselect(adjusted_genout)

    set_aibusy(0)
    return
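For illustration only (not part of the diff): sendtocluster() wraps the sampler settings in a 'params' object and posts the bundle to the cluster's /generate/sync endpoint; the API key doubles as the 'username' field, and a 503 response means no worker can serve the request. A minimal sketch with placeholder values:

import requests

cluster_metadata = {
    "prompt": "You are standing in an open field west of a white house.",
    "params": {
        "max_length": 80,           # max - min + 1 in sendtocluster()
        "max_context_length": 1024,
        "temperature": 0.7,
        "top_p": 0.9,
        "n": 1,
    },
    "username": "<your-horde-key>",
    "models": ["KoboldAI/fairseq-dense-13B"],
}
resp = requests.post("https://koboldai.net/generate/sync", json=cluster_metadata)
if resp.status_code == 200:
    print(resp.json())  # a list with one generated text per requested sequence
else:
    print("Cluster error", resp.status_code, resp.json())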

#==================================================================#
#  Send text to TPU mesh transformer backend
@@ -5669,7 +5883,9 @@ def oairequest(txt, min, max):
    koboldai_vars.lastctx = txt

    # Build request JSON data
    if 'GooseAI' in args.configname:
    # GooseAI is a subtype of OAI. So to check if it's this type, we check the configname as a workaround
    # as the vars.model will always be OAI
    if 'GooseAI' in vars.configname:
        reqdata = {
            'prompt': txt,
            'max_tokens': koboldai_vars.genamt,
@@ -8021,7 +8237,7 @@ def post_story_end(body: SubmissionInputSchema):
    numseqs = koboldai_vars.numseqs
    koboldai_vars.numseqs = 1
    try:
        actionsubmit(body.prompt, force_submit=True, no_generate=True)
        actionsubmit(body.prompt, force_submit=True, no_generate=True, ignore_aibusy=True)
    finally:
        koboldai_vars.disable_set_aibusy = disable_set_aibusy
        koboldai_vars.standalone = _standalone
@@ -9945,6 +10161,32 @@ def get_config_soft_prompt():
    """
    return {"value": koboldai_vars.spfilename.strip()}

class SoftPromptsListSchema(KoboldSchema):
    values: List[SoftPromptSettingSchema] = fields.List(fields.Nested(SoftPromptSettingSchema), required=True, metadata={"description": "Array of available softprompts."})

@api_v1.get("/config/soft_prompts_list")
@api_schema_wrap
def get_config_soft_prompts_list():
    """---
    get:
      summary: Retrieve all available softprompt filenames
      tags:
        - config
      responses:
        200:
          description: Successful request
          content:
            application/json:
              schema: SoftPromptsListSchema
              example:
                values: []
    """
    splist = []
    for sp in fileops.getspfiles(vars.modeldim):

        splist.append({"value":sp["filename"]})
    return {"values": splist}

@api_v1.put("/config/soft_prompt")
@api_schema_wrap
def put_config_soft_prompt(body: SoftPromptSettingSchema):
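For illustration only (not part of the diff): the new route can be queried like any other endpoint under the /api/v1 prefix used elsewhere in this file; the host and port below are placeholders for a locally running server.

import requests

resp = requests.get("http://localhost:5000/api/v1/config/soft_prompts_list")
print(resp.json())  # e.g. {"values": [{"value": "my_softprompt.zip"}]} or {"values": []}

The remaining hunks below are from the browser UI JavaScript and the model-load popup HTML template rather than aiserver.py.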
@@ -2919,14 +2919,35 @@ $(document).ready(function(){
        if (msg.key) {
            $("#modelkey").removeClass("hidden");
            $("#modelkey")[0].value = msg.key_value;
            if (msg.models_on_url) {
                $("#modelkey")[0].onblur = function () {socket.send({'cmd': 'Cluster_Key_Update', 'key': this.value, 'url': document.getElementById("modelurl").value});};
                $("#modelurl")[0].onblur = function () {socket.send({'cmd': 'Cluster_Key_Update', 'key': document.getElementById("modelkey").value, 'url': this.value});};
            } else {
                $("#modelkey")[0].onblur = function () {socket.send({'cmd': 'OAI_Key_Update', 'key': $('#modelkey')[0].value});};
                $("#modelurl")[0].onblur = null;
            }
            //if we're in the API list, disable the load button until the model is selected (after the API Key is entered)
            disableButtons([load_model_accept]);
        } else {
            $("#modelkey").addClass("hidden");

        }

        console.log(msg.multi_online_models);
        if (msg.multi_online_models) {
            $("#oaimodel")[0].setAttribute("multiple", "");
            $("#oaimodel")[0].options[0].textContent = "All"
        } else {
            $("#oaimodel")[0].removeAttribute("multiple");
            $("#oaimodel")[0].options[0].textContent = "Select Model(s)"
        }



        if (msg.url) {
            $("#modelurl").removeClass("hidden");
            if (msg.default_url != null) {
                $("#modelurl").value = msg.default_url;
            }
        } else {
            $("#modelurl").addClass("hidden");
        }
@@ -3287,7 +3308,11 @@ $(document).ready(function(){
            }
        }
        var disk_layers = $("#disk_layers").length > 0 ? $("#disk_layers")[0].value : 0;
        message = {'cmd': 'load_model', 'use_gpu': $('#use_gpu')[0].checked, 'key': $('#modelkey')[0].value, 'gpu_layers': gpu_layers.slice(0, -1), 'disk_layers': disk_layers, 'url': $('#modelurl')[0].value, 'online_model': $('#oaimodel')[0].value};
        models = getSelectedOptions(document.getElementById('oaimodel'));
        if (models.length == 1) {
            models = models[0];
        }
        message = {'cmd': 'load_model', 'use_gpu': $('#use_gpu')[0].checked, 'key': $('#modelkey')[0].value, 'gpu_layers': gpu_layers.slice(0, -1), 'disk_layers': disk_layers, 'url': $('#modelurl')[0].value, 'online_model': models};
        socket.send(message);
        loadmodelcontent.html("");
        hideLoadModelPopup();
@@ -3733,3 +3758,27 @@ function upload_file(file_box) {
    }
}

function getSelectedOptions(element) {
    // validate element
    if(!element || !element.options)
        return []; //or null?

    // return HTML5 implementation of selectedOptions instead.
    if (element.selectedOptions) {
        selectedOptions = element.selectedOptions;
    } else {
        // you are here because your browser doesn't have the HTML5 selectedOptions
        var opts = element.options;
        var selectedOptions = [];
        for(var i = 0; i < opts.length; i++) {
            if(opts[i].selected) {
                selectedOptions.push(opts[i]);
            }
        }
    }
    output = []
    for (item of selectedOptions) {
        output.push(item.value);
    }
    return output;
}
@@ -296,12 +296,12 @@
    <div id="loadmodellistcontent" style="overflow: auto; height: 300px;">
    </div>
    <div class="popupfooter">
        <input class="form-control hidden" type="text" placeholder="key" id="modelkey" onblur="socket.send({'cmd': 'OAI_Key_Update', 'key': $('#modelkey')[0].value});">
        <input class="form-control hidden" type="text" placeholder="Enter the URL of the server (For example a trycloudflare link)" id="modelurl" onchange="check_enable_model_load()">
        <input class="form-control hidden" type="text" placeholder="key" id="modelkey" onblur="socket.send({'cmd': 'OAI_Key_Update', 'key': $('#modelkey')[0].value});">
        <input class="form-control hidden" type="text" placeholder="Model Path or Hugging Face Name" id="custommodelname" menu="" onblur="socket.send({'cmd': 'selectmodel', 'data': $(this).attr('menu'), 'path_modelname': $('#custommodelname')[0].value});">
    </div>
    <div class="popupfooter">
        <select class="form-control hidden" id="oaimodel"><option value="">Select OAI Model</option></select>
        <select class="form-control hidden" id="oaimodel"><option value="">Select Model(s)</option></select>
    </div>
    <div class="popupfooter hidden" id=modellayers>
        <div class='settingitem' style="width:100%">