Merge pull request #139 from ebolam/Web-UI

UI changes with AI Selection in Web
2025-06-05 21:59:24 +02:00 · 2022-06-07 20:33:46 +02:00
parent 9bf4db2a7c 6fd2496d94
commit 2333c85f4e
3 changed files with 1590 additions and 1166 deletions
--- a/aiserver.py
+++ b/aiserver.py
@@ -52,6 +52,9 @@ import gensettings
 from utils import debounce
 import utils
 import structures
+import torch
+from transformers import StoppingCriteria, GPT2TokenizerFast, GPT2LMHeadModel, GPTNeoForCausalLM, GPTNeoModel, AutoModelForCausalLM, AutoTokenizer
+global tpu_mtj_backend


 if lupa.LUA_VERSION[:2] != (5, 4):
@@ -83,116 +86,112 @@ class colors:
    END       = '\033[0m'
    UNDERLINE = '\033[4m'

-# AI models
-mainmenu = [
-    ["Load a model from its directory", "NeoCustom", ""],
-    ["Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom", ""],
-    ["Adventure Models", "adventurelist", ""],
-    ["Novel Models", "novellist", ""],
-    ["NSFW Models", "nsfwlist", ""],
-    ["Chatbot Models", "chatlist", ""],
-    ["Untuned GPT-Neo/J", "gptneolist", ""],
-    ["Untuned Fairseq Dense", "fsdlist", ""],
-    ["Untuned OPT", "optlist", ""],
-    ["Untuned XGLM", "xglmlist", ""],
-    ["Untuned GPT2", "gpt2list", ""],
-    ["Online Services", "apilist", ""],
-    ["Read Only (No AI)", "ReadOnly", ""]
-    ]
-
-adventurelist= [
-    ["Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"],
-    ["Skein 6B", "KoboldAI/GPT-J-6B-Skein", "16GB"],
-    ["Adventure 6B", "KoboldAI/GPT-J-6B-Adventure", "16GB"],
-    ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB"],
-    ["Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "8GB"],
-    ["Adventure 1.3B", "KoboldAI/GPT-Neo-1.3B-Adventure", "6GB"],
-    ["Adventure 125M (Mia)", "Merry/AID-Neo-125M", "2GB"],
-    ["Return to Main Menu", "Return", ""],
-]
-
-novellist= [
-    ["Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"],
-    ["Janeway FSD 13B", "KoboldAI/fairseq-dense-13B-Janeway", "32GB"],
-    ["Janeway FSD 6.7B", "KoboldAI/fairseq-dense-6.7B-Janeway", "16GB"],
-    ["Janeway Neo 6B", "KoboldAI/GPT-J-6B-Janeway", "16GB"],
-    ["Janeway Neo 2.7B", "KoboldAI/GPT-Neo-2.7B-Janeway", "8GB"],
-    ["Janeway FSD 2.7B", "KoboldAI/fairseq-dense-2.7B-Janeway", "8GB"],
-    ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB"],
-    ["Horni-LN 2.7B", "KoboldAI/GPT-Neo-2.7B-Horni-LN", "8GB"],
-    ["Picard 2.7B (Older Janeway)", "KoboldAI/GPT-Neo-2.7B-Picard", "8GB"],
-    ["Return to Main Menu", "Return", ""],
-]
-
-nsfwlist= [
-    ["Shinen FSD 13B (NSFW)", "KoboldAI/fairseq-dense-13B-Shinen", "32GB"],
-    ["Shinen FSD 6.7B (NSFW)", "KoboldAI/fairseq-dense-6.7B-Shinen", "16GB"],
-    ["Lit 6B (NSFW)", "hakurei/lit-6B", "16GB"],
-    ["Shinen 6B (NSFW)", "KoboldAI/GPT-J-6B-Shinen", "16GB"],
-    ["Horni 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Horni", "8GB"],
-    ["Shinen 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Shinen", "8GB"],
-    ["Return to Main Menu", "Return", ""],
-]
-
-chatlist= [
-    ["Convo 6B (Chatbot)", "hitomi-team/convo-6B", "16GB"],
-    ["C1 6B (Chatbot)", "hakurei/c1-6B", "16GB"],
-    ["C1 1.3B (Chatbot)", "iokru/c1-1.3B", "6GB"],
-    ["Return to Main Menu", "Return", ""],
-]
-gptneolist = [
-    ["GPT-J 6B", "EleutherAI/gpt-j-6B", "16GB"],
-    ["GPT-Neo 2.7B", "EleutherAI/gpt-neo-2.7B", "8GB"],
-    ["GPT-Neo 1.3B", "EleutherAI/gpt-neo-1.3B", "6GB"],
-    ["GPT-Neo 125M", "EleutherAI/gpt-neo-125M", "2GB"],
-    ["Return to Main Menu", "Return", ""],
-]
-
-gpt2list = [
-    ["GPT-2 XL", "gpt2-xl", "6GB"],
-    ["GPT-2 Large", "gpt2-large", "4GB"],
-    ["GPT-2 Med", "gpt2-medium", "2GB"],
-    ["GPT-2", "gpt2", "2GB"],
-    ["Return to Main Menu", "Return", ""],
-    ]
-
-optlist = [
-    ["OPT 30B", "facebook/opt-30b", "64GB"],
-    ["OPT 13B", "facebook/opt-13b", "32GB"],
-    ["OPT 6.7B", "facebook/opt-6.7b", "16GB"],
-    ["OPT 2.7B", "facebook/opt-2.7b", "8GB"],
-    ["OPT 1.3B", "facebook/opt-1.3b", "4GB"],
-    ["OPT 350M", "facebook/opt-350m", "2GB"],
-    ["OPT 125M", "facebook/opt-125m", "1GB"],
-    ["Return to Main Menu", "Return", ""],
-    ]
-
-fsdlist = [
-    ["Fairseq Dense 13B", "KoboldAI/fairseq-dense-13B", "32GB"],
-    ["Fairseq Dense 6.7B", "KoboldAI/fairseq-dense-6.7B", "16GB"],
-    ["Fairseq Dense 2.7B", "KoboldAI/fairseq-dense-2.7B", "8GB"],
-    ["Fairseq Dense 1.3B", "KoboldAI/fairseq-dense-1.3B", "4GB"],
-    ["Fairseq Dense 355M", "KoboldAI/fairseq-dense-355M", "2GB"],
-    ["Fairseq Dense 125M", "KoboldAI/fairseq-dense-125M", "1GB"],
-    ["Return to Main Menu", "Return", ""],
-    ]
-
-xglmlist = [
-    ["XGLM 4.5B (Larger Dataset)", "facebook/xglm-4.5B", "12GB"],
-    ["XGLM 7.5B", "facebook/xglm-7.5B", "18GB"],
-    ["XGLM 2.9B", "facebook/xglm-2.9B", "10GB"],
-    ["XGLM 1.7B", "facebook/xglm-1.7B", "6GB"],
-    ["XGLM 564M", "facebook/xglm-564M", "4GB"],
-    ["Return to Main Menu", "Return", ""],
-    ]
-
-apilist = [
-    ["GooseAI API (requires API key)", "GooseAI", ""],
-    ["OpenAI API (requires API key)", "OAI", ""],
-    ["InferKit API (requires API key)", "InferKit", ""],
-    ["KoboldAI Server API (Old Google Colab)", "Colab", ""],
-    ["Return to Main Menu", "Return", ""],
+# AI models Menu
+# This is a dict of lists where the key is the menu name, and the list is the menu items.
+# Each item has 4 elements: 1: text to display, 2: model name (vars.model) or menu name (key name of another menu),
+# 3: the memory requirement for the model, 4: whether the item is a menu or not (True/False)
+model_menu = {
+    'mainmenu': [
+        ["Load a model from its directory", "NeoCustom", "", False],
+        ["Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom", "", False],
+        ["Adventure Models", "adventurelist", "", True],
+        ["Novel Models", "novellist", "", True],
+        ["NSFW Models", "nsfwlist", "", True],
+        ["Chatbot Models", "chatlist", "", True],
+        ["Untuned GPT-Neo/J", "gptneolist", "", True],
+        ["Untuned Fairseq Dense", "fsdlist", "", True],
+        ["Untuned OPT", "optlist", "", True],
+        ["Untuned XGLM", "xglmlist", "", True],
+        ["Untuned GPT2", "gpt2list", "", True],
+        ["Online Services", "apilist", "", True],
+        ["Read Only (No AI)", "ReadOnly", "", False]
+        ],
+    'adventurelist': [
+        ["Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB", False],
+        ["Skein 6B", "KoboldAI/GPT-J-6B-Skein", "16GB", False],
+        ["Adventure 6B", "KoboldAI/GPT-J-6B-Adventure", "16GB", False],
+        ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB", False],
+        ["Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "8GB", False],
+        ["Adventure 1.3B", "KoboldAI/GPT-Neo-1.3B-Adventure", "6GB", False],
+        ["Adventure 125M (Mia)", "Merry/AID-Neo-125M", "2GB", False],
+        ["Return to Main Menu", "mainmenu", "", True],
+        ],
+    'novellist': [
+        ["Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB", False],
+        ["Janeway FSD 13B", "KoboldAI/fairseq-dense-13B-Janeway", "32GB", False],
+        ["Janeway FSD 6.7B", "KoboldAI/fairseq-dense-6.7B-Janeway", "16GB", False],
+        ["Janeway Neo 6B", "KoboldAI/GPT-J-6B-Janeway", "16GB", False],
+        ["Janeway Neo 2.7B", "KoboldAI/GPT-Neo-2.7B-Janeway", "8GB", False],
+        ["Janeway FSD 2.7B", "KoboldAI/fairseq-dense-2.7B-Janeway", "8GB", False],
+        ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB", False],
+        ["Horni-LN 2.7B", "KoboldAI/GPT-Neo-2.7B-Horni-LN", "8GB", False],
+        ["Picard 2.7B (Older Janeway)", "KoboldAI/GPT-Neo-2.7B-Picard", "8GB", False],
+        ["Return to Main Menu", "mainmenu", "", True],
+        ],
+    'nsfwlist': [
+        ["Shinen FSD 13B (NSFW)", "KoboldAI/fairseq-dense-13B-Shinen", "32GB", False],
+        ["Shinen FSD 6.7B (NSFW)", "KoboldAI/fairseq-dense-6.7B-Shinen", "16GB", False],
+        ["Lit 6B (NSFW)", "hakurei/lit-6B", "16GB", False],
+        ["Shinen 6B (NSFW)", "KoboldAI/GPT-J-6B-Shinen", "16GB", False],
+        ["Horni 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Horni", "8GB", False],
+        ["Shinen 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Shinen", "8GB", False],
+        ["Return to Main Menu", "mainmenu", "", True],
+        ],
+    'chatlist': [
+        ["Convo 6B (Chatbot)", "hitomi-team/convo-6B", "16GB", False],
+        ["C1 6B (Chatbot)", "hakurei/c1-6B", "16GB", False],
+        ["C1 1.3B (Chatbot)", "iokru/c1-1.3B", "6GB", False],
+        ["Return to Main Menu", "mainmenu", "", True],
+        ],
+    'gptneolist': [
+        ["GPT-J 6B", "EleutherAI/gpt-j-6B", "16GB", False],
+        ["GPT-Neo 2.7B", "EleutherAI/gpt-neo-2.7B", "8GB", False],
+        ["GPT-Neo 1.3B", "EleutherAI/gpt-neo-1.3B", "6GB", False],
+        ["GPT-Neo 125M", "EleutherAI/gpt-neo-125M", "2GB", False],
+        ["Return to Main Menu", "mainmenu", "", True],
+        ],
+    'gpt2list': [
+        ["GPT-2 XL", "gpt2-xl", "6GB", False],
+        ["GPT-2 Large", "gpt2-large", "4GB", False],
+        ["GPT-2 Med", "gpt2-medium", "2GB", False],
+        ["GPT-2", "gpt2", "2GB", False],
+        ["Return to Main Menu", "mainmenu", "", True],
+        ],
+    'optlist': [
+        ["OPT 30B", "facebook/opt-30b", "64GB", False],
+        ["OPT 13B", "facebook/opt-13b", "32GB", False],
+        ["OPT 6.7B", "facebook/opt-6.7b", "16GB", False],
+        ["OPT 2.7B", "facebook/opt-2.7b", "8GB", False],
+        ["OPT 1.3B", "facebook/opt-1.3b", "4GB", False],
+        ["OPT 350M", "facebook/opt-350m", "2GB", False],
+        ["OPT 125M", "facebook/opt-125m", "1GB", False],
+        ["Return to Main Menu", "mainmenu", "", True],
+        ],
+    'fsdlist': [
+        ["Fairseq Dense 13B", "KoboldAI/fairseq-dense-13B", "32GB", False],
+        ["Fairseq Dense 6.7B", "KoboldAI/fairseq-dense-6.7B", "16GB", False],
+        ["Fairseq Dense 2.7B", "KoboldAI/fairseq-dense-2.7B", "8GB", False],
+        ["Fairseq Dense 1.3B", "KoboldAI/fairseq-dense-1.3B", "4GB", False],
+        ["Fairseq Dense 355M", "KoboldAI/fairseq-dense-355M", "2GB", False],
+        ["Fairseq Dense 125M", "KoboldAI/fairseq-dense-125M", "1GB", False],
+        ["Return to Main Menu", "mainmenu", "", True],
+        ],
+    'xglmlist': [
+        ["XGLM 4.5B (Larger Dataset)", "facebook/xglm-4.5B", "12GB", False],
+        ["XGLM 7.5B", "facebook/xglm-7.5B", "18GB", False],
+        ["XGLM 2.9B", "facebook/xglm-2.9B", "10GB", False],
+        ["XGLM 1.7B", "facebook/xglm-1.7B", "6GB", False],
+        ["XGLM 564M", "facebook/xglm-564M", "4GB", False],
+        ["Return to Main Menu", "mainmenu", "", True],
+        ],
+    'apilist': [
+        ["GooseAI API (requires API key)", "GooseAI", "", False],
+        ["OpenAI API (requires API key)", "OAI", "", False],
+        ["InferKit API (requires API key)", "InferKit", "", False],
+        ["KoboldAI Server API (Old Google Colab)", "Colab", "", False],
+        ["Return to Main Menu", "mainmenu", "", True],
    ]
+    }
 # Variables
 class vars:
    lastact     = ""     # The last action received from the user
@@ -324,9 +323,38 @@ class vars:

 utils.vars = vars

+class Send_to_socketio(object):
+    # Minimal write-only sink. It exposes just write(), which is enough for it
+    # to be handed to tqdm as its file= target so that progress-bar text is
+    # mirrored to the web UI as well as the console.
+    def write(self, bar):
+        # bar: the text chunk to output. Echo it locally without appending a
+        # newline.
+        print(bar, end="")
+        # Brief pause before emitting.
+        # NOTE(review): presumably gives the socket server a chance to send
+        # each update -- confirm.
+        time.sleep(0.01)
+        # Broadcast the same text as a 'model_load_status' command; spaces are
+        # replaced with &nbsp; entities before sending.
+        emit('from_server', {'cmd': 'model_load_status', 'data': bar.replace(" ", "&nbsp;")}, broadcast=True)
+                                
+# Set logging level to reduce chatter from Flask
+# (only ERROR and above from the 'werkzeug' logger are let through)
+import logging
+log = logging.getLogger('werkzeug')
+log.setLevel(logging.ERROR)
+
+# Start flask & SocketIO
+# The progress message is printed without a newline so the "OK!" below
+# finishes the same console line.
+print("{0}Initializing Flask... {1}".format(colors.PURPLE, colors.END), end="")
+from flask import Flask, render_template, Response, request, copy_current_request_context
+from flask_socketio import SocketIO, emit
+# root_path is pinned to the current working directory.
+app = Flask(__name__, root_path=os.getcwd())
+# NOTE(review): Flask's documented config key is 'SECRET_KEY' (underscore).
+# As written, with a space, this entry is presumably never consulted --
+# confirm before relying on signed sessions.
+app.config['SECRET KEY'] = 'secret!'
+# NOTE(review): Flask-SocketIO documents this option as async_mode; verify
+# that async_method has the intended effect.
+socketio = SocketIO(app, async_method="eventlet")
+print("{0}OK!{1}".format(colors.GREEN, colors.END))
+
 #==================================================================#
 # Function to get model selection at startup
 #==================================================================#
+def sendModelSelection(menu="mainmenu"):
+    """Broadcast a model-selection menu to the web UI.
+
+    menu -- key of the menu to show. For the manual-load options
+            ('NeoCustom', 'GPT2Custom') the sub-folders of ./models are
+            listed instead, each tagged with that option as its model name.
+            Any other value is looked up in model_menu.
+    """
+    # If we send one of the manual load options, send back the list of model
+    # directories, otherwise send the menu
+    if menu in ('NeoCustom', 'GPT2Custom'):
+        # os.walk() yields nothing when ./models does not exist; fall back to
+        # an empty folder list rather than letting next() raise StopIteration.
+        folders = next(os.walk('./models'), (None, [], None))[1]
+        menu_list = [[folder, menu, "", False] for folder in folders]
+        menu_list.append(["Return to Main Menu", "mainmenu", "", True])
+        emit('from_server', {'cmd': 'show_model_menu', 'data': menu_list, 'menu': 'custom'}, broadcast=True)
+    else:
+        emit('from_server', {'cmd': 'show_model_menu', 'data': model_menu[menu], 'menu': menu}, broadcast=True)
+
 def getModelSelection(modellist):
    print("    #    Model\t\t\t\t\t\tVRAM\n    ========================================================")
    i = 1
@@ -765,6 +793,8 @@ def check_for_sp_change():
                emit('from_server', {'cmd': 'spstatitems', 'data': {vars.spfilename: vars.spmeta} if vars.allowsp and len(vars.spfilename) else {}}, namespace=None, broadcast=True)
            vars.sp_changed = False

+socketio.start_background_task(check_for_sp_change)
+
 def spRequest(filename):
    if(not vars.allowsp):
        raise RuntimeError("Soft prompts are not supported by your current model/backend")
@@ -829,7 +859,8 @@ def spRequest(filename):
 #==================================================================#
 # Startup
 #==================================================================#
-
+def general_startup():
+    global args
    # Parsing Parameters
    parser = argparse.ArgumentParser(description="KoboldAI Server")
    parser.add_argument("--remote", action='store_true', help="Optimizes KoboldAI for Remote Play")
@@ -855,7 +886,7 @@ parser.add_argument("--quiet", action='store_true', default=False, help="If pres
    parser.add_argument("--no_aria2", action='store_true', default=False, help="Prevents KoboldAI from using aria2 to download huggingface models more efficiently, in case aria2 is causing you issues")
    parser.add_argument("--lowmem", action='store_true', help="Extra Low Memory loading for the GPU, slower but memory does not peak to twice the usage")
    parser.add_argument("--savemodel", action='store_true', help="Saves the model to the models folder even if --colab is used (Allows you to save models to Google Drive)")
-args: argparse.Namespace = None
+    #args: argparse.Namespace = None
    if(os.environ.get("KOBOLDAI_ARGS") is not None):
        import shlex
        args = parser.parse_args(shlex.split(os.environ["KOBOLDAI_ARGS"]))
@@ -899,23 +930,210 @@ vars.smanrename = vars.host == args.override_rename

    vars.aria2_port = args.aria2_port or 6799

-# Select a model to run
-if args.model:
-    print("Welcome to KoboldAI!\nYou have selected the following Model:", vars.model)
-    if args.path:
-        print("You have selected the following path for your Model :", args.path)
-        vars.custmodpth = args.path;
-        vars.colaburl = args.path + "/request"; # Lets just use the same parameter to keep it simple
+#==================================================================#
+# Load Model
+#==================================================================# 

+def tpumtjgetsofttokens():
+    # Return the array of token ids that stand for the soft-prompt positions.
+    # When no soft prompt is loaded (vars.sp is None), a zero-filled
+    # placeholder is built, sharded and stored in vars.sp first.
+    soft_tokens = None
+    if(vars.sp is None):
+        # numpy is imported lazily into the module globals.
+        # NOTE(review): this import only runs inside this branch, while
+        # np.arange below runs unconditionally -- presumably np is already
+        # global whenever vars.sp is set; confirm.
+        global np
+        if 'np' not in globals():
+            import numpy as np
+        # One zero row, as wide as "d_embed" (falling back to "d_model").
+        tensor = np.zeros((1, tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"])), dtype=np.float32)
+        rows = tensor.shape[0]
+        # seq % -cores is <= 0 in Python, so subtracting it rounds "seq" up to
+        # the next multiple of "cores_per_replica"; the padding fills the
+        # tensor up to that many rows.
+        padding_amount = tpu_mtj_backend.params["seq"] - (tpu_mtj_backend.params["seq"] % -tpu_mtj_backend.params["cores_per_replica"]) - rows
+        tensor = np.pad(tensor, ((0, padding_amount), (0, 0)))
+        # Split the rows evenly into one slice per core.
+        tensor = tensor.reshape(
+            tpu_mtj_backend.params["cores_per_replica"],
+            -1,
+            tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]),
+        )
+        vars.sp = tpu_mtj_backend.shard_xmap(tensor)
+    # The ids start right after the padded vocabulary
+    # ("n_vocab" + "n_vocab_padding") and cover vars.sp_length entries.
+    soft_tokens = np.arange(
+        tpu_mtj_backend.params["n_vocab"] + tpu_mtj_backend.params["n_vocab_padding"],
+        tpu_mtj_backend.params["n_vocab"] + tpu_mtj_backend.params["n_vocab_padding"] + vars.sp_length,
+        dtype=np.uint32
+    )
+    return soft_tokens
+ 
+def get_model_info(model, directory=""):
+    """Collect the load options that apply to `model` and broadcast them.
+
+    Emits a 'selected_model_info' command carrying: whether an API key is
+    needed (`key`) and any stored key (`key_value`), whether a server URL is
+    needed (`url`), the layer information (`layer_count`, `breakmodel`,
+    `break_values`), plus `gpu` and `gpu_count`. When a stored key was found,
+    the engine list is then requested through get_oai_models().
+
+    model     -- model name or menu key chosen in the UI
+    directory -- forwarded to get_layer_count()
+    """
+    key = False
+    breakmodel = False
+    gpu = False
+    layer_count = None
+    key_value = ""
+    break_values = []
+    url = False
+    # 'Colab' is itself an entry of model_menu['apilist'], so it must be
+    # tested before the generic API-list branch; otherwise that branch claims
+    # it and the URL prompt is never requested.
+    if model == 'Colab':
+        url = True
+    elif model in [x[1] for x in model_menu['apilist']]:
+        # API-backed model: needs a key; reuse a stored one when available
+        settings_file = "settings/{}.settings".format(model)
+        if path.exists(settings_file):
+            with open(settings_file, "r") as file:
+                js = json.load(file)
+            # Prefer "apikey"; fall back to the older "oaiapikey" entry
+            if("apikey" in js and js["apikey"] != ""):
+                key_value = js["apikey"]
+            elif 'oaiapikey' in js and js['oaiapikey'] != "":
+                key_value = js["oaiapikey"]
+        key = True
+    elif model == 'ReadOnly':
+        pass
+    elif not torch.cuda.is_available():
+        # Without CUDA there is no layer split to offer
+        pass
+    else:
+        layer_count = get_layer_count(model, directory=directory)
+        if layer_count is None:
+            breakmodel = False
+        else:
+            breakmodel = True
+            break_file = "settings/{}.breakmodel".format(model.replace("/", "_"))
+            if path.exists(break_file):
+                # Previously saved split, stored as comma separated text
+                with open(break_file, "r") as file:
+                    break_values = file.read().split(",")
+            else:
+                # Default: every layer in the first slot
+                break_values = [layer_count]
+                break_values += [0] * (gpu+1 - len(break_values))
+    emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key,
+                         'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel,
+                         'break_values': break_values, 'gpu_count': torch.cuda.device_count(),
+                         'url': url}, broadcast=True)
+    if key_value != "":
+        get_oai_models(key_value)
+    
+
+def get_layer_count(model, directory=""):
+    """Return the number of layers of the selected model, or None.
+
+    None is returned for the API/read-only/TPU backends listed below. For
+    everything else the model configuration is loaded and handed to
+    utils.num_layers().
+
+    model     -- model name used for the backend check
+    directory -- accepted for the caller's benefit; the lookups below go
+                 through vars.model / vars.custmodpth and do not read it
+    """
+    if(model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]):
+        if(vars.model == "GPT2Custom"):
+            # Parse config.json and close the file again, instead of handing
+            # an open file object to utils.num_layers(). The parsed values
+            # are exposed as attributes, the way the AutoConfig objects from
+            # the other branch expose theirs.
+            from types import SimpleNamespace
+            with open(vars.custmodpth + "/config.json", "r") as config_file:
+                model_config = SimpleNamespace(**json.load(config_file))
+        # Get the model_type from the config or assume a model type if it isn't present
+        else:
+            from transformers import AutoConfig
+            if vars.custmodpth == "":
+                model_config = AutoConfig.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
+            elif(os.path.isdir(vars.custmodpth.replace('/', '_'))):
+                model_config = AutoConfig.from_pretrained(vars.custmodpth.replace('/', '_'), revision=vars.revision, cache_dir="cache")
+            elif(os.path.isdir("models/{}".format(vars.custmodpth.replace('/', '_')))):
+                model_config = AutoConfig.from_pretrained("models/{}".format(vars.custmodpth.replace('/', '_')), revision=vars.revision, cache_dir="cache")
+            else:
+                model_config = AutoConfig.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+
+        return utils.num_layers(model_config)
+    else:
+        return None
+
+
+def get_oai_models(key):
+    """Fetch the engine list for the selected online service and broadcast it.
+
+    Stores `key` in vars.oaiapikey, requests the engine list from OpenAI or
+    GooseAI (chosen by vars.model; any other model returns immediately),
+    saves the key to settings/<model>.settings and emits an 'oai_engines'
+    command with the engines and the previously chosen online model. On a
+    non-200 response the error body is printed and emitted as 'errmsg'.
+
+    key -- API key sent as the Bearer token
+    """
+    vars.oaiapikey = key
+    if vars.model == 'OAI':
+        url = "https://api.openai.com/v1/engines"
+    elif vars.model == 'GooseAI':
+        url = "https://api.goose.ai/v1/engines"
+    else:
+        return
+
+    # Get list of models from OAI
+    print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="")
+    req = requests.get(
+        url,
+        headers = {
+            'Authorization': 'Bearer '+key
+            }
+        )
+    if(req.status_code == 200):
+        engines = req.json()["data"]
+        try:
+            engines = [[en["id"], "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")] for en in engines]
+        except Exception:
+            # Show the unexpected payload before propagating the error
+            print(engines)
+            raise
+
+        online_model = ""
+        js = {}
+        settings_file = "settings/{}.settings".format(vars.model)
+
+        # Save the key
+        os.makedirs('settings', exist_ok=True)
+        if path.exists(settings_file):
+            with open(settings_file, "r") as file:
+                js = json.load(file)
+            if 'online_model' in js:
+                online_model = js['online_model']
+        # Write whenever the stored key is missing or different. This also
+        # covers a first run where no settings file exists yet, so the key
+        # is not lost.
+        if js.get("apikey") != key:
+            js["apikey"] = key
+            with open(settings_file, "w") as file:
+                file.write(json.dumps(js, indent=3))
+
+        emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True)
+    else:
+        # Something went wrong, print the message and report it to the client
+        print("{0}ERROR!{1}".format(colors.RED, colors.END))
+        print(req.json())
+        emit('from_server', {'cmd': 'errmsg', 'data': req.json()})
+        
+            
+
+def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model=""):
+    global model
+    global generator
+    global torch
+    global model_config
+    vars.noai = False
+    if not initial_load:
+        set_aibusy(True)
+        if vars.model != 'ReadOnly':
+            emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(vars.model)}, broadcast=True)
+            #Have to add a sleep so the server will send the emit for some reason
+            time.sleep(0.1)
+    if gpu_layers is not None:
+        args.breakmodel_gpulayers = gpu_layers
+    
+    #We need to wipe out the existing model and refresh the cuda cache
+    model = None
+    generator = None
+    try:
+        torch.cuda.empty_cache()
+    except:
+        pass
+    
+    #Let's set the GooseAI or OpenAI server URLs if that's applicable
+    if online_model != "":
+        if path.exists("settings/{}.settings".format(vars.model)):
+            changed=False
+            with open("settings/{}.settings".format(vars.model), "r") as file:
+                # Check if API key exists
+                js = json.load(file)
+                if 'online_model' in js:
+                    if js['online_model'] != online_model:
+                        changed=True
+                        js['online_model'] = online_model
+                else:
+                    changed=True
+                    js['online_model'] = online_model
+            if changed:
+                with open("settings/{}.settings".format(vars.model), "w") as file:
+                    file.write(json.dumps(js, indent=3))
+        # Swap OAI Server if GooseAI was selected
+        if(vars.model == "GooseAI"):
+            vars.oaiengines = "https://api.goose.ai/v1/engines"
+            vars.model = "OAI"
+            args.configname = "GooseAI" + "/" + online_model
+        else:
+            args.configname = vars.model + "/" + online_model
+        vars.oaiurl = vars.oaiengines + "/{0}/completions".format(online_model)
    
    # If transformers model was selected & GPU available, ask to use CPU or GPU
    if(vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
        vars.allowsp = True
        # Test for GPU support
-    import torch
        
        # Make model path the same as the model name to make this consistent with the other loading method if it isn't a known model type
        # This code is not just a workaround for below, it is also used to make the behavior consistent with other loading methods - Henk717
@@ -992,19 +1210,12 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                vars.usegpu = False
                vars.breakmodel = True
            else:
-            print("    1 - GPU\n    2 - CPU\n")
                genselected = False
        else:
            genselected = False

        if(vars.hascuda):
-        while(genselected == False):
-            genselect = input("Mode> ")
-            if(genselect == ""):
-                vars.breakmodel = False
-                vars.usegpu = True
-                genselected = True
-            elif(genselect.isnumeric() and int(genselect) == 1):
+            if(use_gpu):
                if(vars.bmsupported):
                    vars.breakmodel = True
                    vars.usegpu = False
@@ -1013,47 +1224,14 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                    vars.breakmodel = False
                    vars.usegpu = True
                    genselected = True
-            elif(genselect.isnumeric() and int(genselect) == 2):
+            else:
                vars.breakmodel = False
                vars.usegpu = False
                genselected = True
-            else:
-                print("{0}Please enter a valid selection.{1}".format(colors.RED, colors.END))

    # Ask for API key if InferKit was selected
    if(vars.model == "InferKit"):
-    if(not path.exists("settings/" + getmodelname().replace('/', '_') + ".settings")):
-        # If the client settings file doesn't exist, create it
-        print("{0}Please enter your InferKit API key:{1}\n".format(colors.CYAN, colors.END))
-        vars.apikey = input("Key> ")
-        # Write API key to file
-        os.makedirs('settings', exist_ok=True)
-        file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "w")
-        try:
-            js = {"apikey": vars.apikey}
-            file.write(json.dumps(js, indent=3))
-        finally:
-            file.close()
-    else:
-        # Otherwise open it up
-        file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "r")
-        # Check if API key exists
-        js = json.load(file)
-        if("apikey" in js and js["apikey"] != ""):
-            # API key exists, grab it and close the file
-            vars.apikey = js["apikey"]
-            file.close()
-        else:
-            # Get API key, add it to settings object, and write it to disk
-            print("{0}Please enter your InferKit API key:{1}\n".format(colors.CYAN, colors.END))
-            vars.apikey = input("Key> ")
-            js["apikey"] = vars.apikey
-            # Write API key to file
-            file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "w")
-            try:
-                file.write(json.dumps(js, indent=3))
-            finally:
-                file.close()
+        vars.apikey = vars.oaiapikey
                    
    # Swap OAI Server if GooseAI was selected
    if(vars.model == "GooseAI"):
@@ -1065,103 +1243,10 @@ if(vars.model == "GooseAI"):
    if(vars.model == "OAI"):
        if not args.configname:
            args.configname = "OAI"
-    if(not path.exists("settings/" + getmodelname().replace('/', '_') + ".settings")):
-        # If the client settings file doesn't exist, create it
-        print("{0}Please enter your API key:{1}\n".format(colors.CYAN, colors.END))
-        vars.oaiapikey = input("Key> ")
-        # Write API key to file
-        os.makedirs('settings', exist_ok=True)
-        file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "w")
-        try:
-            js = {"oaiapikey": vars.oaiapikey}
-            file.write(json.dumps(js, indent=3))
-        finally:
-            file.close()
-    else:
-        # Otherwise open it up
-        file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "r")
-        # Check if API key exists
-        js = json.load(file)
-        if("oaiapikey" in js and js["oaiapikey"] != ""):
-            # API key exists, grab it and close the file
-            vars.oaiapikey = js["oaiapikey"]
-            file.close()
-        else:
-            # Get API key, add it to settings object, and write it to disk
-            print("{0}Please enter your API key:{1}\n".format(colors.CYAN, colors.END))
-            vars.oaiapikey = input("Key> ")
-            js["oaiapikey"] = vars.oaiapikey
-            # Write API key to file
-            file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "w")
-            try:
-                file.write(json.dumps(js, indent=3))
-            finally:
-                file.close()
-    
-    if vars.custmodpth:
-        vars.oaiurl = vars.oaiengines + "/" + vars.custmodpth + "/completions"
-        args.configname = args.configname + "/" + vars.custmodpth
-        engselected = True
-    else:
-        # Get list of models from OAI
-        print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="")
-        req = requests.get(
-            vars.oaiengines, 
-            headers = {
-                'Authorization': 'Bearer '+vars.oaiapikey
-                }
-            )
-        if(req.status_code == 200):
-            print("{0}OK!{1}".format(colors.GREEN, colors.END))
-            print("{0}Please select an engine to use:{1}\n".format(colors.CYAN, colors.END))
-            engines = req.json()["data"]
-            # Print list of engines
-            i = 0
-            for en in engines:
-                print("    {0} - {1} ({2})".format(i, en["id"], "\033[92mready\033[0m" if en["ready"] == True else "\033[91mnot ready\033[0m"))
-                i += 1
-            # Get engine to use
-            print("")
-            engselected = False
-            while(engselected == False):
-                engine = input("Engine #> ")
-                if(engine.isnumeric() and int(engine) < len(engines)):
-                    vars.oaiurl = vars.oaiengines + "/{0}/completions".format(engines[int(engine)]["id"])
-                    args.configname = args.configname + "/" + engines[int(engine)]["id"]
-                    engselected = True
-                else:
-                    print("{0}Please enter a valid selection.{1}".format(colors.RED, colors.END))
-        else:
-            # Something went wrong, print the message and quit since we can't initialize an engine
-            print("{0}ERROR!{1}".format(colors.RED, colors.END))
-            print(req.json())
-            quit()
-
-# Ask for ngrok url if Google Colab was selected
-if(vars.model == "Colab"):
-    if(vars.colaburl == ""):
-        print("{0}NOTE: For the modern KoboldAI Colab's you open the links directly in your browser.\nThis option is only for the KoboldAI Server API, not all features are supported in this mode.\n".format(colors.YELLOW, colors.END))
-        print("{0}Enter the URL of the server (For example a trycloudflare link):{1}\n".format(colors.CYAN, colors.END))
-        vars.colaburl = input("URL> ") + "/request"
        
    if(vars.model == "ReadOnly"):
        vars.noai = True

-# Set logging level to reduce chatter from Flask
-import logging
-log = logging.getLogger('werkzeug')
-log.setLevel(logging.ERROR)
-
-# Start flask & SocketIO
-print("{0}Initializing Flask... {1}".format(colors.PURPLE, colors.END), end="")
-from flask import Flask, render_template, Response, request, copy_current_request_context
-from flask_socketio import SocketIO, emit
-app = Flask(__name__, root_path=os.getcwd())
-app.config['SECRET KEY'] = 'secret!'
-socketio = SocketIO(app, async_method="eventlet")
-socketio.start_background_task(check_for_sp_change)
-print("{0}OK!{1}".format(colors.GREEN, colors.END))
-
    # Start transformers and create pipeline
    if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
        if(not vars.noai):
@@ -1243,7 +1328,7 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                        else:
                            num_tensors = len(device_map)
                        print(flush=True)
-                    utils.bar = tqdm(total=num_tensors, desc="Loading model tensors")
+                        utils.bar = tqdm(total=num_tensors, desc="Loading model tensors", file=Send_to_socketio())

                    with zipfile.ZipFile(f, "r") as z:
                        try:
@@ -1734,28 +1819,6 @@ else:
                return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs)
            modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files

-    def tpumtjgetsofttokens():
-        soft_tokens = None
-        if(vars.sp is None):
-            global np
-            if 'np' not in globals():
-                import numpy as np
-            tensor = np.zeros((1, tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"])), dtype=np.float32)
-            rows = tensor.shape[0]
-            padding_amount = tpu_mtj_backend.params["seq"] - (tpu_mtj_backend.params["seq"] % -tpu_mtj_backend.params["cores_per_replica"]) - rows
-            tensor = np.pad(tensor, ((0, padding_amount), (0, 0)))
-            tensor = tensor.reshape(
-                tpu_mtj_backend.params["cores_per_replica"],
-                -1,
-                tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]),
-            )
-            vars.sp = tpu_mtj_backend.shard_xmap(tensor)
-        soft_tokens = np.arange(
-            tpu_mtj_backend.params["n_vocab"] + tpu_mtj_backend.params["n_vocab_padding"],
-            tpu_mtj_backend.params["n_vocab"] + tpu_mtj_backend.params["n_vocab_padding"] + vars.sp_length,
-            dtype=np.uint32
-        )
-        return soft_tokens

        def tpumtjgenerate_warper_callback(scores) -> "np.array":
            scores_shape = scores.shape
@@ -1830,6 +1893,8 @@ else:
            loadsettings()
        # Load the TPU backend if requested
        elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
+            global tpu_mtj_backend
+            import tpu_mtj_backend
            if(vars.model == "TPUMeshTransformerGPTNeoX"):
                vars.badwordsids = vars.badwordsids_neox
            print("{0}Initializing Mesh Transformer JAX, please wait...{1}".format(colors.PURPLE, colors.END))
@@ -1853,11 +1918,25 @@ else:
        else:
            loadsettings()
    
+    lua_startup()
+    # Load scripts
+    load_lua_scripts()
+    
+    final_startup()
+    if not initial_load:
+        set_aibusy(False)
+        emit('from_server', {'cmd': 'hide_model_name'}, broadcast=True)
+        time.sleep(0.1)
+        
+        if not vars.gamestarted:
+            setStartState()
+
+
 # Set up Flask routes
@app.route('/')
@app.route('/index')
 def index():
-    return render_template('index.html')
+    return render_template('index.html', hide_ai_menu=args.remote)
@app.route('/download')
 def download():
    save_format = request.args.get("format", "json").strip().lower()
@@ -1904,7 +1983,12 @@ def download():


 #============================ LUA API =============================#
-
+_bridged = {}
+F = TypeVar("F", bound=Callable)
+def lua_startup():
+    global _bridged
+    global F
+    global bridged
    if(path.exists("settings/" + getmodelname().replace('/', '_') + ".settings")):
        file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "r")
        js   = json.load(file)
@@ -1922,11 +2006,44 @@ if(path.exists("settings/" + getmodelname().replace('/', '_') + ".settings")):
            vars.corescript = "default.lua"
        file.close()
        
+    #==================================================================#
+    #  Lua runtime startup
+    #==================================================================#
+
+    print("", end="", flush=True)
+    print(colors.PURPLE + "Initializing Lua Bridge... " + colors.END, end="", flush=True)
+
+    # Set up Lua state
+    vars.lua_state = lupa.LuaRuntime(unpack_returned_tuples=True)
+
+    # Load bridge.lua
+    bridged = {
+        "corescript_path": "cores",
+        "userscript_path": "userscripts",
+        "config_path": "userscripts",
+        "lib_paths": vars.lua_state.table("lualibs", os.path.join("extern", "lualibs")),
+        "vars": vars,
+    }
+    for kwarg in _bridged:
+        bridged[kwarg] = _bridged[kwarg]
+    try:
+        vars.lua_kobold, vars.lua_koboldcore, vars.lua_koboldbridge = vars.lua_state.globals().dofile("bridge.lua")(
+            vars.lua_state.globals().python,
+            bridged,
+        )
+    except lupa.LuaError as e:
+        print(colors.RED + "ERROR!" + colors.END)
+        vars.lua_koboldbridge.obliterate_multiverse()
+        print("{0}{1}{2}".format(colors.RED, "***LUA ERROR***: ", colors.END), end="", file=sys.stderr)
+        print("{0}{1}{2}".format(colors.RED, str(e).replace("\033", ""), colors.END), file=sys.stderr)
+        exit(1)
+    print(colors.GREEN + "OK!" + colors.END)
+
+
 def lua_log_format_name(name):
    return f"[{name}]" if type(name) is str else "CORE"

-_bridged = {}
-F = TypeVar("F", bound=Callable)
+
 def bridged_kwarg(name=None):
    def _bridged_kwarg(f: F):
        _bridged[name if name is not None else f.__name__[4:] if f.__name__[:4] == "lua_" else f.__name__] = f
@@ -2493,41 +2610,7 @@ def execute_outmod():
    for k in vars.lua_deleted:
        inlinedelete(k)

-#==================================================================#
-#  Lua runtime startup
-#==================================================================#

-print("", end="", flush=True)
-print(colors.PURPLE + "Initializing Lua Bridge... " + colors.END, end="", flush=True)
-
-# Set up Lua state
-vars.lua_state = lupa.LuaRuntime(unpack_returned_tuples=True)
-
-# Load bridge.lua
-bridged = {
-    "corescript_path": "cores",
-    "userscript_path": "userscripts",
-    "config_path": "userscripts",
-    "lib_paths": vars.lua_state.table("lualibs", os.path.join("extern", "lualibs")),
-    "vars": vars,
-}
-for kwarg in _bridged:
-    bridged[kwarg] = _bridged[kwarg]
-try:
-    vars.lua_kobold, vars.lua_koboldcore, vars.lua_koboldbridge = vars.lua_state.globals().dofile("bridge.lua")(
-        vars.lua_state.globals().python,
-        bridged,
-    )
-except lupa.LuaError as e:
-    print(colors.RED + "ERROR!" + colors.END)
-    vars.lua_koboldbridge.obliterate_multiverse()
-    print("{0}{1}{2}".format(colors.RED, "***LUA ERROR***: ", colors.END), end="", file=sys.stderr)
-    print("{0}{1}{2}".format(colors.RED, str(e).replace("\033", ""), colors.END), file=sys.stderr)
-    exit(1)
-print(colors.GREEN + "OK!" + colors.END)
-
-# Load scripts
-load_lua_scripts()


 #============================ METHODS =============================#    
@@ -2854,6 +2937,55 @@ def get_message(msg):
        load_lua_scripts()
        unloaded, loaded = getuslist()
        sendUSStatItems()
+    elif(msg['cmd'] == 'list_model'):
+        # Send the requested model menu back to the UI
+        sendModelSelection(menu=msg['data'])
+    elif(msg['cmd'] == 'load_model'):
+        # Remember the requested GPU layer split for this model; the .breakmodel
+        # file is only rewritten when the value differs from what is stored
+        os.makedirs("settings", exist_ok=True)
+        breakmodel_path = "settings/" + vars.model.replace('/', '_') + ".breakmodel"
+        changed = True
+        if os.path.exists(breakmodel_path):
+            with open(breakmodel_path, "r") as file:
+                if file.read() == msg['gpu_layers']:
+                    changed = False
+        if changed:
+            # Context manager so the handle is closed even if the write fails
+            with open(breakmodel_path, "w") as file:
+                file.write(msg['gpu_layers'])
+        vars.colaburl = msg['url'] + "/request"
+        load_model(use_gpu=msg['use_gpu'], gpu_layers=msg['gpu_layers'], online_model=msg['online_model'])
+    elif(msg['cmd'] == 'show_model'):
+        # Report the current model name on the console and to all connected clients
+        print("Model Name: {}".format(getmodelname()))
+        emit('from_server', {'cmd': 'show_model_name', 'data': getmodelname()}, broadcast=True)
+    elif(msg['cmd'] == 'selectmodel'):
+        # This is run when a model line is selected from the UI (line from the model_menu variable) that is tagged as not a menu
+        # otherwise we should be running the msg['cmd'] == 'list_model'
+        
+        # We have to do a bit of processing though, if we select a custom path, we need to list out the contents of folders
+        # But if we select something else, we need to potentially show model layers for each GPU
+        # We might also need to show key input. All of that happens here
+        
+        # The data variable will contain the model name. But our Custom lines need a bit more processing
+        # If we're on a custom line that we have selected a model for, the path variable will be in msg
+        # so if that's missing we need to run the menu to show the model folders in the models folder
+        if msg['data'] in ('NeoCustom', 'GPT2Custom') and 'path' not in msg:
+            sendModelSelection(menu=msg['data'])
+        #elif msg['data'] in ('OAI', 'GooseAI'):
+        #    vars.model = msg['data']
+        #    get_oai_models()
+        #    emit('from_server', {'cmd': 'hide_layer_bar'}, broadcast=True)
+        #    emit('from_server', {'cmd': 'check_enable_model_load', 'model': vars.model}, broadcast=True)
+        else:
+            vars.model = msg['data']
+            if 'path' in msg:
+                if msg['data'] == 'NeoCustom':
+                    get_model_info(vars.custmodpth, directory=msg['path'])
+                else:
+                    get_model_info(vars.model, directory=msg['path'])
+            else:
+                get_model_info(vars.model)
+            
+    elif(msg['cmd'] == 'OAI_Key_Update'):
+        # The key field lost focus in the UI: refresh the engine list using the entered key
+        get_oai_models(msg['key'])
    elif(msg['cmd'] == 'loadselect'):
        vars.loadselect = msg["data"]
    elif(msg['cmd'] == 'spselect'):
@@ -5441,6 +5573,7 @@ def randomGameRequest(topic, memory=""):
    vars.memory      = memory
    emit('from_server', {'cmd': 'setmemory', 'data': vars.memory}, broadcast=True)

+def final_startup():
    # Prevent tokenizer from taking extra time the first time it's used
    def __preempt_tokenizer():
        if("tokenizer" not in globals()):
@@ -5526,10 +5659,16 @@ def send_debug():
 #==================================================================#
 print("", end="", flush=True)
 if __name__ == "__main__":
-    port = args.port if "port" in args and args.port is not None else 5000
    print("{0}\nStarting webserver...{1}".format(colors.GREEN, colors.END), flush=True)

+    general_startup()
+    #show_select_model_list()
+    if vars.model == "" or vars.model is None:
+        vars.model = "ReadOnly"
+    load_model(initial_load=True)
+
    # Start Flask/SocketIO (Blocking, so this must be last method!)
+    port = args.port if "port" in args and args.port is not None else 5000
    
    #socketio.run(app, host='0.0.0.0', port=port)
    if(vars.host):
@@ -5577,4 +5716,9 @@ if __name__ == "__main__":
            socketio.run(app, port=port)

 else:
+    general_startup()
+    #show_select_model_list()
+    if vars.model == "" or vars.model is None:
+        vars.model = "ReadOnly"
+    load_model(initial_load=True)
    print("{0}\nServer started in WSGI mode!{1}".format(colors.GREEN, colors.END), flush=True)
--- a/static/application.js
+++ b/static/application.js
@@ -7,6 +7,7 @@ var socket;

 // UI references for jQuery
 var connect_status;
+var button_loadmodel;
 var button_newgame;
 var button_rndgame;
 var button_save;
@@ -55,6 +56,7 @@ var savepins;
 var topic;
 var saveas_accept;
 var saveas_close;
+var loadmodelpopup;
 var loadpopup;
 var	loadcontent;
 var	load_accept;
@@ -98,6 +100,7 @@ var remote = false;
 var gamestate = "";
 var gamesaved = true;
 var modelname = null;
+var model = "";

 // This is true iff [we're in macOS and the browser is Safari] or [we're in iOS]
 var using_webkit_patch = true;
@@ -943,6 +946,17 @@ function sendSaveAsRequest() {
 	socket.send({'cmd': 'saveasrequest', 'data': {"name": saveasinput.val(), "pins": savepins.val()}});
 }

+function showLoadModelPopup() {
+	// Reveal the model-selection popup by switching it from hidden to flex layout.
+	loadmodelpopup.removeClass("hidden").addClass("flex");
+}
+
+function hideLoadModelPopup() {
+	// Hide the model-selection popup and discard the rendered model list.
+	loadmodelpopup.removeClass("flex").addClass("hidden");
+	loadmodelcontent.html("");
+}
+
 function showLoadPopup() {
 	loadpopup.removeClass("hidden");
 	loadpopup.addClass("flex");
@@ -976,6 +990,58 @@ function hideUSPopup() {
 	spcontent.html("");
 }

+
+function buildLoadModelList(ar, menu) {
+	// Render the rows of a model menu into the load-model popup and wire up their clicks.
+	//   ar:   array of rows; row[0] is the displayed name, row[1] the value sent back to the
+	//         server, row[2] the text shown on the right, row[3] truthy when the row opens a submenu.
+	//   menu: 'custom' when non-menu rows must send their displayed name back as 'path'.
+	// Rows are built as DOM nodes (attr/text) rather than concatenated HTML so that names
+	// containing quotes or markup characters cannot break the row.
+	disableButtons([load_model_accept]);
+	loadmodelcontent.html("");
+	ar.forEach(function (entry, i) {
+		var pretty_name = "" + entry[0];
+		var name = "" + entry[1];
+		var is_menu = entry[3];
+
+		var row = $("<div>").addClass("flex");
+		row.append($("<div>").addClass("loadlistpadding"));
+		if(is_menu) {
+			row.append($("<span>").addClass("loadlisticon loadmodellisticon-folder oi oi-folder allowed").attr("aria-hidden", "true"));
+		} else {
+			row.append($("<div>").addClass("loadlistpadding"));
+		}
+		row.append($("<div>").addClass("loadlistpadding"));
+
+		var item = $("<div>").addClass("loadlistitem").attr({"id": "loadmodel"+i, "name": name, "pretty_name": pretty_name});
+		item.append($("<div>").text(pretty_name));
+		item.append($("<div>").addClass("flex-push-right").text("" + entry[2]));
+		row.append(item);
+		loadmodelcontent.append(row);
+
+		if(is_menu) {
+			//If this is a menu
+			item.on("click", function () {
+				socket.send({'cmd': 'list_model', 'data': name, 'pretty_name': pretty_name});
+				disableButtons([load_model_accept]);
+			});
+		} else if(menu == 'custom') {
+			//If we're in the custom load menu (we need to send the path data back in that case)
+			item.on("click", function () {
+				socket.send({'cmd': 'selectmodel', 'data': name, 'path': pretty_name});
+				highlightLoadLine($(this));
+			});
+		} else {
+			//Normal load
+			item.on("click", function () {
+				$("#use_gpu_div").addClass("hidden");
+				$("#modelkey").addClass("hidden");
+				$("#modellayers").addClass("hidden");
+				socket.send({'cmd': 'selectmodel', 'data': name});
+				highlightLoadLine($(this));
+			});
+		}
+	});
+}
+
 function buildLoadList(ar) {
 	disableButtons([load_accept]);
 	loadcontent.html("");
@@ -1111,6 +1177,7 @@ function buildUSList(unloaded, loaded) {

 function highlightLoadLine(ref) {
 	$("#loadlistcontent > div > div.popuplistselected").removeClass("popuplistselected");
+	$("#loadmodellistcontent > div > div.popuplistselected").removeClass("popuplistselected");
 	ref.addClass("popuplistselected");
 }

@@ -1814,6 +1881,29 @@ function unbindGametext() {
 	gametext_bound = false;
 }

+function update_gpu_layers() {
+	// Sum the values of all per-GPU layer sliders and display the total. While the total
+	// exceeds the maximum shown in #gpu_layers_max, the Load button is disabled and the
+	// total is rendered in red; otherwise the Load button is enabled.
+	// The hidden #gpu_count input holds a string, so parse it once; anything unparsable counts as 0.
+	var gpu_count = parseInt($("#gpu_count")[0].value, 10) || 0;
+	var gpu_layers_max = parseInt(document.getElementById("gpu_layers_max").innerHTML, 10);
+	var gpu_layers = 0;
+	for (let i = 0; i < gpu_count; i++) {
+		gpu_layers += parseInt($("#gpu_layers"+i)[0].value, 10);
+	}
+	if (gpu_layers > gpu_layers_max) {
+		disableButtons([load_model_accept]);
+		$("#gpu_layers_current").html("<span style='color: red'>"+gpu_layers+"</span>");
+	} else {
+		enableButtons([load_model_accept]);
+		$("#gpu_layers_current").html(gpu_layers);
+	}
+}
+
+
+function RemoveAllButFirstOption(selectElement) {
+   // Remove options from the last index down to index 1, so removals never shift an
+   // index that is still to be visited; the option at index 0 is left in place.
+   for(var idx = selectElement.options.length - 1; idx > 0; idx--) {
+      selectElement.remove(idx);
+   }
+}
+
 //=================================================================//
 //  READY/RUNTIME
 //=================================================================//
@@ -1822,6 +1912,8 @@ $(document).ready(function(){
 	
 	// Bind UI references
 	connect_status    = $('#connectstatus');
+	button_loadmodel  = $('#btn_loadmodel');
+	button_showmodel  = $('#btn_showmodel');
 	button_newgame    = $('#btn_newgame');
 	button_rndgame    = $('#btn_rndgame');
 	button_save       = $('#btn_save');
@@ -1874,9 +1966,13 @@ $(document).ready(function(){
 	saveas_accept     = $("#btn_saveasaccept");
 	saveas_close      = $("#btn_saveasclose");
 	loadpopup         = $("#loadcontainer");
+	loadmodelpopup    = $("#loadmodelcontainer");
 	loadcontent       = $("#loadlistcontent");
+	loadmodelcontent  = $("#loadmodellistcontent");
 	load_accept       = $("#btn_loadaccept");
 	load_close        = $("#btn_loadclose");
+	load_model_accept = $("#btn_loadmodelaccept");
+	load_model_close  = $("#btn_loadmodelclose");
 	sppopup           = $("#spcontainer");
 	spcontent         = $("#splistcontent");
 	sp_accept         = $("#btn_spaccept");
@@ -1899,6 +1995,7 @@ $(document).ready(function(){
 	socket = io.connect(window.document.origin, {transports: ['polling', 'websocket'], closeOnBeforeunload: false});

 	socket.on('from_server', function(msg) {
+		//console.log(msg);
 		if(msg.cmd == "connected") {
 			// Connected to Server Actions
 			sman_allow_delete = msg.hasOwnProperty("smandelete") && msg.smandelete;
@@ -2374,6 +2471,84 @@ $(document).ready(function(){
 			} else {
 				debug_area.addClass("hidden");
 			}
+		} else if(msg.cmd == 'show_model_menu') {
+			// A model (sub)menu arrived: hide the per-model option widgets and render the list.
+			$("#use_gpu_div").addClass("hidden");
+			$("#modelkey").addClass("hidden");
+			$("#modellayers").addClass("hidden");
+			$("#oaimodel").addClass("hidden");
+			buildLoadModelList(msg.data, msg.menu);
+		} else if(msg.cmd == 'selected_model_info') {
+			// Show only the inputs flagged for the selected model: key, URL, GPU toggle, layer sliders.
+			enableButtons([load_model_accept]);
+			$("#oaimodel").addClass("hidden");
+			$("#oaimodel")[0].options[0].selected = true;
+			if (msg.key) {
+				$("#modelkey").removeClass("hidden");
+				$("#modelkey")[0].value = msg.key_value;
+			} else {
+				$("#modelkey").addClass("hidden");
+			}
+			if (msg.url) {
+				$("#modelurl").removeClass("hidden");
+			} else {
+				$("#modelurl").addClass("hidden");
+			}
+			if (msg.gpu) {
+				$("#use_gpu_div").removeClass("hidden");
+			} else {
+				$("#use_gpu_div").addClass("hidden");
+			}
+			if (msg.breakmodel) {
+				// One range slider per entry of msg.break_values, each capped at msg.layer_count.
+				var html = "";
+				$("#modellayers").removeClass("hidden");
+				msg.break_values.forEach(function (item, index) {
+					html += "GPU " + index + ": <input type='range' class='form-range airange' min='0' max='"+msg.layer_count+"' step='1' value='"+item+"' id='gpu_layers"+index+"' onchange='update_gpu_layers();'>";
+				});
+				$("#model_layer_bars").html(html);
+				$("#gpu_layers_max").html(msg.layer_count);
+				$("#gpu_count")[0].value = msg.gpu_count;
+				update_gpu_layers();
+			} else {
+				$("#modellayers").addClass("hidden");
+			}
+		} else if(msg.cmd == 'oai_engines') {
+			// Rebuild the engine dropdown: keep the first option, append one option per
+			// [value, label] pair in msg.data, and preselect the one equal to msg.online_model.
+			// (A second, identical 'oai_engines' test further down this chain could never be
+			// reached, so its work is folded into this branch.)
+			let oai_select = $("#oaimodel")[0];
+			$("#oaimodel").removeClass("hidden");
+			RemoveAllButFirstOption(oai_select);
+			msg.data.forEach(function (item) {
+				var option = document.createElement("option");
+				option.value = item[0];
+				option.text = item[1];
+				oai_select.appendChild(option);
+				if(msg.online_model == item[0]) {
+					option.selected = true;
+				}
+			});
+		} else if(msg.cmd == 'show_model_name') {
+			$("#showmodelnamecontent").html("<div class=\"flex\"><div class=\"loadlistpadding\"></div><div class=\"loadlistitem\">" + msg.data + "</div></div>");
+			$("#showmodelnamecontainer").removeClass("hidden");
+		} else if(msg.cmd == 'hide_model_name') {
+			$("#showmodelnamecontainer").addClass("hidden");
+			//console.log("Closing window");
+		} else if(msg.cmd == 'model_load_status') {
+			// Reuses the model-name popup to display status text sent by the server.
+			$("#showmodelnamecontent").html("<div class=\"flex\"><div class=\"loadlistpadding\"></div><div class=\"loadlistitem\" style='text-align: left'>" + msg.data + "</div></div>");
+			$("#showmodelnamecontainer").removeClass("hidden");
+			//console.log(msg.data);
 		}
 	});
 	
@@ -2588,6 +2763,11 @@ $(document).ready(function(){
 		hideLoadPopup();
 	});
 	
+	load_model_close.on("click", function(ev) {
+		// Cancel: re-hide the GPU layer section, then close the popup (which also clears the model list).
+		$("#modellayers").addClass("hidden");
+		hideLoadModelPopup();
+	});
+	
 	load_accept.on("click", function(ev) {
 		hideMessage();
 		newly_loaded = true;
@@ -2595,6 +2775,24 @@ $(document).ready(function(){
 		hideLoadPopup();
 	});
 	
+	load_model_accept.on("click", function(ev) {
+		// Gather the popup's current settings and ask the server to load the selected model.
+		hideMessage();
+		// One value per GPU slider; stays empty while the layer section is hidden,
+		// so 'gpu_layers' is then sent as an empty string.
+		var layer_values = [];
+		if(!$("#modellayers")[0].classList.contains('hidden')) {
+			for (let i=0; i < $("#gpu_count")[0].value; i++) {
+				layer_values.push($("#gpu_layers"+i)[0].value);
+			}
+		}
+		socket.send({
+			'cmd': 'load_model',
+			'use_gpu': $('#use_gpu')[0].checked,
+			'key': $('#modelkey')[0].value,
+			'gpu_layers': layer_values.join(","),
+			'url': $('#modelurl')[0].value,
+			'online_model': $('#oaimodel')[0].value
+		});
+		loadmodelcontent.html("");
+		hideLoadModelPopup();
+	});
+
 	sp_close.on("click", function(ev) {
 		hideSPPopup();
 	});
@@ -2617,6 +2815,14 @@ $(document).ready(function(){
 		hideUSPopup();
 	});
 	
+	button_loadmodel.on("click", function(ev) {
+		// Open the popup, then request the 'mainmenu' model list from the server.
+		showLoadModelPopup();
+		socket.send({'cmd': 'list_model', 'data': 'mainmenu'});
+	});
+	button_showmodel.on("click", function(ev) {
+		// Ask the server for the current model name; it answers with a 'show_model_name' message.
+		socket.send({'cmd': 'show_model', 'data': ''});
+	});
+	
 	button_newgame.on("click", function(ev) {
 		if(connected) {
 			showNewStoryPopup();
--- a/templates/index.html
+++ b/templates/index.html
@@ -33,6 +33,15 @@
 						</button>
 						<div class="collapse navbar-collapse" id="navbarNavDropdown">
 							<ul class="nav navbar-nav">
+								{% if not hide_ai_menu %}
+								{# AI menu: omitted when the index route passes a truthy hide_ai_menu (set from args.remote). #}
+								<li class="nav-item dropdown">
+									<a class="nav-link dropdown-toggle" href="#" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">AI</a>
+									<div class="dropdown-menu">
+										<a class="dropdown-item" href="#" id="btn_loadmodel">Load Model</a>
+										<a class="dropdown-item" href="#" id="btn_showmodel">Model Info</a>
+									</div>
+								</li>
+								{% endif %}
 								<li class="nav-item dropdown">
 									<a class="nav-link dropdown-toggle" href="#" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">New Game</a>
 									<div class="dropdown-menu">
@@ -265,6 +274,58 @@
 			</div>
 		</div>
 	</div>
+	<div class="popupcontainer hidden" id="loadmodelcontainer">
+		<div id="loadpopup">
+			<div class="popuptitlebar">
+				<div class="popuptitletext">Select A Model To Load</div>
+			</div>
+			<div class="loadmodellistheader">
+				<div>Model</div>
+			</div>
+			<div id="loadmodellistcontent" style="overflow: scroll; height: 300px;">
+			</div>
+			<div class="popupfooter">
+				<input class="form-control hidden" type="text" placeholder="key" id="modelkey" onblur="socket.send({'cmd': 'OAI_Key_Update', 'key': $('#modelkey')[0].value});">
+				<input class="form-control hidden" type="text" placeholder="Enter the URL of the server (For example a trycloudflare link)" id="modelurl" onchange="check_enable_model_load()">
+			</div>
+			<div class="popupfooter">
+				<select class="form-control hidden" id="oaimodel"><option value="">Select OAI Model</option></select>
+			</div>
+			<div class="popupfooter hidden" id=modellayers>
+				<div class='settingitem' style="width:100%">
+					<div class='settinglabel'>
+						<div class="justifyleft">
+							GPU Layers
+							<span class="helpicon">?
+								<span class="helptext">Number of layers to assign to the GPU</span>
+							</span>
+						</div>
+						<div class="justifyright" id="gpu_layers_current">0</div>
+					</div>
+					<div id=model_layer_bars style="color: white">
+						
+					</div>
+					<input type="hidden" id="gpu_count" value="0"/>
+					<div class="settingminmax">
+						<div class="justifyleft">
+							0
+						</div>
+						<div class="justifyright" id="gpu_layers_max">
+							24
+						</div>
+					</div>
+				</div>
+			</div>
+			<div class="popupfooter">
+				<button type="button" class="btn btn-primary" id="btn_loadmodelaccept">Load</button>
+				<button type="button" class="btn btn-primary" id="btn_loadmodelclose">Cancel</button>
+				<div class="box flex-push-right hidden" id=use_gpu_div>
+					<input type="checkbox" data-toggle="toggle" data-onstyle="success" id="use_gpu" checked>
+					<div class="box-label">Use GPU</div>
+				</div>
+			</div>
+		</div>
+	</div>
 	<div class="popupcontainer hidden" id="spcontainer">
 		<div id="sppopup">
 			<div class="popuptitlebar">
@@ -351,6 +412,19 @@
 			</div>
 		</div>
 	</div>
+	<div class="popupcontainer hidden" id="showmodelnamecontainer">
+		{# Shared popup: application.js replaces #showmodelnamecontent for both the model name and model load status messages. #}
+		{# The former inline styles ("center;", "width:50% center;") were invalid CSS that browsers discard, so removing them leaves rendering unchanged. #}
+		<div>
+			<div class="popuptitlebar">
+				<div class="popuptitletext">Model Info</div>
+			</div>
+			<div class="aidgpopuplistheader" id="showmodelnamecontent">
+				Read Only
+			</div>
+			<div class="popupfooter">
+				<button type="button" class="btn btn-primary" onclick='$("#showmodelnamecontainer").addClass("hidden");'>OK</button>
+			</div>
+		</div>
+	</div>
 	<div class="popupcontainer hidden" id="rndgamecontainer">
 		<div id="rspopup">
 			<div class="popuptitlebar">