diff --git a/aiserver.py b/aiserver.py index 7b94e15e..faffe0e2 100644 --- a/aiserver.py +++ b/aiserver.py @@ -56,6 +56,7 @@ import html import argparse import sys import gc +import traceback import lupa @@ -167,6 +168,7 @@ class MenuFolder(MenuItem): "size": "", "isMenu": True, "isDownloaded": False, + "isDirectory": False } class MenuModel(MenuItem): @@ -177,11 +179,13 @@ class MenuModel(MenuItem): vram_requirements: str = "", model_type: MenuModelType = MenuModelType.HUGGINGFACE, experimental: bool = False, + model_backend: str = "Huggingface", ) -> None: super().__init__(label, name, experimental) self.model_type = model_type self.vram_requirements = vram_requirements self.is_downloaded = is_model_downloaded(self.name) + self.model_backend = model_backend def to_ui1(self) -> list: return [ @@ -199,8 +203,28 @@ class MenuModel(MenuItem): "size": self.vram_requirements, "isMenu": False, "isDownloaded": self.is_downloaded, + "isDirectory": False, } +class MenuPath(MenuItem): + def to_ui1(self) -> list: + return [ + self.label, + self.name, + "", + True, + ] + + def to_json(self) -> dict: + return { + "label": self.label, + "name": self.name, + "size": "", + "isMenu": True, + "isDownloaded": False, + "isDirectory": True, + "path": "./models" + } # AI models Menu # This is a dict of lists where they key is the menu name, and the list is the menu items. @@ -208,9 +232,9 @@ class MenuModel(MenuItem): # 3: the memory requirement for the model, 4: if the item is a menu or not (True/False) model_menu = { "mainmenu": [ - MenuModel("Load a model from its directory", "NeoCustom"), - MenuModel("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), - MenuFolder("Load custom model from Hugging Face", "customhuggingface"), + MenuPath("Load a model from its directory", "NeoCustom"), + MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), + MenuModel("Load custom model from Hugging Face", "customhuggingface", ""), MenuFolder("Adventure Models", "adventurelist"), MenuFolder("Novel Models", "novellist"), MenuFolder("Chat Models", "chatlist"), @@ -224,7 +248,7 @@ model_menu = { MenuFolder("Official RWKV-4", "rwkvlist"), MenuFolder("Untuned GPT2", "gpt2list"), MenuFolder("Online Services", "apilist"), - MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER), + MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER, model_backend="Read Only"), ], 'adventurelist': [ MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "64GB"), @@ -361,12 +385,11 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'apilist': [ - MenuModel("GooseAI API (requires API key)", "GooseAI", model_type=MenuModelType.ONLINE_API), - MenuModel("OpenAI API (requires API key)", "OAI", model_type=MenuModelType.ONLINE_API), - MenuModel("InferKit API (requires API key)", "InferKit", model_type=MenuModelType.ONLINE_API), - MenuModel("KoboldAI API", "API", model_type=MenuModelType.ONLINE_API), - MenuModel("Basic Model API", "Colab", model_type=MenuModelType.ONLINE_API), - MenuModel("KoboldAI Horde", "CLUSTER", model_type=MenuModelType.ONLINE_API), + MenuModel("GooseAI API (requires API key)", "GooseAI", model_type=MenuModelType.ONLINE_API, model_backend="GooseAI"), + MenuModel("OpenAI API (requires API key)", "OAI", model_type=MenuModelType.ONLINE_API, model_backend="OpenAI"), + MenuModel("KoboldAI API", "API", model_type=MenuModelType.ONLINE_API, model_backend="KoboldAI API"), + MenuModel("Basic Model API", "Colab", model_type=MenuModelType.ONLINE_API, 
model_backend="KoboldAI Old Colab Method"), + MenuModel("KoboldAI Horde", "CLUSTER", model_type=MenuModelType.ONLINE_API, model_backend="Horde"), MenuFolder("Return to Main Menu", "mainmenu"), ] } @@ -599,6 +622,24 @@ utils.socketio = socketio # Weird import position to steal koboldai_vars from utils from modeling.patches import patch_transformers +#Load all of the model importers +import importlib +model_backend_code = {} +model_backends = {} +for module in os.listdir("./modeling/inference_models"): + if not os.path.isfile(os.path.join("./modeling/inference_models",module)) and module != '__pycache__': + try: + model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) + model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() + if 'disable' in vars(model_backends[model_backend_code[module].model_backend_name]): + if model_backends[model_backend_code[module].model_backend_name].disable: + del model_backends[model_backend_code[module].model_backend_name] + except Exception: + logger.error("Model Backend {} failed to load".format(module)) + logger.error(traceback.format_exc()) + +logger.info("We loaded the following model backends: \n{}".format("\n".join([x for x in model_backends]))) + old_socketio_on = socketio.on def new_socketio_on(*a, **k): @@ -614,10 +655,14 @@ def new_socketio_on(*a, **k): socketio.on = new_socketio_on def emit(*args, **kwargs): - try: - return _emit(*args, **kwargs) - except AttributeError: - return socketio.emit(*args, **kwargs) + if has_request_context(): + try: + return _emit(*args, **kwargs) + except AttributeError: + return socketio.emit(*args, **kwargs) + else: #We're trying to send data outside of the http context. This won't work. 
Try the relay + if koboldai_settings.queue is not None: + koboldai_settings.queue.put([args[0], args[1], kwargs]) utils.emit = emit #replacement for tpool.execute to maintain request contexts @@ -905,6 +950,8 @@ def sendModelSelection(menu="mainmenu", folder="./models"): ) def get_folder_path_info(base): + if base is None: + return [], [] if base == 'This PC': breadcrumbs = [['This PC', 'This PC']] paths = [["{}:\\".format(chr(i)), "{}:\\".format(chr(i))] for i in range(65, 91) if os.path.exists("{}:".format(chr(i)))] @@ -987,7 +1034,7 @@ def getmodelname(): if(koboldai_vars.online_model != ''): return(f"{koboldai_vars.model}/{koboldai_vars.online_model}") if(koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")): - modelname = os.path.basename(os.path.normpath(koboldai_vars.custmodpth)) + modelname = os.path.basename(os.path.normpath(model.path)) return modelname else: modelname = koboldai_vars.model if koboldai_vars.model is not None else "Read Only" @@ -1318,16 +1365,14 @@ def general_startup(override_args=None): parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable") parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") + parser.add_argument("--model_backend", default="Huggingface", help="Specify the model backend you want to use") + parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input to the model loading process (set to help to get required parameters)") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") parser.add_argument("--sh_apikey", help="Specify the API key to use for txt2img from the Stable Horde. Get a key from https://horde.koboldai.net/register") parser.add_argument("--req_model", type=str, action='append', required=False, help="Which models which we allow to generate for us during cluster mode. Can be specified multiple times.") parser.add_argument("--revision", help="Specify the model revision for huggingface models (can be a git branch/tag name or a git commit hash)") parser.add_argument("--cpu", action='store_true', help="By default unattended launches are on the GPU use this option to force CPU usage.") - parser.add_argument("--breakmodel", action='store_true', help=argparse.SUPPRESS) - parser.add_argument("--breakmodel_layers", type=int, help=argparse.SUPPRESS) - parser.add_argument("--breakmodel_gpulayers", type=str, help="If using a model that supports hybrid generation, this is a comma-separated list that specifies how many layers to put on each GPU device. For example to put 8 layers on device 0, 9 layers on device 1 and 11 layers on device 2, use --breakmodel_gpulayers 8,9,11") - parser.add_argument("--breakmodel_disklayers", type=int, help="If using a model that supports hybrid generation, this is the number of layers to put in disk cache.") parser.add_argument("--override_delete", action='store_true', help="Deleting stories from inside the browser is disabled if you are using --remote and enabled otherwise. 
Using this option will instead allow deleting stories if using --remote and prevent deleting stories otherwise.") parser.add_argument("--override_rename", action='store_true', help="Renaming stories from inside the browser is disabled if you are using --remote and enabled otherwise. Using this option will instead allow renaming stories if using --remote and prevent renaming stories otherwise.") parser.add_argument("--configname", help="Force a fixed configuration name to aid with config management.") @@ -1360,6 +1405,7 @@ def general_startup(override_args=None): args = parser.parse_args(shlex.split(override_args)) elif(os.environ.get("KOBOLDAI_ARGS") is not None): import shlex + logger.info("Using environmental variables instead of command arguments: {}".format(os.environ["KOBOLDAI_ARGS"])) args = parser.parse_args(shlex.split(os.environ["KOBOLDAI_ARGS"])) else: args = parser.parse_args() @@ -1382,9 +1428,11 @@ def general_startup(override_args=None): for arg in temp: if arg == "path": if "model_path" in os.environ: + logger.info("Setting model path based on enviornmental variable: {}".format(os.environ["model_path"])) setattr(args, arg, os.environ["model_path"]) else: if arg in os.environ: + logger.info("Setting {} based on enviornmental variable: {}".format(arg, os.environ[arg])) if isinstance(getattr(args, arg), bool): if os.environ[arg].lower() == "true": setattr(args, arg, True) @@ -1410,8 +1458,6 @@ def general_startup(override_args=None): args.max_summary_length = int(args.max_summary_length) - if args.model: - koboldai_vars.model = args.model; koboldai_vars.revision = args.revision koboldai_settings.multi_story = args.multi_story @@ -1436,7 +1482,7 @@ def general_startup(override_args=None): koboldai_vars.quiet = True if args.nobreakmodel: - koboldai_vars.nobreakmodel = True + model_backends['Huggingface'].nobreakmodel = True if args.remote: koboldai_vars.host = True; @@ -1447,6 +1493,9 @@ def general_startup(override_args=None): if args.localtunnel: koboldai_vars.host = True; + if args.lowmem: + model_backends['Huggingface'].low_mem = True + if args.host != "Disabled": # This means --host option was submitted without an argument # Enable all LAN IPs (0.0.0.0/0) @@ -1479,6 +1528,9 @@ def general_startup(override_args=None): koboldai_vars.trust_remote_code = True if args.cpu: koboldai_vars.use_colab_tpu = False + koboldai_vars.hascuda = False + koboldai_vars.usegpu = False + model_backends['Huggingface'].nobreakmodel = True koboldai_vars.smandelete = koboldai_vars.host == args.override_delete koboldai_vars.smanrename = koboldai_vars.host == args.override_rename @@ -1493,262 +1545,67 @@ def general_startup(override_args=None): if(modpath): # Save directory to koboldai_vars koboldai_vars.model = "NeoCustom" - koboldai_vars.custmodpth = modpath + args.path = modpath elif args.model: logger.message(f"Welcome to KoboldAI!") - logger.message(f"You have selected the following Model: {koboldai_vars.model}") + logger.message(f"You have selected the following Model: {args.model}") if args.path: logger.message(f"You have selected the following path for your Model: {args.path}") - koboldai_vars.custmodpth = args.path; - koboldai_vars.colaburl = args.path + "/request"; # Lets just use the same parameter to keep it simple + model_backends["KoboldAI Old Colab Method"].colaburl = args.path + "/request"; # Lets just use the same parameter to keep it simple #setup socketio relay queue koboldai_settings.queue = multiprocessing.Queue() socketio.start_background_task(socket_io_relay, 
koboldai_settings.queue, socketio) - -#==================================================================# -# Load Model -#==================================================================# - -@socketio.on("get_model_info") -def get_model_info(model, directory=""): - logger.info("Selected: {}, {}".format(model, directory)) - # if the model is in the api list - disk_blocks = 0 - key = False - breakmodel = False - gpu = False - layer_count = None - key_value = "" - break_values = [] - url = False - default_url = None - models_on_url = False - multi_online_models = False - show_online_model_select=False - gpu_count = torch.cuda.device_count() - gpu_names = [] - send_horde_models = False - show_custom_model_box = False - for i in range(gpu_count): - gpu_names.append(torch.cuda.get_device_name(i)) - if model in ['Colab', 'API']: - url = True - elif model == 'CLUSTER': - models_on_url = True - show_online_model_select=True - url = True - key = True - default_url = koboldai_vars.horde_url - multi_online_models = True - key_value = koboldai_vars.horde_api_key - url = koboldai_vars.horde_url - if key_value: - send_horde_models = True - elif model in [x.name for x in model_menu['apilist']]: - show_online_model_select=True - if path.exists("settings/{}.v2_settings".format(model)): - with open("settings/{}.v2_settings".format(model), "r") as file: - # Check if API key exists - try: - js = json.load(file) - - if("apikey" in js and js["apikey"] != ""): - # API key exists, grab it and close the file - key_value = js["apikey"] - elif 'oaiapikey' in js and js['oaiapikey'] != "": - key_value = js["oaiapikey"] - if model in ('GooseAI', 'OAI'): - get_oai_models({'model': model, 'key': key_value}) - except json.decoder.JSONDecodeError: - print(":(") - pass - key = True - elif model == 'ReadOnly': - pass - #elif model == 'customhuggingface': - # show_custom_model_box = True - elif args.cpu: - pass - else: - layer_count = get_layer_count(model, directory=directory) - if layer_count is None: - breakmodel = False - gpu = True - else: - breakmodel = True - if model in ["NeoCustom", "GPT2Custom", "customhuggingface"]: - filename = "settings/{}.breakmodel".format(os.path.basename(os.path.normpath(directory))) - else: - filename = "settings/{}.breakmodel".format(model.replace("/", "_")) - if path.exists(filename): - with open(filename, "r") as file: - data = [x for x in file.read().split("\n")[:2] if x != ''] - if len(data) < 2: - data.append("0") - break_values, disk_blocks = data - break_values = break_values.split(",") - else: - break_values = [layer_count] - break_values = [int(x) for x in break_values if x != ''] - break_values += [0] * (gpu_count - len(break_values)) - emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key, 'multi_online_models': multi_online_models, 'default_url': default_url, - 'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, - 'disk_break_value': disk_blocks, 'accelerate': True, - 'break_values': break_values, 'gpu_count': gpu_count, - 'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, - 'show_custom_model_box': show_custom_model_box}, broadcast=True, room="UI_1") - emit('selected_model_info', {'key_value': key_value, 'key':key, - 'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, 'multi_online_models': multi_online_models, 'default_url': default_url, - 'disk_break_value': disk_blocks, 'disk_break': True, - 'break_values': break_values, 'gpu_count': gpu_count, - 'url': url, 'gpu_names': gpu_names, 'models_on_url': 
models_on_url, 'show_online_model_select': show_online_model_select, - 'bit_8_available': koboldai_vars.bit_8_available if koboldai_vars.experimental_features else False, - 'show_custom_model_box': show_custom_model_box}) - if send_horde_models: - get_cluster_models({'key': key_value, 'url': default_url}) - elif key_value != "" and model in [x.name for x in model_menu['apilist']] and model != 'CLUSTER': - get_oai_models(key_value) + if koboldai_vars.use_colab_tpu and args.model_backend == "Huggingface": + args.model_backend = "Huggingface MTJ" + - - -def get_layer_count(model, directory=""): - if(model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]): - if(model == "GPT2Custom"): - with open(os.path.join(directory, "config.json"), "r") as f: - model_config = json.load(f) - # Get the model_type from the config or assume a model type if it isn't present - else: - if(directory): - model = directory - from transformers import AutoConfig - if(os.path.isdir(model.replace('/', '_'))): - model_config = AutoConfig.from_pretrained(model.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache") - elif(is_model_downloaded(model)): - model_config = AutoConfig.from_pretrained("models/{}".format(model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache") - elif(os.path.isdir(directory)): - model_config = AutoConfig.from_pretrained(directory, revision=koboldai_vars.revision, cache_dir="cache") - elif(os.path.isdir(koboldai_vars.custmodpth.replace('/', '_'))): - model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache") - else: - model_config = AutoConfig.from_pretrained(model, revision=koboldai_vars.revision, cache_dir="cache") - try: - if (model_config.model_type != 'gpt2' or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel: - return utils.num_layers(model_config) - else: - return None - except: - return None - else: - return None - -@socketio.on('OAI_Key_Update') -def get_oai_models(data): - key = data['key'] - model = data['model'] - koboldai_vars.oaiapikey = key - if model == 'OAI': - url = "https://api.openai.com/v1/engines" - elif model == 'GooseAI': - url = "https://api.goose.ai/v1/engines" - else: - return + if args.model: + # At this point we have to try to load the model through the selected backend + if args.model_backend not in model_backends: + logger.error("Your selected model backend ({}) isn't in the model backends we know about ({})".format(args.model_backend, ", ".join([x for x in model_backends]))) + exit() + #OK, we've been given a model to load and a backend to load it through. 
Now we need to get a list of parameters and make sure we get what we need to actually load it + parameters = model_backends[args.model_backend].get_requested_parameters(args.model, args.path, "") + ok_to_load = True + mising_parameters = [] + arg_parameters = json.loads(args.model_parameters.replace("'", "\"")) if args.model_parameters != "" and args.model_parameters.lower() != "help" else {} - # Get list of models from OAI - logger.init("OAI Engines", status="Retrieving") - req = requests.get( - url, - headers = { - 'Authorization': 'Bearer '+key - } - ) - if(req.status_code == 200): - r = req.json() - engines = r["data"] - try: - engines = [[en["id"], "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")] for en in engines] - except: - logger.error(engines) - raise + #If we're on colab we'll set everything to GPU0 + if args.colab and args.model_backend == 'Huggingface' and koboldai_vars.on_colab: + arg_parameters['use_gpu'] = True - online_model = "" - changed=False - #Save the key - if not path.exists("settings"): - # If the client settings file doesn't exist, create it - # Write API key to file - os.makedirs('settings', exist_ok=True) - if path.exists("settings/{}.v2_settings".format(model)): - with open("settings/{}.v2_settings".format(model), "r") as file: - js = json.load(file) - if 'online_model' in js: - online_model = js['online_model'] - if "apikey" in js: - if js['apikey'] != key: - changed=True - else: - js = {} - changed=True - - if changed: - with open("settings/{}.v2_settings".format(model), "w") as file: - js["apikey"] = key - file.write(json.dumps(js, indent=3)) - - logger.init_ok("OAI Engines", status="OK") - emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1") - emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2") + for parameter in parameters: + if parameter['uitype'] != "Valid Display": + if parameter['default'] == "" and parameter['id'] not in arg_parameters: + mising_parameters.append(parameter['id']) + ok_to_load = False + elif parameter['id'] not in arg_parameters: + arg_parameters[parameter['id']] = parameter['default'] + if not ok_to_load: + logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") + logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters if x['uitype'] != "Valid Display"]))) + logger.error("Missing: {}".format(", ".join(mising_parameters))) + exit() + if args.model_parameters.lower() == "help": + logger.error("Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") + logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters if x['uitype'] != "Valid Display"]))) + exit() + arg_parameters['id'] = args.model + arg_parameters['model'] = args.model + arg_parameters['path'] = args.path + arg_parameters['menu_path'] = "" + model_backends[args.model_backend].set_input_parameters(arg_parameters) + koboldai_vars.model = args.model + return args.model_backend else: - # Something went wrong, print the message and quit since we can't initialize an engine - logger.init_err("OAI Engines", status="Failed") - logger.error(req.json()) - emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) - -@socketio.on("get_cluster_models") -def get_cluster_models(msg): - koboldai_vars.horde_api_key = msg['key'] or koboldai_vars.horde_api_key - url = msg['url'] or koboldai_vars.horde_url - koboldai_vars.horde_url = url - # Get list of models from public cluster - print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="") - try: - req = requests.get(f"{url}/api/v2/status/models?type=text") - except: - logger.init_err("KAI Horde Models", status="Failed") - logger.error("Provided KoboldAI Horde URL unreachable") - emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"}) - return - if not req.ok: - # Something went wrong, print the message and quit since we can't initialize an engine - logger.init_err("KAI Horde Models", status="Failed") - logger.error(req.json()) - emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1") - return - - engines = req.json() - logger.debug(engines) - try: - engines = [[en["name"], en["name"]] for en in engines] - except: - logger.error(engines) - raise - logger.debug(engines) - - online_model = "" - savesettings() - - logger.init_ok("KAI Horde Models", status="OK") - - emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1") - emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2") - - -def reset_model_settings(): - koboldai_vars.reset_for_model_load() + return "Read Only" + + def unload_model(): global model @@ -1781,7 +1638,7 @@ def unload_model(): koboldai_vars.badwordsids = koboldai_settings.badwordsids_default -def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=False, online_model="", use_breakmodel_args=False, breakmodel_args_default_to_cpu=False, url=None, use_8_bit=False): +def load_model(model_backend, initial_load=False): global model global tokenizer global model_config @@ -1792,188 +1649,48 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal if initial_load: use_breakmodel_args = True - reset_model_settings() koboldai_vars.reset_model() - koboldai_vars.cluster_requested_models = [online_model] if 
isinstance(online_model, str) else online_model - if koboldai_vars.cluster_requested_models == [""]: - koboldai_vars.cluster_requested_models = [] - koboldai_vars.noai = False - if not use_breakmodel_args: - set_aibusy(True) - if koboldai_vars.model != 'ReadOnly': - emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True) - #Have to add a sleep so the server will send the emit for some reason - time.sleep(0.1) + set_aibusy(True) + if koboldai_vars.model != 'ReadOnly': + emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(model_backends[model_backend].model_name if "model_name" in vars(model_backends[model_backend]) else model_backends[model_backend].id)}, broadcast=True) + #Have to add a sleep so the server will send the emit for some reason + time.sleep(0.1) - if gpu_layers is not None: - args.breakmodel_gpulayers = gpu_layers - elif use_breakmodel_args: - gpu_layers = args.breakmodel_gpulayers - if breakmodel_args_default_to_cpu and gpu_layers is None: - gpu_layers = args.breakmodel_gpulayers = [] - if disk_layers is not None: - args.breakmodel_disklayers = int(disk_layers) - elif use_breakmodel_args: - disk_layers = args.breakmodel_disklayers - if breakmodel_args_default_to_cpu and disk_layers is None: - disk_layers = args.breakmodel_disklayers = 0 + if 'model' in globals(): + model.unload() - unload_model() - - if online_model == "": - koboldai_vars.configname = getmodelname() - #Let's set the GooseAI or OpenAI server URLs if that's applicable - else: - koboldai_vars.online_model = online_model - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - koboldai_vars.configname = f"GooseAI_{online_model.replace('/', '_')}" - elif koboldai_vars.model == "CLUSTER" and isinstance(online_model, list): - if len(online_model) != 1: - koboldai_vars.configname = koboldai_vars.model - else: - koboldai_vars.configname = f"{koboldai_vars.model}_{online_model[0].replace('/', '_')}" - else: - koboldai_vars.configname = f"{koboldai_vars.model}_{online_model.replace('/', '_')}" - - if path.exists(get_config_filename()): - changed=False - with open(get_config_filename(), "r") as file: - # Check if API key exists - js = json.load(file) - if 'online_model' in js: - if js['online_model'] != online_model: - changed=True - js['online_model'] = online_model - else: - changed=True - js['online_model'] = online_model - - if changed: - with open("settings/{}.v2_settings".format(koboldai_vars.model), "w") as file: - file.write(json.dumps(js, indent=3)) - - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - args.configname = "GooseAI" + "/" + online_model - elif koboldai_vars.model != "CLUSTER": - args.configname = koboldai_vars.model + "/" + online_model - koboldai_vars.oaiurl = koboldai_vars.oaiengines + "/{0}/completions".format(online_model) # If transformers model was selected & GPU available, ask to use CPU or GPU if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): # loadmodelsettings() # loadsettings() logger.init("GPU support", status="Searching") - koboldai_vars.hascuda = torch.cuda.is_available() and not args.cpu koboldai_vars.bmsupported = 
((koboldai_vars.model_type != 'gpt2') or koboldai_vars.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel - if(args.breakmodel is not None and args.breakmodel): - logger.warning("--breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).") - if(args.breakmodel_layers is not None): - logger.warning("--breakmodel_layers is deprecated. Use --breakmodel_gpulayers instead (see --help for details).") - if(args.model and koboldai_vars.bmsupported and not args.breakmodel_gpulayers and not args.breakmodel_layers and (not args.breakmodel_disklayers)): - logger.warning("Model launched without the --breakmodel_gpulayers argument, defaulting to GPU only mode.") - koboldai_vars.bmsupported = False - if(not koboldai_vars.bmsupported and (args.breakmodel_gpulayers is not None or args.breakmodel_layers is not None or args.breakmodel_disklayers is not None)): - logger.warning("This model does not support hybrid generation. --breakmodel_gpulayers will be ignored.") if(koboldai_vars.hascuda): logger.init_ok("GPU support", status="Found") else: logger.init_warn("GPU support", status="Not Found") - if args.cpu: - koboldai_vars.usegpu = False - gpu_layers = None - disk_layers = None - koboldai_vars.breakmodel = False - elif koboldai_vars.hascuda: - if(koboldai_vars.bmsupported): - koboldai_vars.usegpu = False - koboldai_vars.breakmodel = True - else: - koboldai_vars.breakmodel = False - koboldai_vars.usegpu = use_gpu + #if koboldai_vars.hascuda: + # if(koboldai_vars.bmsupported): + # koboldai_vars.usegpu = False + # koboldai_vars.breakmodel = True + # else: + # koboldai_vars.breakmodel = False + # koboldai_vars.usegpu = use_gpu else: koboldai_vars.default_preset = koboldai_settings.default_preset - - # Ask for API key if InferKit was selected - if koboldai_vars.model == "InferKit": - koboldai_vars.apikey = koboldai_vars.oaiapikey - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - koboldai_vars.configname = "GooseAI" - - # Ask for API key if OpenAI was selected - if koboldai_vars.model == "OAI" and not koboldai_vars.configname: - koboldai_vars.configname = "OAI" - - if koboldai_vars.model == "ReadOnly": - koboldai_vars.noai = True - - # TODO: InferKit - if koboldai_vars.model == "ReadOnly" or koboldai_vars.noai: - pass - elif koboldai_vars.model in ["Colab", "API", "CLUSTER", "OAI"]: - koboldai_vars.colaburl = url or koboldai_vars.colaburl - koboldai_vars.usegpu = False - koboldai_vars.breakmodel = False - - if koboldai_vars.model == "Colab": - from modeling.inference_models.basic_api import BasicAPIInferenceModel - model = BasicAPIInferenceModel() - elif koboldai_vars.model == "API": - from modeling.inference_models.api import APIInferenceModel - model = APIInferenceModel(koboldai_vars.colaburl.replace("/request", "")) - elif koboldai_vars.model == "CLUSTER": - from modeling.inference_models.horde import HordeInferenceModel - model = HordeInferenceModel() - elif koboldai_vars.model == "OAI": - from modeling.inference_models.openai import OpenAIAPIInferenceModel - model = OpenAIAPIInferenceModel() - - model.load(initial_load=initial_load) - # TODO: This check sucks, make a model object or somethign - elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai: - # HF Torch - logger.init("Transformers", status='Starting') - for m in ("GPTJModel", "XGLMModel"): - try: - 
globals()[m] = getattr(__import__("transformers"), m) - except: - pass - - from modeling.inference_models.generic_hf_torch import GenericHFTorchInferenceModel - model = GenericHFTorchInferenceModel( - koboldai_vars.model, - lazy_load=koboldai_vars.lazy_load, - low_mem=args.lowmem - ) - - model.load( - save_model=not (args.colab or args.cacheonly) or args.savemodel, - initial_load=initial_load, - ) - logger.info(f"Pipeline created: {koboldai_vars.model}") - else: - # TPU - from modeling.inference_models.hf_mtj import HFMTJInferenceModel - model = HFMTJInferenceModel( - koboldai_vars.model - ) - model.load( - save_model=not (args.colab or args.cacheonly) or args.savemodel, - initial_load=initial_load, - ) + model = model_backends[model_backend] + model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) + koboldai_vars.model = model.model_name if "model_name" in vars(model) else model.id #Should have model_name, but it could be set to id depending on how it's setup + if koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"): + koboldai_vars.model = os.path.basename(os.path.normpath(model.path)) + logger.info(koboldai_vars.model) + logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer if model: @@ -3993,7 +3710,8 @@ def calcsubmit(txt): bias += [1] * (i - top_index) bias[i] = b["multiplier"] - device = utils.get_auxilary_device() + + device = model.get_auxilary_device() attention_bias.attention_bias = torch.Tensor(bias).to(device) logger.info(f"Bias by {koboldai_vars.memory_attn_bias} -- {attention_bias.attention_bias}") logger.debug("Submit: experimental_features time {}s".format(time.time()-start_time)) @@ -6422,7 +6140,9 @@ def UI_2_retry(data): @socketio.on('load_model_button') @logger.catch def UI_2_load_model_button(data): - sendModelSelection() + emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":"mainmenu"}} for item in model_menu['mainmenu'] if item.should_show()]}) + + #==================================================================# # Event triggered when user clicks the a model @@ -6430,23 +6150,56 @@ def UI_2_load_model_button(data): @socketio.on('select_model') @logger.catch def UI_2_select_model(data): - - #We've selected a menu - if data['model'] in model_menu: - sendModelSelection(menu=data['model']) - #We've selected a custom line - elif data['menu'] in ("NeoCustom", "GPT2Custom"): - get_model_info(data['menu'], directory=data['display_name']) - #We've selected a custom menu folder - elif data['model'] in ("NeoCustom", "GPT2Custom") and 'path' in data: - sendModelSelection(menu=data['model'], folder=data['path']) - #We've selected a custom menu - elif data['model'] in ("NeoCustom", "GPT2Custom", "customhuggingface"): - sendModelSelection(menu=data['model'], folder="./models") + logger.debug("Clicked on model entry: {}".format(data)) + if data["name"] in model_menu and data['ismenu'] == "true": + emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) else: - #We now have some model we want to potentially load. 
- #First we need to send the client the model parameters (layers, etc) - get_model_info(data['model']) + #Get load methods + if 'ismenu' in data and data['ismenu'] == 'false': + valid_loaders = {} + if data['id'] in [item.name for sublist in model_menu for item in model_menu[sublist]]: + #Here if we have a model id that's in our menu, we explicitly use that backend + for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + emit("selected_model_info", {"model_backends": valid_loaders}) + else: + #Here we have a model that's not in our menu structure (either a custom model or a custom path + #so we'll just go through all the possible loaders + for model_backend in model_backends: + if model_backends[model_backend].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + emit("selected_model_info", {"model_backends": valid_loaders}) + else: + #Get directories + paths, breadcrumbs = get_folder_path_info(data['path']) + output = [] + for path in paths: + valid=False + for model_backend in model_backends: + if model_backends[model_backend].is_valid(path[1], path[0], "Custom"): + logger.debug("{} says valid".format(model_backend)) + valid=True + break + else: + logger.debug("{} says invalid".format(model_backend)) + + output.append({'label': path[1], 'name': path[1], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) + emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) + return + + + + +#==================================================================# +# Event triggered when user changes a model parameter and it's set to resubmit +#==================================================================# +@socketio.on('resubmit_model_info') +@logger.catch +def UI_2_resubmit_model_info(data): + valid_loaders = {} + for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"], parameters=data) + emit("selected_model_info", {"model_backends": valid_loaders}) #==================================================================# # Event triggered when user loads a model @@ -6454,26 +6207,10 @@ def UI_2_select_model(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): - if not os.path.exists("settings/"): - os.mkdir("settings") - changed = True - if os.path.exists("settings/" + data['model'].replace('/', '_') + ".breakmodel"): - with open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "r") as file: - file_data = file.read().split('\n')[:2] - if len(file_data) < 2: - file_data.append("0") - gpu_layers, disk_layers = file_data - if gpu_layers == data['gpu_layers'] and disk_layers == data['disk_layers']: - changed = False - if changed: - f = open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "w") - f.write("{}\n{}".format(data['gpu_layers'], data['disk_layers'])) - f.close() - 
koboldai_vars.colaburl = data['url'] + "/request" - koboldai_vars.model = data['model'] - koboldai_vars.custmodpth = data['path'] - print("loading Model") - load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) + logger.debug("Loading model with user input of: {}".format(data)) + model_backends[data['plugin']].set_input_parameters(data) + load_model(data['plugin']) + #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) #==================================================================# # Event triggered when load story is clicked @@ -8095,7 +7832,8 @@ def send_one_time_messages(data, wait_time=0): # Test #==================================================================# def model_info(): - if model_config is not None: + global model_config + if 'model_config' in globals() and model_config is not None: if isinstance(model_config, dict): if 'model_type' in model_config: model_type = str(model_config['model_type']) @@ -10982,10 +10720,8 @@ for schema in config_endpoint_schemas: #==================================================================# # Final startup commands to launch Flask app #==================================================================# -def startup(): - if koboldai_vars.model == "" or koboldai_vars.model is None: - koboldai_vars.model = "ReadOnly" - socketio.start_background_task(load_model, **{'initial_load':True}) +def startup(command_line_backend): + socketio.start_background_task(load_model, *(command_line_backend,), **{'initial_load':True}) print("", end="", flush=True) @@ -10994,7 +10730,7 @@ def run(): global app global tpu_mtj_backend - general_startup() + command_line_backend = general_startup() # Start flask & SocketIO logger.init("Flask", status="Starting") if koboldai_vars.host: @@ -11044,7 +10780,7 @@ def run(): cloudflare = _run_cloudflared(port) koboldai_vars.cloudflare_link = cloudflare - startup() + startup(command_line_backend) if(args.localtunnel or args.ngrok or args.remote): with open('cloudflare.log', 'w') as cloudflarelog: @@ -11064,7 +10800,7 @@ def run(): else: socketio.run(app, port=port) else: - startup() + startup(command_line_backend) if args.unblock: if not args.no_ui: try: @@ -11092,13 +10828,13 @@ def run(): if __name__ == "__main__": run() else: - general_startup() + command_line_backend = general_startup() # Start flask & SocketIO logger.init("Flask", status="Starting") Session(app) logger.init_ok("Flask", status="OK") patch_transformers() - startup() + startup(command_line_backend) koboldai_settings.port = args.port if "port" in args and args.port is not None else 5000 print("{0}\nServer started in WSGI mode!{1}".format(colors.GREEN, colors.END), flush=True) diff --git a/data/one_time_messages.json b/data/one_time_messages.json index 3062827b..7485fd15 100644 --- a/data/one_time_messages.json +++ b/data/one_time_messages.json @@ -8,5 +8,10 @@ "id": 2, "title": "Changes since last version", "message": "
New Features

Phrase Biasing
There is now a Phrase Biasing implementation under Settings -> Biasing. You can now encourage or discourage the AI to generate words or phrases (without needing to use a userscript).

Context viewer
The Context Viewer allows you to see what is sent to the AI. Given that only so much text can be read by the AI at a time, it's useful to know exactly what it's looking at.

Story Commentary
Story Commentary allows custom characters to speak their mind on your story. This can be configured under Settings -> Story Commentary. Characters can be added as World Info entries with a type of \"Commentator\".

New Chat UI (Experimental)
This new interface for Chat Mode is more visually \"chat like\" in comparison to the old text-based mode. To activate it, ensure both Chat Mode (Home -> Game Mode) and Experimental UI (Interface -> Experimental UI) are enabled, then change the Chat Style (Interface -> Chat Style) to \"Messages\". Similarly to the story commentators, characters can be defined in the World Info menu; if a character's name matches a chat character defined in the World Info menu, the image on the character entry will be used as an icon.

Tweaks
Tweaks allow small UI changes to be mixed and matched to create a more personalized interface.

Attention Bias (Experimental)
Attention Bias hopes to cause some parts of the context to be internally weighed more than others in self attention. This is very experimental, and only works on OPT-based models for now.

Genre
The genre menu (Author's Note -> Genre) prepends genre information to the context. You can either choose from preset genres or write your own. Works better on models trained with genre/tag information, including most new models in the model picker.

World Info generation
World Info entries can now have their text generated automatically from a title and type. Powered by whatever model you have active, so effectiveness will vary with model.

Drag and drop import
Importable files can now be dragged into the UI to load them.

NovelAI lorebook/card support
NovelAI lorebooks and cards can now be imported as World Info. If a card is uploaded, the PNG will be used as the World Info image.

Finder (Ctrl+K)
Allows jumping to various UI elements and performing actions quickly. Mode can be adjusted by clicking the mode icon or with hotkeys on an empty search box (Search: '#', World Info: '>', Inference Scratchpad: '!', Image Prompting: '?').

Club import wizard
Prompts imported from aetherroom.club with placeholders will now show a setup prompt where you can input the value of these placeholders.

Context menu
A context menu has been added and is available in several areas. Give it a try by right-clicking on the main text area.

Substitutions
Substitutions allow phrases to be replaced if you or the AI input them into the story. The default Substitutions are disabled and can be enabled with the pencil icon to the right of the entry.

Inference scratchpad
The Inference Scratchpad is a way of prompting the AI in isolation; the AI will not see anything in your story, and nothing the AI responds with will be added to the story. This can be useful in scenarios where you wish to use the AI in a more generic way. For example, you could prompt it with something like \"List of fantasy names:\" to receive such a list.

Error notifications
Errors are generally less opaque to the user. Client-side errors and many server errors will show a notification detailing the error.

Ctrl+Click to jump to World Info entry
Holding Ctrl while clicking on a mention of a World Info tag will bring you to the entry.

Model picker indicators
The model picker now has indicators showing if a model is downloaded, may achieve poor quality, or may not load on your system.

More shortcuts
Press Ctrl+? to view them.

Image Generation
You can now generate flavor images based on the game text at each action. In the settings menu in the home tab, you can click generate image to create an image based on the current text. The image will appear below the game text. Hovering on the image will give you the prompt used to generate the image. You can click on the text of previous actions to see the image associated with that action, and you can right click on the image and hit retry to generate a new image based on that action. Settings for how/where the image is generated are in the left flyout menu under Interface, Image.

Text to speech (Experimental)
Text to speech is now available. To enable it, go to the settings menu, enable Experimental UI, then enable Generate Audio. Audio will be generated for your actions. Play buttons will appear next to the submit button, and right clicking an action will give you a new speak option to start reading from that point.

UI Mode
In response to feedback, we've added different UI mode levels from Simple to Power User. Advanced hides some of the less used options, while Power User shows everything. Simple is very much a work in progress, but it intends to simplify the majority of settings to 3 sliders. Feel free to play with it, but don't expect good results yet.

Presets
Presets are now here. Community presets are pre-loaded in KoboldAI and can be selected from the settings tab in the settings menu, or from the home screen. In addition, you can save your own presets and share them with others (or send them to us for future inclusion). Presets are saved in the presets folder.

Alt Text Gen
With this setting on, the system will insert World Info text one sentence before the word that triggers it in the AI text. This should make the AI pay more attention to it and make it more likely to influence the output.

Alt Multi Gen
If set, multiple generations will be generated sequentially rather than at once. This reduces the amount of VRAM required and can let you generate multiple story options with more demanding models, at the potential expense of speed.

Beep on Complete
There is now an option in the settings menu, Interface tab, called Beep on Complete. If set, the browser will beep when generation is complete. Useful for slow systems.

Privacy Screen (Experimental)
By hitting Ctrl+L, the screen will be blurred for all users until the password is entered and Unlock is clicked (the password is set in the settings menu, Interface tab).

Change Game Text Size
Game text can be adjusted to any size.

No double spaces
When set, double spaces will be replaced by single spaces.

Themes
We now have a theming engine. Themes come in 3 flavors. Palette themes use a more basic theming system entirely in the UI: select the colors from the Palette section and things will change. Advanced themes can have various variables set manually (click the advanced theme button to see); these allow you to go a level deeper than the palette system. Finally, we have custom themes. These are custom CSS code that can do almost anything. All themes can be saved and shared. Saved themes are stored in the themes folder.

Auto Memory (Experimental)
The start of auto-memory is in place and we are looking for feedback. It currently generates the summary but does not put it in memory (though you can copy-paste it). To see it, turn on Experimental UI, go to the story menu, Memory tab, and click generate under Auto-Memory.

General Notes
If you want a place to write stuff down that saves with the story but doesn't affect it, that's what the notes tab is for. It is found under the story menu, Notes tab.

W++ (or SBF)
In World Info entries you can turn on W++ mode. This will allow you to enter data in the W++ format without having to actually write it.

World Info Titles
World Info entries now have titles on them to make it easier to find the one you want. Soon the World Info entries will be collapsed to just the title to make navigation easier.

Download/Upload world info folders
World Info folders can now be downloaded and/or uploaded. This will let you share World Info more easily.

Game Text in AI Context
Game text that will be in the AI's context is now bold in the game screen. This will let you easily see where the AI will stop remembering your game (anything not bolded is \"forgotten\").

World info context
Text that triggers a World Info entry will now be italicized. Hovering over that text will show a tooltip with the World Info text that will be added.

Updated help text
Help text has been expanded throughout the UI.

Context Bar
At the bottom of the story menu is a bar that shows how much of the AI's context is in use, and by what. Different colors correspond to different data types (actions, memory, world info, etc.).

Improvements

Author's Note
The author's note is now inserted between sentences a configurable distance from the end of the story. This should improve the coherence of generated text while keeping the author's note relevant." + }, + "3": { + "id": 3, + "title": "Changes since last version", + "message": "
New Features

Modular Model Backends
Model loading is now accomplished via separate model backend files. This will allow KoboldAI to more easily add new model backends (for example 4-bit, GGML, or whatever else developers want to add) without having to do significant code rework.

Rework of command line arguments
The 
--breakmodel command line arguments have been deleted and if you use those you will have to pass through --model_backend and --model_parameters." } } \ No newline at end of file diff --git a/koboldai_settings.py b/koboldai_settings.py index 7bc88422..29a82406 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -647,7 +647,7 @@ class settings(object): raise class model_settings(settings): - local_only_variables = ['badwordsids', 'apikey', 'default_preset'] + local_only_variables = ['apikey', 'default_preset'] no_save_variables = ['modelconfig', 'custmodpth', 'generated_tkns', 'loaded_layers', 'total_layers', 'total_download_chunks', 'downloaded_chunks', 'presets', 'default_preset', 'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition', @@ -710,7 +710,6 @@ class model_settings(settings): self.modeldim = -1 # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B) self.sampler_order = [6, 0, 1, 2, 3, 4, 5] self.newlinemode = "n" - self.lazy_load = True # Whether or not to use torch_lazy_loader.py for transformers models in order to reduce CPU memory usage self.presets = [] # Holder for presets self.selected_preset = "" self.uid_presets = [] @@ -1203,7 +1202,6 @@ class undefined_settings(settings): super().__setattr__(name, value) logger.error("{} just set {} to {} in koboldai_vars. That variable isn't defined!".format(inspect.stack()[1].function, name, value)) - class system_settings(settings): local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'comregex_ui', @@ -1211,7 +1209,7 @@ class system_settings(settings): 'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui', 'trust_remote_code'] no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy', - 'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', + 'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', 'on_colab' 'summary_tokenizer', 'use_colab_tpu', 'noai', 'disable_set_aibusy', 'cloudflare_link', 'tts_model', 'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states', 'comregex_ai', 'comregex_ui', 'git_repository', 'git_branch', 'trust_remote_code'] settings_name = "system" @@ -1237,7 +1235,7 @@ class system_settings(settings): self.corescript = "default.lua" # Filename of corescript to load self.gpu_device = 0 # Which PyTorch device to use when using pure GPU generation self.savedir = os.getcwd()+"\\stories" - self.hascuda = False # Whether torch has detected CUDA on the system + self.hascuda = torch.cuda.is_available() # Whether torch has detected CUDA on the system self.usegpu = False # Whether to launch pipeline with GPU support self.splist = [] self.spselect = "" # Temporary storage for soft prompt filename to load diff --git a/modeling/inference_model.py b/modeling/inference_model.py index e2329cf9..491d2b05 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -169,6 +169,18 @@ class InferenceModel: ] self.tokenizer = None self.capabilties = ModelCapabilities() + self.model_name = "Not Defined" + + def is_valid(self, model_name, model_path, menu_path, vram): + return True + + def requested_parameters(self, model_name, model_path, menu_path, vram): + return {} + + def set_input_parameters(self, parameters): + for parameter in 
parameters: + setattr(self, parameter, parameters[parameter]) + return def load(self, save_model: bool = False, initial_load: bool = False) -> None: """User-facing load function. Do not override this; try `_load()` instead.""" @@ -176,12 +188,19 @@ class InferenceModel: self._pre_load() self._load(save_model=save_model, initial_load=initial_load) self._post_load() + self._save_settings() + + def unload(self): + return def _pre_load(self) -> None: """Pre load hook. Called before `_load()`.""" def _post_load(self) -> None: """Post load hook. Called after `_load()`.""" + + def _save_settings(self) -> None: + """Save settings hook. Called after `_post_load()`.""" def _load(self, save_model: bool, initial_load: bool) -> None: """Main load method. All logic related to loading the model onto the diff --git a/modeling/inference_models/api.py b/modeling/inference_models/api/class.py similarity index 63% rename from modeling/inference_models/api.py rename to modeling/inference_models/api/class.py index d25505b0..64cfd2ab 100644 --- a/modeling/inference_models/api.py +++ b/modeling/inference_models/api/class.py @@ -6,6 +6,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger @@ -17,15 +18,42 @@ from modeling.inference_model import ( ModelCapabilities, ) +model_backend_name = "KoboldAI API" class APIException(Exception): """To be used for errors when using the Kobold API as an interface.""" -class APIInferenceModel(InferenceModel): - def __init__(self, base_url: str) -> None: +class model_backend(InferenceModel): + def __init__(self) -> None: super().__init__() - self.base_url = base_url.rstrip("/") + self.base_url = "" + self.model_name = "KoboldAI API" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "API" + + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): + if os.path.exists("settings/api.model_backend.settings") and 'base_url' not in vars(self): + with open("settings/api.model_backend.settings", "r") as f: + self.base_url = json.load(f)['base_url'] + requested_parameters = [] + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "base_url", + "default": self.base_url, + "check": {"value": "", 'check': "!="}, + "tooltip": "The URL of the KoboldAI API to connect to.", + "menu_path": "", + "extra_classes": "", + "refresh_model_inputs": False + }) + return requested_parameters + + def set_input_parameters(self, parameters): + self.base_url = parameters['base_url'].rstrip("/") def _load(self, save_model: bool, initial_load: bool) -> None: tokenizer_id = requests.get(f"{self.base_url}/api/v1/model").json()["result"] @@ -35,6 +63,10 @@ class APIInferenceModel(InferenceModel): # Do not allow API to be served over the API self.capabilties = ModelCapabilities(api_host=False) + def _save_settings(self): + with open("settings/api.model_backend.settings", "w") as f: + json.dump({"base_url": self.base_url}, f, indent="") + def _raw_generate( self, prompt_tokens: Union[List[int], torch.Tensor], diff --git a/modeling/inference_models/basic_api.py b/modeling/inference_models/basic_api/class.py similarity index 60% rename from modeling/inference_models/basic_api.py rename to modeling/inference_models/basic_api/class.py index c96eb42c..b492c039 100644 --- a/modeling/inference_models/basic_api.py +++ b/modeling/inference_models/basic_api/class.py @@ -4,6 +4,7 @@ import torch import requests import numpy as np from 
typing import List, Optional, Union +import os import utils from logger import logger @@ -15,19 +16,54 @@ from modeling.inference_model import ( ) +model_backend_name = "KoboldAI Old Colab Method" + class BasicAPIException(Exception): """To be used for errors when using the Basic API as an interface.""" -class BasicAPIInferenceModel(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() + self.colaburl = "" # Do not allow API to be served over the API self.capabilties = ModelCapabilities(api_host=False) + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "Colab" + + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): + if os.path.exists("settings/api.model_backend.settings") and 'colaburl' not in vars(self): + with open("settings/api.model_backend.settings", "r") as f: + self.colaburl = json.load(f)['base_url'] + requested_parameters = [] + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "colaburl", + "default": self.colaburl, + "check": {"value": "", 'check': "!="}, + "tooltip": "The URL of the Colab KoboldAI API to connect to.", + "menu_path": "", + "extra_classes": "", + "refresh_model_inputs": False + }) + return requested_parameters + + def set_input_parameters(self, parameters): + self.colaburl = parameters['colaburl'] + + def _initialize_model(self): + return def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer("EleutherAI/gpt-neo-2.7B") + + def _save_settings(self): + with open("settings/basic_api.model_backend.settings", "w") as f: + json.dump({"colaburl": self.colaburl}, f, indent="") def _raw_generate( self, @@ -68,7 +104,7 @@ class BasicAPIInferenceModel(InferenceModel): } # Create request - req = requests.post(utils.koboldai_vars.colaburl, json=reqdata) + req = requests.post(self.colaburl, json=reqdata) if req.status_code != 200: raise BasicAPIException(f"Bad status code {req.status_code}") diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch/class.py similarity index 92% rename from modeling/inference_models/generic_hf_torch.py rename to modeling/inference_models/generic_hf_torch/class.py index aa602b1a..fd4c2a1a 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -22,8 +22,13 @@ except ModuleNotFoundError as e: from modeling.inference_models.hf_torch import HFTorchInferenceModel +model_backend_name = "Huggingface" -class GenericHFTorchInferenceModel(HFTorchInferenceModel): +class model_backend(HFTorchInferenceModel): + + def _initialize_model(self): + return + def _load(self, save_model: bool, initial_load: bool) -> None: utils.koboldai_vars.allowsp = True @@ -36,9 +41,9 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel): if self.model_name == "NeoCustom": self.model_name = os.path.basename( - os.path.normpath(utils.koboldai_vars.custmodpth) + os.path.normpath(self.path) ) - utils.koboldai_vars.model = self.model_name + utils.koboldai_vars.model = self.model_name # If we specify a model and it's in the root directory, we need to move # it to the models directory (legacy folder structure to new) @@ -54,7 +59,7 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel): "low_cpu_mem_usage": True, } - if utils.koboldai_vars.model_type == "gpt2": + if self.model_type == "gpt2": # We must disable low_cpu_mem_usage and if using a GPT-2 model # 
because GPT-2 is not compatible with this feature yet. tf_kwargs.pop("low_cpu_mem_usage", None) @@ -64,12 +69,14 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel): # If we're using torch_lazy_loader, we need to get breakmodel config # early so that it knows where to load the individual model tensors + logger.debug("lazy_load: {} hascuda: {} breakmodel: {} nobreakmode: {}".format(self.lazy_load, utils.koboldai_vars.hascuda, self.breakmodel, self.nobreakmodel)) if ( self.lazy_load and utils.koboldai_vars.hascuda - and utils.koboldai_vars.breakmodel - and not utils.koboldai_vars.nobreakmodel + and self.breakmodel + and not self.nobreakmodel ): + logger.debug("loading breakmodel") self.breakmodel_device_config(self.model_config) if self.lazy_load: @@ -241,11 +248,12 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel): self.patch_embedding() + if utils.koboldai_vars.hascuda: - if utils.koboldai_vars.usegpu: + if self.usegpu: # Use just VRAM self.model = self.model.half().to(utils.koboldai_vars.gpu_device) - elif utils.koboldai_vars.breakmodel: + elif self.breakmodel: # Use both RAM and VRAM (breakmodel) if not self.lazy_load: self.breakmodel_device_config(self.model.config) @@ -260,6 +268,11 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel): self._move_to_devices() else: self.model = self.model.to("cpu").float() - + + self.model.kai_model = self utils.koboldai_vars.modeldim = self.get_hidden_size() + + def _save_settings(self): + with open("settings/{}.generic_hf_torch.model_backend.settings".format(self.model_name.replace("/", "_")), "w") as f: + json.dump({"layers": self.layers if 'layers' in vars(self) else [], "disk_layers": self.disk_layers if 'disk_layers' in vars(self) else 0}, f, indent="") \ No newline at end of file diff --git a/modeling/inference_models/gooseai/class.py b/modeling/inference_models/gooseai/class.py new file mode 100644 index 00000000..934f15dd --- /dev/null +++ b/modeling/inference_models/gooseai/class.py @@ -0,0 +1,33 @@ +import torch +import requests +import numpy as np +from typing import List, Optional, Union +import os + +import utils +from logger import logger +from modeling.inference_model import ( + GenerationResult, + GenerationSettings, + InferenceModel, +) + +from modeling.inference_models.openai_gooseai import model_backend as openai_gooseai_model_backend + +model_backend_name = "GooseAI" + +class OpenAIAPIError(Exception): + def __init__(self, error_type: str, error_message) -> None: + super().__init__(f"{error_type}: {error_message}") + + +class model_backend(openai_gooseai_model_backend): + """InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.url = "https://api.goose.ai/v1/engines" + self.source = "GooseAI" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "GooseAI" \ No newline at end of file diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 37f473ca..4226d1b1 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -1,25 +1,225 @@ -import os +import os, sys from typing import Optional from transformers import AutoConfig - +import warnings import utils +import json import koboldai_settings from logger import logger from modeling.inference_model import InferenceModel +import torch +import gc class HFInferenceModel(InferenceModel): - def __init__(self, model_name: str) -> None: + def __init__(self) -> None: super().__init__() self.model_config = None - 
self.model_name = model_name + #self.model_name = model_name self.model = None self.tokenizer = None + self.badwordsids = koboldai_settings.badwordsids_default + self.usegpu = False + + def is_valid(self, model_name, model_path, menu_path): + try: + if model_path is not None and os.path.exists(model_path): + self.model_config = AutoConfig.from_pretrained(model_path) + elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): + self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + else: + self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + return True + except: + return False + + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): + requested_parameters = [] + if not self.hf_torch: + return [] + if model_name == 'customhuggingface': + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "Huggingface Model Name", + "id": "custom_model_name", + "default": parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Model name from https://huggingface.co/", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }) + + if model_name != 'customhuggingface' or "custom_model_name" in parameters: + model_name = parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else model_name + if model_path is not None and os.path.exists(model_path): + self.model_config = AutoConfig.from_pretrained(model_path) + elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): + self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + else: + self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None + layer_count = None if hasattr(self, "get_model_type") and self.get_model_type() == "gpt2" else layer_count #Skip layers if we're a GPT2 model as it doesn't support breakmodel + if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: + if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self): + with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f: + temp = json.load(f) + break_values = temp['layers'] if 'layers' in temp else [layer_count] + disk_blocks = temp['disk_layers'] if 'disk_layers' in temp else 0 + else: + break_values = [layer_count] + disk_blocks = 0 + + break_values = [int(x) for x in break_values if x != '' and x is not None] + gpu_count = torch.cuda.device_count() + break_values += [0] * (gpu_count - len(break_values)) + if disk_blocks is not None: + break_values += [int(disk_blocks)] + requested_parameters.append({ + "uitype": "Valid Display", + "unit": "text", + "label": "Current Allocated Layers: 
%1/{}".format(layer_count), #%1 will be the validation value + "id": "valid_layers", + "max": layer_count, + "step": 1, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + for i in range(gpu_count): + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "{} Layers".format(torch.cuda.get_device_name(i)), + "id": "{}_Layers".format(i), + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": break_values[i], + "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "CPU Layers", + "id": "CPU_Layers", + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": layer_count - sum(break_values), + "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + if disk_blocks is not None: + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "Disk Layers", + "id": "Disk_Layers", + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": disk_blocks, + "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. 
Use as a last resort.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + else: + requested_parameters.append({ + "uitype": "toggle", + "unit": "bool", + "label": "Use GPU", + "id": "use_gpu", + "default": True, + "tooltip": "Whether or not to use the GPU", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + + + return requested_parameters + + def set_input_parameters(self, parameters): + if self.hf_torch and hasattr(self, "get_model_type") and self.get_model_type() != "gpt2": + import breakmodel + layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None + if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: + gpu_count = torch.cuda.device_count() + layers = [] + for i in range(gpu_count): + if isinstance(parameters["{}_Layers".format(i)], str) and parameters["{}_Layers".format(i)].isnumeric(): + layers.append(int(parameters["{}_Layers".format(i)])) + elif isinstance(parameters["{}_Layers".format(i)], str): + layers.append(None) + else: + layers.append(parameters["{}_Layers".format(i)]) + self.cpu_layers = int(parameters['CPU_Layers']) if 'CPU_Layers' in parameters else None + if isinstance(self.cpu_layers, str): + self.cpu_layers = int(self.cpu_layers) if self.cpu_layers.isnumeric() else 0 + self.layers = layers + self.disk_layers = parameters['Disk_Layers'] if 'Disk_Layers' in parameters else 0 + if isinstance(self.disk_layers, str): + self.disk_layers = int(self.disk_layers) if self.disk_layers.isnumeric() else 0 + breakmodel.gpu_blocks = layers + breakmodel.disk_blocks = self.disk_layers + self.usegpu = self.cpu_layers == 0 and breakmodel.disk_blocks == 0 and sum(self.layers)-self.layers[0] == 0 + self.model_type = self.get_model_type() + self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel + self.lazy_load = True + logger.debug("Model type: {}".format(self.model_type)) + else: + logger.debug("Disabling breakmodel and lazyload") + self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.breakmodel = False + self.lazy_load = False + logger.info(parameters) + self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id'] + self.path = parameters['path'] if 'path' in parameters else None + + def unload(self): + if hasattr(self, 'model'): + self.model = None + if hasattr(self, 'tokenizer'): + self.tokenizer = None + if hasattr(self, 'model_config'): + self.model_config = None + with torch.no_grad(): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="torch.distributed.reduce_op is deprecated") + for tensor in gc.get_objects(): + try: + if torch.is_tensor(tensor): + tensor.set_(torch.tensor((), device=tensor.device, dtype=tensor.dtype)) + except: + pass + gc.collect() + try: + with torch.no_grad(): + torch.cuda.empty_cache() + except: + pass def _post_load(self) -> None: + self.badwordsids = koboldai_settings.badwordsids_default + self.model_type = str(self.model_config.model_type) # These are model specific tokenizer overrides if a model has bad defaults - if utils.koboldai_vars.model_type == "llama": + if self.model_type == "llama": # Note: 
self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer self.tokenizer.add_bos_token = False @@ -103,32 +303,32 @@ class HFInferenceModel(InferenceModel): return result object.__setattr__(self.tokenizer, '__call__', call_wrapper.__get__(self.tokenizer)) - elif utils.koboldai_vars.model_type == "opt": + elif self.model_type == "opt": self.tokenizer._koboldai_header = self.tokenizer.encode("") self.tokenizer.add_bos_token = False self.tokenizer.add_prefix_space = False # Change newline behavior to match model quirks - if utils.koboldai_vars.model_type == "xglm": + if self.model_type == "xglm": # Default to newline mode if using XGLM utils.koboldai_vars.newlinemode = "s" - elif utils.koboldai_vars.model_type in ["opt", "bloom"]: + elif self.model_type in ["opt", "bloom"]: # Handle but don't convert newlines if using Fairseq models that have newlines trained in them utils.koboldai_vars.newlinemode = "ns" # Clean up tokens that cause issues if ( - utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default - and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj") + self.badwordsids == koboldai_settings.badwordsids_default + and self.model_type not in ("gpt2", "gpt_neo", "gptj") ): - utils.koboldai_vars.badwordsids = [ + self.badwordsids = [ [v] for k, v in self.tokenizer.get_vocab().items() if any(c in str(k) for c in "[]") ] if utils.koboldai_vars.newlinemode == "n": - utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id]) + self.badwordsids.append([self.tokenizer.eos_token_id]) return super()._post_load() @@ -139,9 +339,12 @@ class HFInferenceModel(InferenceModel): Returns a string of the model's path locally, or None if it is not downloaded. If ignore_existance is true, it will always return a path. 
""" + if self.path is not None: + if os.path.exists(self.path): + return self.path if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]: - model_path = utils.koboldai_vars.custmodpth + model_path = self.path assert model_path # Path can be absolute or relative to models directory @@ -158,7 +361,7 @@ class HFInferenceModel(InferenceModel): return model_path - basename = utils.koboldai_vars.model.replace("/", "_") + basename = self.model_name.replace("/", "_") if legacy: ret = basename else: @@ -176,15 +379,15 @@ class HFInferenceModel(InferenceModel): revision=utils.koboldai_vars.revision, cache_dir="cache", ) - utils.koboldai_vars.model_type = self.model_config.model_type + self.model_type = self.model_config.model_type except ValueError: - utils.koboldai_vars.model_type = { + self.model_type = { "NeoCustom": "gpt_neo", "GPT2Custom": "gpt2", - }.get(utils.koboldai_vars.model) + }.get(self.model) - if not utils.koboldai_vars.model_type: + if not self.model_type: logger.warning( "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)" ) - utils.koboldai_vars.model_type = "gpt_neo" + self.model_type = "gpt_neo" \ No newline at end of file diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj/class.py similarity index 94% rename from modeling/inference_models/hf_mtj.py rename to modeling/inference_models/hf_mtj/class.py index d7035cbf..e029db9d 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -19,18 +19,16 @@ from modeling.inference_model import ( from modeling.inference_models.hf import HFInferenceModel from modeling.tokenizer import GenericTokenizer -# This file shouldn't be imported unless using the TPU -assert utils.koboldai_vars.use_colab_tpu -import tpu_mtj_backend +model_backend_name = "Huggingface MTJ" -class HFMTJInferenceModel(HFInferenceModel): +class model_backend(HFInferenceModel): def __init__( self, - model_name: str, + #model_name: str, ) -> None: - super().__init__(model_name) - + super().__init__() + self.hf_torch = False self.model_config = None self.capabilties = ModelCapabilities( embedding_manipulation=False, @@ -39,8 +37,13 @@ class HFMTJInferenceModel(HFInferenceModel): post_token_probs=False, uses_tpu=True, ) + + def is_valid(self, model_name, model_path, menu_path): + # This file shouldn't be imported unless using the TPU + return utils.koboldai_vars.use_colab_tpu and super().is_valid(model_name, model_path, menu_path) def setup_mtj(self) -> None: + import tpu_mtj_backend def mtj_warper_callback(scores) -> "np.array": scores_shape = scores.shape scores_list = scores.tolist() @@ -147,7 +150,7 @@ class HFMTJInferenceModel(HFInferenceModel): tpu_mtj_backend.socketio = utils.socketio - if utils.koboldai_vars.model == "TPUMeshTransformerGPTNeoX": + if self.model_name == "TPUMeshTransformerGPTNeoX": utils.koboldai_vars.badwordsids = utils.koboldai_vars.badwordsids_neox print( @@ -155,7 +158,7 @@ class HFMTJInferenceModel(HFInferenceModel): Colors.PURPLE, Colors.END ) ) - if utils.koboldai_vars.model in ( + if self.model_name in ( "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX", ) and ( @@ -165,7 +168,7 @@ class HFMTJInferenceModel(HFInferenceModel): raise FileNotFoundError( f"The specified model path {repr(utils.koboldai_vars.custmodpth)} is not the path to a valid folder" ) - if utils.koboldai_vars.model == "TPUMeshTransformerGPTNeoX": + if self.model_name == 
"TPUMeshTransformerGPTNeoX": tpu_mtj_backend.pad_token_id = 2 tpu_mtj_backend.koboldai_vars = utils.koboldai_vars @@ -176,13 +179,15 @@ class HFMTJInferenceModel(HFInferenceModel): tpu_mtj_backend.settings_callback = mtj_settings_callback def _load(self, save_model: bool, initial_load: bool) -> None: + import tpu_mtj_backend self.setup_mtj() self.init_model_config() utils.koboldai_vars.allowsp = True + logger.info(self.model_name) tpu_mtj_backend.load_model( - utils.koboldai_vars.model, - hf_checkpoint=utils.koboldai_vars.model + self.model_name, + hf_checkpoint=self.model_name not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and utils.koboldai_vars.use_colab_tpu, socketio_queue=koboldai_settings.queue, @@ -198,7 +203,7 @@ class HFMTJInferenceModel(HFInferenceModel): if ( utils.koboldai_vars.badwordsids is koboldai_settings.badwordsids_default - and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj") + and self.model_type not in ("gpt2", "gpt_neo", "gptj") ): utils.koboldai_vars.badwordsids = [ [v] @@ -207,6 +212,7 @@ class HFMTJInferenceModel(HFInferenceModel): ] def get_soft_tokens(self) -> np.array: + import tpu_mtj_backend soft_tokens = None if utils.koboldai_vars.sp is None: @@ -258,6 +264,7 @@ class HFMTJInferenceModel(HFInferenceModel): seed: Optional[int] = None, **kwargs, ) -> GenerationResult: + import tpu_mtj_backend warpers.update_settings() soft_tokens = self.get_soft_tokens() diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index cc7af713..2f575e73 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -53,15 +53,12 @@ LOG_SAMPLER_NO_EFFECT = False class HFTorchInferenceModel(HFInferenceModel): - def __init__( - self, - model_name: str, - lazy_load: bool, - low_mem: bool, - ) -> None: - super().__init__(model_name) - self.lazy_load = lazy_load - self.low_mem = low_mem + def __init__(self) -> None: + super().__init__() + self.hf_torch = True + self.lazy_load = True + self.low_mem = False + self.nobreakmodel = False self.post_token_hooks = [ PostTokenHooks.stream_tokens, @@ -128,7 +125,19 @@ class HFTorchInferenceModel(HFInferenceModel): else: return "Unknown" + def get_auxilary_device(self): + """Get device auxilary tensors like inputs should be stored on.""" + + # NOTE: TPU isn't a torch device, so TPU stuff gets sent to CPU. + if utils.koboldai_vars.hascuda and self.usegpu: + return utils.koboldai_vars.gpu_device + elif utils.koboldai_vars.hascuda and self.breakmodel: + import breakmodel + return breakmodel.primary_device + return "cpu" + def _post_load(m_self) -> None: + if not utils.koboldai_vars.model_type: utils.koboldai_vars.model_type = m_self.get_model_type() @@ -211,40 +220,6 @@ class HFTorchInferenceModel(HFInferenceModel): new_sample.old_sample = transformers.GenerationMixin.sample use_core_manipulations.sample = new_sample - # PEFT Loading. This MUST be done after all save_pretrained calls are - # finished on the main model. 
- if utils.args.peft: - from peft import PeftModel, PeftConfig - local_peft_dir = os.path.join(m_self.get_local_model_path(), "peft") - - # Make PEFT dir if it doesn't exist - try: - os.makedirs(local_peft_dir) - except FileExistsError: - pass - - peft_local_path = os.path.join(local_peft_dir, utils.args.peft.replace("/", "_")) - logger.debug(f"Loading PEFT '{utils.args.peft}', possible local path is '{peft_local_path}'.") - - peft_installed_locally = True - possible_peft_locations = [peft_local_path, utils.args.peft] - - for i, location in enumerate(possible_peft_locations): - try: - m_self.model = PeftModel.from_pretrained(m_self.model, location) - logger.debug(f"Loaded PEFT at '{location}'") - break - except ValueError: - peft_installed_locally = False - if i == len(possible_peft_locations) - 1: - raise RuntimeError(f"Unable to load PeftModel for given name '{utils.args.peft}'. Does it exist?") - except RuntimeError: - raise RuntimeError("Error while loading PeftModel. Are you using the correct model?") - - if not peft_installed_locally: - logger.debug(f"PEFT not saved to models folder; saving to '{peft_local_path}'") - m_self.model.save_pretrained(peft_local_path) - return super()._post_load() def _raw_generate( @@ -262,7 +237,7 @@ class HFTorchInferenceModel(HFInferenceModel): else: gen_in = prompt_tokens - device = utils.get_auxilary_device() + device = self.get_auxilary_device() gen_in = gen_in.to(device) additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else [] @@ -272,19 +247,14 @@ class HFTorchInferenceModel(HFInferenceModel): with torch.no_grad(): start_time = time.time() - - # HEED & BEWARE: All arguments passed to self.model.generate MUST be - # kwargs; see https://github.com/huggingface/peft/issues/232. If they - # aren't, PeftModel will EXPLODE!!!! But nothing will happen without - # a PEFT loaded so it's sneaky. genout = self.model.generate( - input_ids=gen_in, + gen_in, do_sample=True, max_length=min( len(prompt_tokens) + max_new, utils.koboldai_vars.max_length ), repetition_penalty=1.0, - bad_words_ids=utils.koboldai_vars.badwordsids + bad_words_ids=self.badwordsids + additional_bad_words_ids, use_cache=True, num_return_sequences=batch_count, @@ -304,7 +274,6 @@ class HFTorchInferenceModel(HFInferenceModel): def _get_model(self, location: str, tf_kwargs: Dict): tf_kwargs["revision"] = utils.koboldai_vars.revision tf_kwargs["cache_dir"] = "cache" - tf_kwargs["trust_remote_code"] = utils.koboldai_vars.trust_remote_code # If we have model hints for legacy model, use them rather than fall back. 
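
The lazy-load hunks further down keep the same bisect-based layer placement but now key it off `self.usegpu`/`self.breakmodel`. Because the arguments of the `bisect.bisect_right(...)` call fall outside the context shown in this diff, the condensed restatement below fills them in with the cumulative form in which `breakmodel.gpu_blocks` is used elsewhere in the change; treat it as a reading aid with invented example numbers, not as new behaviour:

```python
# Reading aid only: the per-layer device rule used by the lazy-load device map,
# minus the single-GPU (usegpu) and CPU-only short-circuits.
import bisect
from itertools import accumulate

def device_for_layer(layer, gpu_blocks, disk_blocks, ram_blocks):
    """ram_blocks = layers kept off the GPUs (disk cache first, then CPU RAM)."""
    if layer < disk_blocks and layer < ram_blocks:
        return "disk"
    if layer < ram_blocks:
        return "shared"  # breakmodel's CPU-RAM blocks
    # Remaining layers are spread over the GPUs in gpu_blocks order.
    cumulative = list(accumulate(gpu_blocks))
    return bisect.bisect_right(cumulative, layer - ram_blocks)  # GPU index

# e.g. a 32-layer model with gpu_blocks=[18, 10], disk_blocks=2, ram_blocks=4:
# layers 0-1 -> "disk", 2-3 -> "shared", 4-21 -> GPU 0, 22-31 -> GPU 1
```
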
try: @@ -444,8 +413,6 @@ class HFTorchInferenceModel(HFInferenceModel): if not self.lazy_load: return - if utils.args.breakmodel_disklayers is not None: - breakmodel.disk_blocks = utils.args.breakmodel_disklayers disk_blocks = breakmodel.disk_blocks gpu_blocks = breakmodel.gpu_blocks @@ -489,10 +456,10 @@ class HFTorchInferenceModel(HFInferenceModel): ): device_map[key] = ( utils.koboldai_vars.gpu_device - if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu + if utils.koboldai_vars.hascuda and self.usegpu else "cpu" if not utils.koboldai_vars.hascuda - or not utils.koboldai_vars.breakmodel + or not self.breakmodel else breakmodel.primary_device ) else: @@ -508,12 +475,12 @@ class HFTorchInferenceModel(HFInferenceModel): ) device = ( utils.koboldai_vars.gpu_device - if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu + if utils.koboldai_vars.hascuda and self.usegpu else "disk" if layer < disk_blocks and layer < ram_blocks else "cpu" if not utils.koboldai_vars.hascuda - or not utils.koboldai_vars.breakmodel + or not self.breakmodel else "shared" if layer < ram_blocks else bisect.bisect_right( @@ -607,6 +574,7 @@ class HFTorchInferenceModel(HFInferenceModel): ) ) # print(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ", end="", flush=True) + #logger.debug(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ") model_dict[key] = model_dict[key].materialize( f, map_location="cpu" ) @@ -617,15 +585,15 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) and model_dict[key].dtype is torch.float32 ): model_dict[key] = model_dict[key].to(torch.float16) if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel and model_dict[key].dtype is torch.float16 ): model_dict[key] = model_dict[key].to(torch.float32) @@ -663,14 +631,14 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) ): dtype = torch.float16 if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel ): dtype = torch.float32 if ( @@ -726,16 +694,16 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) and model_dict[key].dtype is torch.float32 ): model_dict[key] = model_dict[key].to(torch.float16) if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel and model_dict[key].dtype is torch.float16 ): model_dict[key] = model_dict[key].to(torch.float32) @@ -774,14 +742,14 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) ): dtype = torch.float16 if breakmodel.primary_device == "cpu" or ( - not 
utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel ): dtype = torch.float32 if ( @@ -815,7 +783,7 @@ class HFTorchInferenceModel(HFInferenceModel): if always_use or ( utils.koboldai_vars.hascuda and self.low_mem - and (utils.koboldai_vars.usegpu or utils.koboldai_vars.breakmodel) + and (self.usegpu or self.breakmodel) ): original_dtype = torch.get_default_dtype() torch.set_default_dtype(torch.float16) @@ -830,6 +798,8 @@ class HFTorchInferenceModel(HFInferenceModel): device_count = torch.cuda.device_count() if device_count < 2: primary = None + logger.debug("n_layers: {}".format(n_layers)) + logger.debug("gpu blocks: {}".format(breakmodel.gpu_blocks)) gpu_blocks = breakmodel.gpu_blocks + ( device_count - len(breakmodel.gpu_blocks) ) * [0] @@ -860,155 +830,47 @@ class HFTorchInferenceModel(HFInferenceModel): n_layers = utils.num_layers(config) + logger.debug("gpu blocks before modification: {}".format(breakmodel.gpu_blocks)) + if utils.args.cpu: breakmodel.gpu_blocks = [0] * n_layers return - elif ( - utils.args.breakmodel_gpulayers is not None - or utils.args.breakmodel_disklayers is not None - ): - try: - if not utils.args.breakmodel_gpulayers: - breakmodel.gpu_blocks = [] - else: - breakmodel.gpu_blocks = list( - map(int, utils.args.breakmodel_gpulayers.split(",")) - ) - assert len(breakmodel.gpu_blocks) <= torch.cuda.device_count() - s = n_layers - for i in range(len(breakmodel.gpu_blocks)): - if breakmodel.gpu_blocks[i] <= -1: - breakmodel.gpu_blocks[i] = s - break - else: - s -= breakmodel.gpu_blocks[i] - assert sum(breakmodel.gpu_blocks) <= n_layers - n_layers -= sum(breakmodel.gpu_blocks) - if utils.args.breakmodel_disklayers is not None: - assert utils.args.breakmodel_disklayers <= n_layers - breakmodel.disk_blocks = utils.args.breakmodel_disklayers - n_layers -= utils.args.breakmodel_disklayers - except: - logger.warning( - "--breakmodel_gpulayers is malformatted. Please use the --help option to see correct usage of --breakmodel_gpulayers. Defaulting to all layers on device 0." - ) - breakmodel.gpu_blocks = [n_layers] - n_layers = 0 - elif utils.args.breakmodel_layers is not None: - breakmodel.gpu_blocks = [ - n_layers - max(0, min(n_layers, utils.args.breakmodel_layers)) - ] - n_layers -= sum(breakmodel.gpu_blocks) - elif utils.args.model is not None: + elif breakmodel.gpu_blocks == []: logger.info("Breakmodel not specified, assuming GPU 0") breakmodel.gpu_blocks = [n_layers] n_layers = 0 + else: - device_count = torch.cuda.device_count() - if device_count > 1: - print( - Colors.CYAN - + "\nPlease select one of your GPUs to be your primary GPU." - ) - print( - "VRAM usage in your primary GPU will be higher than for your other ones." 
- ) - print("It is recommended you make your fastest GPU your primary GPU.") - self.breakmodel_device_list(n_layers) - while True: - primaryselect = input("device ID> ") - if ( - primaryselect.isnumeric() - and 0 <= int(primaryselect) < device_count - ): - breakmodel.primary_device = int(primaryselect) - break - else: - print( - f"{Colors.RED}Please enter an integer between 0 and {device_count-1}.{Colors.END}" - ) - else: - breakmodel.primary_device = 0 - - print( - Colors.PURPLE - + "\nIf you don't have enough VRAM to run the model on a single GPU" - ) - print( - "you can split the model between your CPU and your GPU(s), or between" - ) - print("multiple GPUs if you have more than one.") - print("By putting more 'layers' on a GPU or CPU, more computations will be") - print( - "done on that device and more VRAM or RAM will be required on that device" - ) - print("(roughly proportional to number of layers).") - print( - "It should be noted that GPUs are orders of magnitude faster than the CPU." - ) - print( - f"This model has{Colors.YELLOW} {n_layers} {Colors.PURPLE}layers.{Colors.END}\n" - ) - - for i in range(device_count): - self.breakmodel_device_list( - n_layers, primary=breakmodel.primary_device, selected=i - ) - print( - f"{Colors.CYAN}\nHow many of the remaining{Colors.YELLOW} {n_layers} {Colors.CYAN}layers would you like to put into device {i}?\nYou can also enter -1 to allocate all remaining layers to this device.{Colors.END}\n" - ) - while True: - layerselect = input("# of layers> ") - if ( - layerselect.isnumeric() or layerselect.strip() == "-1" - ) and -1 <= int(layerselect) <= n_layers: - layerselect = int(layerselect) - layerselect = n_layers if layerselect == -1 else layerselect - breakmodel.gpu_blocks.append(layerselect) - n_layers -= layerselect - break - else: - print( - f"{Colors.RED}Please enter an integer between -1 and {n_layers}.{Colors.END}" - ) - if n_layers == 0: + s = n_layers + for i in range(len(breakmodel.gpu_blocks)): + if breakmodel.gpu_blocks[i] <= -1: + breakmodel.gpu_blocks[i] = s break - - if n_layers > 0: - self.breakmodel_device_list( - n_layers, primary=breakmodel.primary_device, selected=-1 - ) - print( - f"{Colors.CYAN}\nHow many of the remaining{Colors.YELLOW} {n_layers} {Colors.CYAN}layers would you like to put into the disk cache?\nYou can also enter -1 to allocate all remaining layers to this device.{Colors.END}\n" - ) - while True: - layerselect = input("# of layers> ") - if ( - layerselect.isnumeric() or layerselect.strip() == "-1" - ) and -1 <= int(layerselect) <= n_layers: - layerselect = int(layerselect) - layerselect = n_layers if layerselect == -1 else layerselect - breakmodel.disk_blocks = layerselect - n_layers -= layerselect - break - else: - print( - f"{Colors.RED}Please enter an integer between -1 and {n_layers}.{Colors.END}" - ) + else: + s -= breakmodel.gpu_blocks[i] + assert sum(breakmodel.gpu_blocks) <= n_layers + n_layers -= sum(breakmodel.gpu_blocks) + if breakmodel.disk_blocks is not None: + assert breakmodel.disk_blocks <= n_layers + n_layers -= breakmodel.disk_blocks logger.init_ok("Final device configuration:", status="Info") self.breakmodel_device_list(n_layers, primary=breakmodel.primary_device) + with open("settings/{}.breakmodel".format(self.model_name.replace("/", "_")), "w") as file: + file.write("{}\n{}".format(",".join(map(str, breakmodel.gpu_blocks)), breakmodel.disk_blocks)) # If all layers are on the same device, use the old GPU generation mode while len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0: 
breakmodel.gpu_blocks.pop() + self.breakmodel = True if len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in ( -1, utils.num_layers(config), ): - utils.koboldai_vars.breakmodel = False - utils.koboldai_vars.usegpu = True + logger.debug("All layers on same GPU. Breakmodel disabled") + self.breakmodel = False + self.usegpu = True utils.koboldai_vars.gpu_device = len(breakmodel.gpu_blocks) - 1 return @@ -1017,6 +879,6 @@ class HFTorchInferenceModel(HFInferenceModel): import breakmodel breakmodel.primary_device = "cpu" - utils.koboldai_vars.breakmodel = False - utils.koboldai_vars.usegpu = False + self.breakmodel = False + self.usegpu = False return diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde/class.py similarity index 52% rename from modeling/inference_models/horde.py rename to modeling/inference_models/horde/class.py index c6294374..f7da6604 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde/class.py @@ -1,10 +1,11 @@ from __future__ import annotations -import time +import time, json import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger @@ -16,25 +17,131 @@ from modeling.inference_model import ( ModelCapabilities, ) +model_backend_name = "Horde" class HordeException(Exception): """To be used for errors on server side of the Horde.""" -class HordeInferenceModel(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() + self.url = "https://horde.koboldai.net" + self.key = "0000000000" + self.models = self.get_cluster_models() + self.model_name = "Horde" + self.model = [] + # Do not allow API to be served over the API self.capabilties = ModelCapabilities(api_host=False) + def is_valid(self, model_name, model_path, menu_path): + logger.debug("Horde Models: {}".format(self.models)) + return model_name == "CLUSTER" or model_name in [x['value'] for x in self.models] + + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): + if os.path.exists("settings/api.model_backend.settings") and 'base_url' not in vars(self): + with open("settings/horde.model_backend.settings", "r") as f: + temp = json.load(f) + self.base_url = temp['url'] + self.key = temp['key'] + if 'key' in parameters: + self.key = parameters['key'] + if 'url' in parameters: + self.url = parameters['url'] + requested_parameters = [] + requested_parameters.extend([{ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "url", + "default": self.url if 'url' not in parameters else parameters['url'], + "tooltip": "URL to the horde.", + "menu_path": "", + "check": {"value": "", 'check': "!="}, + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "text", + "unit": "text", + "label": "Key", + "id": "key", + "default": self.key if 'key' not in parameters else parameters['key'], + "check": {"value": "", 'check': "!="}, + "tooltip": "User Key to use when connecting to Horde (0000000000 is anonymous).", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "dropdown", + "unit": "text", + "label": "Model", + "id": "model", + "default": model_name, + "check": {"value": "", 'check': "!="}, + 'multiple': True, + "tooltip": "Which model to use when running OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": False, + "extra_classes": "", + 'children': self.models, + + }]) + return requested_parameters + + def set_input_parameters(self, 
parameters): + self.key = parameters['key'].strip() + self.model = parameters['model'] + self.url = parameters['url'] + + def get_cluster_models(self): + # Get list of models from public cluster + try: + req = requests.get(f"{self.url}/api/v2/status/models?type=text") + except: + logger.init_err("KAI Horde Models", status="Failed") + logger.error("Provided KoboldAI Horde URL unreachable") + emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"}) + return + if not req.ok: + # Something went wrong, print the message and quit since we can't initialize an engine + logger.init_err("KAI Horde Models", status="Failed") + logger.error(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1") + return + + engines = req.json() + try: + engines = [{"text": "All", "value": "all"}] + [{"text": en["name"], "value": en["name"]} for en in engines] + except: + logger.error(engines) + raise + logger.debug(engines) + + online_model = "" + + logger.init_ok("KAI Horde Models", status="OK") + + return engines + def _load(self, save_model: bool, initial_load: bool) -> None: + tokenizer_name = "gpt2" + if len(self.model) > 0: + if self.model[0] == "all" and len(self.model) > 1: + tokenizer_name = self.model[1] + else: + tokenizer_name = self.model[0] self.tokenizer = self._get_tokenizer( - utils.koboldai_vars.cluster_requested_models[0] - if len(utils.koboldai_vars.cluster_requested_models) > 0 - else "gpt2", + tokenizer_name ) + def _save_settings(self): + with open("settings/horde.model_backend.settings", "w") as f: + json.dump({"key": self.key, "url": self.url}, f, indent="") + def _raw_generate( self, prompt_tokens: Union[List[int], torch.Tensor], @@ -80,14 +187,14 @@ class HordeInferenceModel(InferenceModel): client_agent = "KoboldAI:2.0.0:koboldai.org" cluster_headers = { - "apikey": utils.koboldai_vars.horde_api_key, + "apikey": self.key, "Client-Agent": client_agent, } try: # Create request req = requests.post( - f"{utils.koboldai_vars.horde_url}/api/v2/generate/text/async", + f"{self.url}/api/v2/generate/text/async", json=cluster_metadata, headers=cluster_headers, ) @@ -125,7 +232,7 @@ class HordeInferenceModel(InferenceModel): while not finished: try: req = requests.get( - f"{utils.koboldai_vars.horde_url}/api/v2/generate/text/status/{request_id}", + f"{self.url}/api/v2/generate/text/status/{request_id}", headers=cluster_agent_headers, ) except requests.exceptions.ConnectionError: diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py deleted file mode 100644 index 1441ae2f..00000000 --- a/modeling/inference_models/openai.py +++ /dev/null @@ -1,106 +0,0 @@ -import torch -import requests -import numpy as np -from typing import List, Optional, Union - -import utils -from logger import logger -from modeling.inference_model import ( - GenerationResult, - GenerationSettings, - InferenceModel, -) - - -class OpenAIAPIError(Exception): - def __init__(self, error_type: str, error_message) -> None: - super().__init__(f"{error_type}: {error_message}") - - -class OpenAIAPIInferenceModel(InferenceModel): - """InferenceModel for interfacing with OpenAI's generation API.""" - - def _load(self, save_model: bool, initial_load: bool) -> None: - self.tokenizer = self._get_tokenizer("gpt2") - - def _raw_generate( - self, - prompt_tokens: Union[List[int], torch.Tensor], - max_new: int, - gen_settings: GenerationSettings, - single_line: bool = False, - batch_count: int = 1, - seed: Optional[int] = None, - **kwargs, - ) -> 
GenerationResult: - - if seed is not None: - logger.warning( - "Seed is unsupported on the OpenAIAPIInferenceModel. Seed will be ignored." - ) - - decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens)) - - # Store context in memory to use it for comparison with generated content - utils.koboldai_vars.lastctx = decoded_prompt - - # Build request JSON data - # GooseAI is a subntype of OAI. So to check if it's this type, we check the configname as a workaround - # as the koboldai_vars.model will always be OAI - if "GooseAI" in utils.koboldai_vars.configname: - reqdata = { - "prompt": decoded_prompt, - "max_tokens": max_new, - "temperature": gen_settings.temp, - "top_a": gen_settings.top_a, - "top_p": gen_settings.top_p, - "top_k": gen_settings.top_k, - "tfs": gen_settings.tfs, - "typical_p": gen_settings.typical, - "repetition_penalty": gen_settings.rep_pen, - "repetition_penalty_slope": gen_settings.rep_pen_slope, - "repetition_penalty_range": gen_settings.rep_pen_range, - "n": batch_count, - # TODO: Implement streaming - "stream": False, - } - else: - reqdata = { - "prompt": decoded_prompt, - "max_tokens": max_new, - "temperature": gen_settings.temp, - "top_p": gen_settings.top_p, - "frequency_penalty": gen_settings.rep_pen, - "n": batch_count, - "stream": False, - } - - req = requests.post( - utils.koboldai_vars.oaiurl, - json=reqdata, - headers={ - "Authorization": "Bearer " + utils.koboldai_vars.oaiapikey, - "Content-Type": "application/json", - }, - ) - - j = req.json() - - if not req.ok: - # Send error message to web client - if "error" in j: - error_type = j["error"]["type"] - error_message = j["error"]["message"] - else: - error_type = "Unknown" - error_message = "Unknown" - raise OpenAIAPIError(error_type, error_message) - - outputs = [out["text"] for out in j["choices"]] - return GenerationResult( - model=self, - out_batches=np.array([self.tokenizer.encode(x) for x in outputs]), - prompt=prompt_tokens, - is_whole_generation=True, - single_line=single_line, - ) diff --git a/modeling/inference_models/openai/class.py b/modeling/inference_models/openai/class.py new file mode 100644 index 00000000..cea644ea --- /dev/null +++ b/modeling/inference_models/openai/class.py @@ -0,0 +1,33 @@ +import torch +import requests +import numpy as np +from typing import List, Optional, Union +import os + +import utils +from logger import logger +from modeling.inference_model import ( + GenerationResult, + GenerationSettings, + InferenceModel, +) + +from modeling.inference_models.openai_gooseai import model_backend as openai_gooseai_model_backend + +model_backend_name = "OpenAI" + +class OpenAIAPIError(Exception): + def __init__(self, error_type: str, error_message) -> None: + super().__init__(f"{error_type}: {error_message}") + + +class model_backend(openai_gooseai_model_backend): + """InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.url = "https://api.openai.com/v1/engines" + self.source = "OpenAI" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "OAI" \ No newline at end of file diff --git a/modeling/inference_models/openai_gooseai.py b/modeling/inference_models/openai_gooseai.py new file mode 100644 index 00000000..0195f650 --- /dev/null +++ b/modeling/inference_models/openai_gooseai.py @@ -0,0 +1,199 @@ +import torch +import requests,json +import numpy as np +from typing import List, Optional, Union +import os + +import utils +from logger import logger +from 
modeling.inference_model import ( + GenerationResult, + GenerationSettings, + InferenceModel, +) + + + +class OpenAIAPIError(Exception): + def __init__(self, error_type: str, error_message) -> None: + super().__init__(f"{error_type}: {error_message}") + + +class model_backend(InferenceModel): + """InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.key = "" + self.url = "https://api.goose.ai/v1/engines" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "OAI" or model_name == "GooseAI" + + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): + if os.path.exists("settings/{}.model_backend.settings".format(self.source)) and 'colaburl' not in vars(self): + with open("settings/{}.model_backend.settings".format(self.source), "r") as f: + try: + self.key = json.load(f)['key'] + except: + pass + if 'key' in parameters: + self.key = parameters['key'] + self.source = model_name + requested_parameters = [] + requested_parameters.extend([{ + "uitype": "text", + "unit": "text", + "label": "Key", + "id": "key", + "default": self.key, + "check": {"value": "", 'check': "!="}, + "tooltip": "User Key to use when connecting to OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "dropdown", + "unit": "text", + "label": "Model", + "id": "model", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Which model to use when running OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": False, + "extra_classes": "", + 'children': self.get_oai_models(), + + }]) + return requested_parameters + + def set_input_parameters(self, parameters): + self.key = parameters['key'].strip() + self.model_name = parameters['model'] + + def get_oai_models(self): + if self.key == "": + return [] + + + # Get list of models from OAI + logger.init("OAI Engines", status="Retrieving") + req = requests.get( + self.url, + headers = { + 'Authorization': 'Bearer '+self.key + } + ) + if(req.status_code == 200): + r = req.json() + engines = r["data"] + try: + engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines] + except: + logger.error(engines) + raise + + online_model = "" + + + logger.init_ok("OAI Engines", status="OK") + logger.debug("OAI Engines: {}".format(engines)) + return engines + else: + # Something went wrong, print the message and quit since we can't initialize an engine + logger.init_err("OAI Engines", status="Failed") + logger.error(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) + return [] + + + def _load(self, save_model: bool, initial_load: bool) -> None: + self.tokenizer = self._get_tokenizer("gpt2") + + def _save_settings(self): + with open("settings/{}.model_backend.settings".format(self.source), "w") as f: + json.dump({"key": self.key}, f, indent="") + + def _raw_generate( + self, + prompt_tokens: Union[List[int], torch.Tensor], + max_new: int, + gen_settings: GenerationSettings, + single_line: bool = False, + batch_count: int = 1, + seed: Optional[int] = None, + **kwargs, + ) -> GenerationResult: + + if seed is not None: + logger.warning( + "Seed is unsupported on the OpenAIAPIInferenceModel. Seed will be ignored." 
+ ) + + decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens)) + + # Store context in memory to use it for comparison with generated content + utils.koboldai_vars.lastctx = decoded_prompt + + # Build request JSON data + # GooseAI is a subntype of OAI. So to check if it's this type, we check the configname as a workaround + # as the koboldai_vars.model will always be OAI + if self.source == "GooseAI": + reqdata = { + "prompt": decoded_prompt, + "max_tokens": max_new, + "temperature": gen_settings.temp, + "top_a": gen_settings.top_a, + "top_p": gen_settings.top_p, + "top_k": gen_settings.top_k, + "tfs": gen_settings.tfs, + "typical_p": gen_settings.typical, + "repetition_penalty": gen_settings.rep_pen, + "repetition_penalty_slope": gen_settings.rep_pen_slope, + "repetition_penalty_range": gen_settings.rep_pen_range, + "n": batch_count, + # TODO: Implement streaming + "stream": False, + } + else: + reqdata = { + "prompt": decoded_prompt, + "max_tokens": max_new, + "temperature": gen_settings.temp, + "top_p": gen_settings.top_p, + "frequency_penalty": gen_settings.rep_pen, + "n": batch_count, + "stream": False, + } + + req = requests.post( + "{}/{}/completions".format(self.url, self.model_name), + json=reqdata, + headers={ + "Authorization": "Bearer " + self.key, + "Content-Type": "application/json", + }, + ) + + j = req.json() + + if not req.ok: + # Send error message to web client + if "error" in j: + error_type = j["error"]["type"] + error_message = j["error"]["message"] + else: + error_type = "Unknown" + error_message = "Unknown" + raise OpenAIAPIError(error_type, error_message) + + outputs = [out["text"] for out in j["choices"]] + return GenerationResult( + model=self, + out_batches=np.array([self.tokenizer.encode(x) for x in outputs]), + prompt=prompt_tokens, + is_whole_generation=True, + single_line=single_line, + ) diff --git a/modeling/inference_models/readonly/class.py b/modeling/inference_models/readonly/class.py new file mode 100644 index 00000000..98573990 --- /dev/null +++ b/modeling/inference_models/readonly/class.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +import torch +import requests +import numpy as np +from typing import List, Optional, Union + +import utils +from logger import logger +from modeling.inference_model import ( + GenerationResult, + GenerationSettings, + InferenceModel, + ModelCapabilities, +) + +model_backend_name = "Read Only" + +class BasicAPIException(Exception): + """To be used for errors when using the Basic API as an interface.""" + + +class model_backend(InferenceModel): + def __init__(self) -> None: + super().__init__() + + # Do not allow API to be served over the API + self.capabilties = ModelCapabilities(api_host=False) + self.tokenizer = self._tokenizer() + self.model = None + self.model_name = "Read Only" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "ReadOnly" + + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): + requested_parameters = [] + return requested_parameters + + def set_input_parameters(self, parameters): + return + + def unload(self): + utils.koboldai_vars.noai = False + + def _initialize_model(self): + return + + class _tokenizer(): + def __init__(self): + self._koboldai_header = [] + def decode(self, _input): + return "" + def encode(self, input_text): + return [] + + def _load(self, save_model: bool = False, initial_load: bool = False) -> None: + self.tokenizer = self.tokenizer + self.model = None + utils.koboldai_vars.noai 
= True + + def _raw_generate( + self, + prompt_tokens: Union[List[int], torch.Tensor], + max_new: int, + gen_settings: GenerationSettings, + single_line: bool = False, + batch_count: int = 1, + seed: Optional[int] = None, + **kwargs, + ): + return GenerationResult( + model=self, + out_batches=np.array([]), + prompt=prompt_tokens, + is_whole_generation=True, + single_line=single_line, + ) diff --git a/static/application.js b/static/application.js index df51b06e..11fba578 100644 --- a/static/application.js +++ b/static/application.js @@ -1,3 +1,5 @@ + + //=================================================================// // VARIABLES //=================================================================// @@ -2333,6 +2335,8 @@ $(document).ready(function(){ socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);}); socket.on('popup_edit_file', function(data){popup_edit_file(data);}); socket.on('error_popup', function(data){error_popup(data);}); + socket.on('open_model_load_menu', function(data){show_model_menu(data);}); + socket.on('selected_model_info', function(data){selected_model_info(data);}); socket.on('from_server', function(msg) { //console.log(msg); @@ -3332,28 +3336,6 @@ $(document).ready(function(){ hideLoadPopup(); }); - load_model_accept.on("click", function(ev) { - hideMessage(); - var gpu_layers; - var message; - if($("#modellayers")[0].classList.contains('hidden')) { - gpu_layers = "," - } else { - gpu_layers = "" - for (let i=0; i < $("#gpu_count")[0].value; i++) { - gpu_layers += $("#gpu_layers"+i)[0].value + ","; - } - } - var disk_layers = $("#disk_layers").length > 0 ? $("#disk_layers")[0].value : 0; - models = getSelectedOptions(document.getElementById('oaimodel')); - if (models.length == 1) { - models = models[0]; - } - message = {'cmd': 'load_model', 'use_gpu': $('#use_gpu')[0].checked, 'key': $('#modelkey')[0].value, 'gpu_layers': gpu_layers.slice(0, -1), 'disk_layers': disk_layers, 'url': $('#modelurl')[0].value, 'online_model': models}; - socket.send(message); - loadmodelcontent.html(""); - hideLoadModelPopup(); - }); sp_close.on("click", function(ev) { hideSPPopup(); @@ -3388,8 +3370,9 @@ $(document).ready(function(){ }); button_loadmodel.on("click", function(ev) { - showLoadModelPopup(); - socket.send({'cmd': 'list_model', 'data': 'mainmenu'}); + //showLoadModelPopup(); + //socket.send({'cmd': 'list_model', 'data': 'mainmenu'}); + socket.emit('load_model_button', {}); }); button_showmodel.on("click", function(ev) { socket.send({'cmd': 'show_model', 'data': ''}); @@ -3836,3 +3819,713 @@ function show_message(data) { document.getElementById('message-popup').classList.remove('hidden'); } + + + + + + + +//-----------------------------------------------------Copy from UI2-------------------------------------------------------- +function show_model_menu(data) { + console.log(data); + //clear out the loadmodelsettings + var loadmodelsettings = document.getElementById('loadmodelsettings') + while (loadmodelsettings.firstChild) { + loadmodelsettings.removeChild(loadmodelsettings.firstChild); + } + //Clear out plugin selector + var model_plugin = document.getElementById('modelplugin'); + while (model_plugin.firstChild) { + model_plugin.removeChild(model_plugin.firstChild); + } + model_plugin.classList.add("hidden"); + var accept = document.getElementById("btn_loadmodelaccept"); + accept.disabled = false; + + //clear out the breadcrumbs + var breadcrumbs = document.getElementById('loadmodellistbreadcrumbs') + while (breadcrumbs.firstChild) { + 
breadcrumbs.removeChild(breadcrumbs.firstChild); + } + + //add breadcrumbs + if ('breadcrumbs' in data) { + for (item of data.breadcrumbs) { + var button = document.createElement("button"); + button.classList.add("breadcrumbitem"); + button.setAttribute("model", data.menu); + button.setAttribute("folder", item[0]); + button.textContent = item[1]; + button.onclick = function () { + socket.emit('select_model', {'menu': "", 'name': this.getAttribute("model"), 'path': this.getAttribute("folder")}); + }; + breadcrumbs.append(button); + var span = document.createElement("span"); + span.textContent = "\\"; + breadcrumbs.append(span); + } + } + //clear out the items + var model_list = document.getElementById('loadmodellistcontent') + while (model_list.firstChild) { + model_list.removeChild(model_list.firstChild); + } + //add items + for (item of data.items) { + var list_item = document.createElement("span"); + list_item.classList.add("model_item"); + + //create the folder icon + var folder_icon = document.createElement("span"); + folder_icon.classList.add("material-icons-outlined"); + folder_icon.classList.add("cursor"); + + let isModel = !( + item.isMenu || + item.label === "Load a model from its directory" || + item.label === "Load an old GPT-2 model (eg CloverEdition)" + ); + + folder_icon.textContent = isModel ? "psychology" : "folder"; + list_item.append(folder_icon); + + + //create the actual item + var popup_item = document.createElement("span"); + popup_item.classList.add("model"); + for (const key in item) { + if (key == "name") { + popup_item.id = item[key]; + } + popup_item.setAttribute(key, item[key]); + } + + popup_item.onclick = function() { + var attributes = this.attributes; + var obj = {}; + + for (var i = 0, len = attributes.length; i < len; i++) { + obj[attributes[i].name] = attributes[i].value; + } + //put the model data on the accept button so we can send it to the server when you accept + var accept = document.getElementById("popup_accept"); + selected_model_data = obj; + //send the data to the server so it can figure out what data we need from the user for the model + socket.emit('select_model', obj); + + //clear out the selected item and select this one visually + for (const element of document.getElementsByClassName("model_menu_selected")) { + element.classList.remove("model_menu_selected"); + } + this.closest(".model_item").classList.add("model_menu_selected"); + } + + //name text + var text = document.createElement("span"); + text.style="grid-area: item;"; + text.textContent = item.label; + popup_item.append(text); + //model size text + var text = document.createElement("span"); + text.textContent = item.size; + text.style="grid-area: gpu_size;padding: 2px;"; + popup_item.append(text); + + (function() { + // Anon function to avoid unreasonable indentation + if (!isModel) return; + + let parameterCount = getModelParameterCount(item.label); + if (!parameterCount) return; + + let warningText = ""; + + if (parameterCount > 25_000_000_000) warningText = "This is a very high-end model and will likely not run without a specialized setup."; // 25B + if (parameterCount < 2_000_000_000) warningText = "This is a lower-end model and may perform poorly."; // 2B + if (parameterCount < 1_000_000_000) warningText = "This is a very low-end model and may perform incoherently."; // 1B + + if (!warningText) return; + $e("span", list_item, { + classes: ["material-icons-outlined", "model-size-warning"], + innerText: "warning", + "style.grid-area": "warning_icon", + tooltip: warningText + }); 
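+            // Note: these size warnings are heuristics keyed off the parameter count parsed out of the model name (see getModelParameterCount later in this file).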
+
+        })();
+
+        (function() {
+            // Anon function to avoid unreasonable indentation
+            if (!item.isDownloaded) return;
+            if (!isModel) return;
+
+            $e("span", list_item, {
+                classes: ["material-icons-outlined", "model-download-notification"],
+                innerText: "download_done",
+                "style.grid-area": "downloaded_icon",
+                tooltip: "This model is already downloaded."
+            });
+        })();
+
+        list_item.append(popup_item);
+        model_list.append(list_item);
+    }
+
+
+    openPopup("load-model");
+
+}
+
+function model_settings_checker() {
+    //get check value:
+    missing_element = false;
+    if (this.check_data != null) {
+        if ('sum' in this.check_data) {
+            check_value = 0
+            for (const temp of this.check_data['sum']) {
+                if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
+                    check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value);
+                } else {
+                    missing_element = true;
+                }
+            }
+        } else {
+            check_value = this.value
+        }
+        if (this.check_data['check'] == "=") {
+            valid = (check_value == this.check_data['value']);
+        } else if (this.check_data['check'] == "!=") {
+            valid = (check_value != this.check_data['value']);
+        } else if (this.check_data['check'] == ">=") {
+            valid = (check_value >= this.check_data['value']);
+        } else if (this.check_data['check'] == "<=") {
+            valid = (check_value <= this.check_data['value']);
+        } else if (this.check_data['check'] == ">") {
+            valid = (check_value > this.check_data['value']);
+        } else if (this.check_data['check'] == "<") {
+            valid = (check_value < this.check_data['value']);
+        }
+        if (valid || missing_element) {
+            //if we are supposed to refresh when this value changes we'll resubmit
+            if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) {
+                //get an object of all the input settings from the user
+                data = {}
+                settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area");
+                if (settings_area) {
+                    for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) {
+                        var element_data = element.value;
+                        if (element.getAttribute("data_type") == "int") {
+                            element_data = parseInt(element_data);
+                        } else if (element.getAttribute("data_type") == "float") {
+                            element_data = parseFloat(element_data);
+                        } else if (element.getAttribute("data_type") == "bool") {
+                            element_data = (element_data == 'on');
+                        }
+                        data[element.id.split("|")[1].replace("_value", "")] = element_data;
+                    }
+                }
+                data = {...data, ...selected_model_data};
+
+                data['plugin'] = document.getElementById("modelplugin").value;
+
+                socket.emit("resubmit_model_info", data);
+            }
+            if ('sum' in this.check_data) {
+                for (const temp of this.check_data['sum']) {
+                    if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
+                        document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error');
+                        document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip");
+                    }
+                }
+            } else {
+                this.closest(".setting_container_model").classList.remove('input_error');
+                this.closest(".setting_container_model").removeAttribute("tooltip");
+            }
+        } else {
+            if ('sum' in this.check_data) {
+                for (const temp of this.check_data['sum']) {
+                    if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
+                        document.getElementById(this.id.split("|")[0] +"|" + temp + 
"_value").closest(".setting_container_model").classList.add('input_error'); + if (this.check_data['check_message']) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } else { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } + } + } + } else { + this.closest(".setting_container_model").classList.add('input_error'); + if (this.check_data['check_message']) { + this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } else { + this.closest(".setting_container_model").removeAttribute("tooltip"); + } + } + } + } + var accept = document.getElementById("btn_loadmodelaccept"); + ok_to_load = true; + for (const item of document.getElementsByClassName("input_error")) { + if (item.classList.contains("input_error") && !item.closest(".model_plugin_settings_area").classList.contains("hidden")) { + ok_to_load = false; + break; + } + } + + if (ok_to_load) { + accept.classList.remove("disabled"); + accept.disabled = false; + } else { + accept.classList.add("disabled"); + accept.disabled = true; + } + + + //We now have valid display boxes potentially. We'll go through them and update the display + for (const item of document.querySelectorAll(".model_settings_valid_display:not(#blank_model_settings_valid_display)")) { + check_value = 0 + missing_element = false; + for (const temp of item.check_data['sum']) { + if (document.getElementById(item.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(item.id.split("|")[0] +"|" + temp + "_value").value); + } else { + missing_element = true; + } + } + if (!missing_element) { + item.innerText = item.original_text.replace("%1", check_value); + } + + + } +} + +function selected_model_info(sent_data) { + const data = sent_data['model_backends']; + //clear out the loadmodelsettings + var loadmodelsettings = document.getElementById('loadmodelsettings') + while (loadmodelsettings.firstChild) { + loadmodelsettings.removeChild(loadmodelsettings.firstChild); + } + //Clear out plugin selector + var model_plugin = document.getElementById('modelplugin'); + while (model_plugin.firstChild) { + model_plugin.removeChild(model_plugin.firstChild); + } + + var accept = document.getElementById("btn_loadmodelaccept"); + accept.disabled = false; + + modelplugin = document.getElementById("modelplugin"); + modelplugin.classList.remove("hidden"); + modelplugin.onchange = function () { + for (const area of document.getElementsByClassName("model_plugin_settings_area")) { + area.classList.add("hidden"); + } + if (document.getElementById(this.value + "_settings_area")) { + document.getElementById(this.value + "_settings_area").classList.remove("hidden"); + } + model_settings_checker() + } + //create the content + for (const [loader, items] of Object.entries(data)) { + model_area = document.createElement("DIV"); + model_area.id = loader + "_settings_area"; + model_area.classList.add("model_plugin_settings_area"); + model_area.classList.add("hidden"); + modelpluginoption = document.createElement("option"); + modelpluginoption.innerText = loader; + modelpluginoption.value = loader; + modelplugin.append(modelpluginoption); + + //create the user input for each requested input + for (item of items) { + let new_setting = document.getElementById('blank_model_settings').cloneNode(true); + new_setting.id = 
loader; + new_setting.classList.remove("hidden"); + new_setting.querySelector('#blank_model_settings_label').innerText = item['label']; + new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']); + + onchange_event = model_settings_checker; + if (item['uitype'] == "slider") { + var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number'); + slider_number.value = item['default']; + slider_number.id = loader + "|" + item['id'] + "_value_text"; + slider_number.onchange = function() { document.getElementById(this.id.replace("_text", "")).value = this.value;}; + + var slider = new_setting.querySelector('#blank_model_settings_slider'); + slider.value = item['default']; + slider.min = item['min']; + slider.max = item['max']; + slider.setAttribute("data_type", item['unit']); + slider.id = loader + "|" + item['id'] + "_value"; + if ('check' in item) { + slider.check_data = item['check']; + slider_number.check_data = item['check']; + } else { + slider.check_data = null; + slider_number.check_data = null; + } + slider.oninput = function() { document.getElementById(this.id+"_text").value = this.value;}; + slider.onchange = onchange_event; + slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min']; + new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max']; + slider.noresubmit = true; + slider.onchange(); + slider.noresubmit = false; + } else { + new_setting.querySelector('#blank_model_settings_slider').remove(); + } + if (item['uitype'] == "toggle") { + toggle = document.createElement("input"); + toggle.type='checkbox'; + toggle.classList.add("setting_item_input"); + toggle.classList.add("blank_model_settings_input"); + toggle.classList.add("model_settings_input"); + toggle.id = loader + "|" + item['id'] + "_value"; + toggle.checked = item['default']; + toggle.onclick = onchange_event; + toggle.setAttribute("data_type", item['unit']); + toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + toggle.check_data = item['check']; + } else { + toggle.check_data = null; + } + new_setting.querySelector('#blank_model_settings_toggle').append(toggle); + setTimeout(function() { + $('#'+loader + "\\|" + item['id'] + "_value").bootstrapToggle({size: "mini", onstyle: "success", toggle: "toggle"}); + }, 200); + toggle.noresubmit = true; + toggle.onclick(); + toggle.noresubmit = false; + } else { + new_setting.querySelector('#blank_model_settings_toggle').remove(); + } + if (item['uitype'] == "dropdown") { + var select_element = new_setting.querySelector('#blank_model_settings_dropdown'); + select_element.id = loader + "|" + item['id'] + "_value"; + for (const dropdown_value of item['children']) { + new_option = document.createElement("option"); + new_option.value = dropdown_value['value']; + new_option.innerText = dropdown_value['text']; + select_element.append(new_option); + } + select_element.value = item['default']; + select_element.setAttribute("data_type", item['unit']); + select_element.onchange = onchange_event; + select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if (('multiple' in item) && (item['multiple'])) { + select_element.multiple = true; + select_element.size = 10; + } + if ('check' in item) { + select_element.check_data = item['check']; + } else { + select_element.check_data = null; + } + select_element.noresubmit = 
true; + select_element.onchange(); + select_element.noresubmit = false; + } else { + new_setting.querySelector('#blank_model_settings_dropdown').remove(); + } + if (item['uitype'] == "password") { + var password_item = new_setting.querySelector('#blank_model_settings_password'); + password_item.id = loader + "|" + item['id'] + "_value"; + password_item.value = item['default']; + password_item.setAttribute("data_type", item['unit']); + password_item.onchange = onchange_event; + password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + password_item.check_data = item['check']; + } else { + password_item.check_data = null; + } + password_item.noresubmit = true; + password_item.onchange(); + password_item.noresubmit = false; + } else { + new_setting.querySelector('#blank_model_settings_password').remove(); + } + if (item['uitype'] == "text") { + var text_item = new_setting.querySelector('#blank_model_settings_text'); + text_item.id = loader + "|" + item['id'] + "_value"; + text_item.value = item['default']; + text_item.onchange = onchange_event; + text_item.setAttribute("data_type", item['unit']); + text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + text_item.check_data = item['check']; + } else { + text_item.check_data = null; + } + text_item.noresubmit = true; + text_item.onchange(); + text_item.noresubmit = false; + } else { + new_setting.querySelector('#blank_model_settings_text').remove(); + } + + if (item['uitype'] == "Valid Display") { + new_setting = document.createElement("DIV"); + new_setting.classList.add("model_settings_valid_display"); + new_setting.id = loader + "|" + item['id'] + "_value"; + new_setting.innerText = item['label']; + new_setting.check_data = item['check']; + new_setting.original_text = item['label']; + } + + model_area.append(new_setting); + loadmodelsettings.append(model_area); + } + } + + //unhide the first plugin settings + if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) { + document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); + } + + model_settings_checker() + +} + +function getModelParameterCount(modelName) { + if (!modelName) return null; + + // The "T" and "K" may be a little optimistic... + let paramsString = modelName.toUpperCase().match(/[\d.]+[TBMK]/) + if (!paramsString) return null; + paramsString = paramsString[0]; + + let base = parseFloat(paramsString); + let multiplier = {T: 1_000_000_000_000, B: 1_000_000_000, M: 1_000_000, K: 1_000}[paramsString[paramsString.length - 1]]; + + return base * multiplier; +} + +function openPopup(id) { + closePopups(); + + const container = document.getElementById("popup-container"); + container.classList.remove("hidden"); + + for (const popupWindow of container.children) { + popupWindow.classList.add("hidden"); + } + + const popup = document.getElementById(`${id}`); + popup.classList.remove("hidden"); + + // Sometimes we want to instantly focus on certain elements when a menu opens. 
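+    // Only the first element tagged with the "focus-on-me" class receives focus; the loop exits after the first match.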
+ for (const noticeMee of popup.getElementsByClassName("focus-on-me")) { + noticeMee.focus(); + break; + } +} + +function closePopups() { + const container = document.getElementById("popup-container"); + container.classList.add("hidden"); + + for (const popupWindow of container.children) { + popupWindow.classList.add("hidden"); + } +} + +function $el(selector) { + // We do not preemptively fetch all elements upon execution (wall of consts) + // due to the layer of mental overhead it adds to debugging and reading + // code in general. + return document.querySelector(selector); +} + +function $e(tag, parent, attributes, insertionLocation=null) { + // Small helper function for dynamic UI creation + + let element = document.createElement(tag); + + if (!attributes) attributes = {}; + + if ("classes" in attributes) { + if (!Array.isArray(attributes.classes)) throw Error("Classes was not array!"); + for (const className of attributes.classes) { + element.classList.add(className); + } + delete attributes.classes; + } + + + for (const [attribute, value] of Object.entries(attributes)) { + if (attribute.includes(".")) { + let ref = element; + const parts = attribute.split("."); + + for (const part of parts.slice(0, -1)) { + ref = ref[part]; + } + + ref[parts[parts.length - 1]] = value; + continue; + } + + if (attribute in element) { + element[attribute] = value; + } else { + element.setAttribute(attribute, value); + } + } + + if (!parent) return element; + + if (insertionLocation && Object.keys(insertionLocation).length) { + let [placement, target] = Object.entries(insertionLocation)[0]; + if (placement === "before") { + parent.insertBefore(element, target); + } else if (placement === "after") { + parent.insertBefore(element, target.nextSibling); + } else { + throw Error(`I have no clue what placement ${placement} is`); + } + } else { + parent.appendChild(element); + } + + return element; +} + +function load_model() { + var accept = document.getElementById('btn_loadmodelaccept'); + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); + + //get an object of all the input settings from the user + data = {} + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if ((element.tagName == "SELECT") && (element.multiple)) { + element_data = []; + for (var i=0, iLen=element.options.length; i 0) x -= xOverflow; + + if (xOverflow + xOffset < 0) x += xOffset; + + // Same for Y! + let yOverflow = (y + tooltip.clientHeight) - window.innerHeight; + if (yOverflow > 0) y -= yOverflow; + + if (yOverflow + yOffset < 0) y += yOffset; + + tooltip.style.left = `${x}px`; + tooltip.style.top = `${y}px`; + }); + + // Inital scan + for (const element of document.querySelectorAll("[tooltip]")) { + registerElement(element); + } + + // Use a MutationObserver to catch future tooltips + const observer = new MutationObserver(function(records, observer) { + for (const record of records) { + + if (record.type === "attributes") { + // Sanity check + if (record.attributeName !== "tooltip") continue; + registerElement(record.target); + continue; + } + + // If we remove the tooltip target, stop showing the tooltip. Maybe a little ineffecient. 
+ if (!document.body.contains(tooltipTarget)) alterTooltipState(null); + + for (const node of record.addedNodes) { + if (node.nodeType !== 1) continue; + + if (node.hasAttribute("tooltip")) registerElement(node); + + // Register for descendants (Slow?) + for (const element of node.querySelectorAll("[tooltip]")) { + registerElement(element); + } + } + } + }); + observer.observe(document.body, { + childList: true, + subtree: true, + attributeFilter: ["tooltip"], + }); +} + +// Must be done before any elements are made; we track their changes. +console.log(document.body); +initalizeTooltips(); \ No newline at end of file diff --git a/static/custom.css b/static/custom.css index 3e266701..25aa7818 100644 --- a/static/custom.css +++ b/static/custom.css @@ -1728,4 +1728,691 @@ body.connected .popupfooter, .popupfooter.always-available { .wientry > .input-token-usage { bottom: 8px; +} + + +/*----------------------------------------------COPY FROM UI2-----------------------------------------------------------------------*/ +:root { + /*General*/ + --background: #252e3b; + --gamescreen_background: #111820; + --input_background: #111820; + + --text: #e0e0e0; + --text_to_ai_color: #e0e0e0; + --text_edit: #9cc3ee; + --action_mode_input: #33E978; + + --statusbar_color: #eedcb880; + --statusbar_text_color: #e0e0e0; + --scrollbar-color: #2f3b4bdb; + + /*Buttons*/ + /*General*/ + --enabled_button_text: #e0e0e0; + --enabled_button_background_color: #2d3d52; + --enabled_button_border_color: #253446; + + --disabled_button_text: #303030; + --disabled_button_background_color: #495762; + --disabled_button_border_color: #686c68; + + /*Home Tab*/ + --button_text: #e0e0e0; + --button_background: #283445; + + /*Alternate Button*/ + --alternate_button_text: #e0e0e0; + --alternate_button_background: #283445; + + /*Buttons -> Icon Button*/ + --icon_button_background:; + --icon_button_color:; + --icon_button_border_color:; + + /*Context Menu*/ + --context_menu_text:; + --context_menu_background:; + --context_menu_border:; + --context_menu_division:; + --context_menu_hover_text:; + --context_menu_hover_background:; + + /*Sequence, AKA Gens Per Action*/ + --sequence_area_background: #111820; + --sequence_background: #eedcb8; + --sequence_text: #e0e0e0; + + /*Side Menus*/ + --tab_color: #243047; + + --flyout_background: #18222d; + --flyout_background_pinned: #18222d; + + --setting_background: #273141; + --setting_text: #e0e0e0; + + --sample_order_select_color: #1f2934; + --sample_order_select_color_text: #eedcb8; + + --dropdown_text: #e0e0e0; + --dropdown_background: #212935; + + --rangeslider_background_color: #1f2934; + --rangeslider_color: #1f2934; + --rangeslider_circle_color: #404d64; + + --help_icon: #7c8389; + --tooltip_text: #e0e0e0; + --tooltip_background: #303c50; + --setting_category_help_text_color: #E0E0E0; + + --setting_footer_border_color: #334552; + --setting_footer_text_color: #e0e0e0; + --setting_footer_background_color: #18222d; + + /*Substitution Card*/ + --substitution_card_input_border:; + --substitution_card_input_background:; + + /*Palette Card*/ + --palette_card_background: #273141; + --palette_card_text: #e0e0e0; + --palette_table_border: #607c90; + + /*World Info*/ + --wi_card_border_color: #334552; + --wi_card_border_color_to_ai: #eedcb880; + + --wi_card_bg_color: #223040; + --wi_card_text_color: #e0e0e0; + + --wi_card_tag_bg_color: #1d2835; + --wi_card_tag_text_color: #e0e0e0; + + --wi_tag_color: #283445; + --wi_tag_text_color: #e0e0e0; + + /*Popup*/ + --popup_background_color: #1a2530; + 
--popup_title_bar_color: #283445; + --popup_title_bar_color_text: #e0e0e0; + + --popup_item_color: #1a2530; + --popup_item_color_text: #e0e0e0; + + --popup_hover_color: #1e2733; + --popup_hover_color_text: #e0e0e0; + --popup_selected_color: #242d3c; + --popup_selected_color_text: #eedcb8; + + --popup_button_color: #283445; + --popup_button_color_text: #e0e0e0; + --popup_cancel_button_color: #25364a; + --popup_cancel_button_color_text: #e0e0e0; + + --error: #19242c; + --error_text: #e0e0e0; + --error_title: #25364a; + --error_title_text: #e0e0e0; + + /*Context Bar Colors*/ + --context_colors_memory: #04325c; + --context_colors_authors_notes: #165a62; + --context_colors_world_info: #1864a3; + --context_colors_prompt: #868686; + --context_colors_game_text: #63710e; + --context_colors_submit: #ffffff00; + --context_colors_unused: #ffffff24; + --context_colors_soft_prompt: #141414; + --context_colors_genre: #2c5c88; + + /*Parameters*/ + --scrollbar-size: 6px; + --palette_card_shadow: 0; + --wi_card_shadow: 0; + --light_shadow_value: 0; + --left_menu_strong_shadow: 0; + --right_menu_light_shadow: 0; + --right_menu_strong_shadow: 0; + --context_menu_shadow: var(--wi_card_shadow); + --radius_inputbox: 2px; + --radius_unpinned_menu: 2px; + --radius_sequence: 5px; + --radius_settings_background: 2px; + --radius_button: 2px; + --radius_alternate_button: 2px; + --radius_item_popup: 2px; + --radius_wi_card: 5px; + --radius_palette_card: 5px; + --radius_settings_button: 2px; + --tabs_rounding: 2px; + --radius_context_menu: 2px; + --radius_context_menu_hover: 2px; + --radius_genre_tag: 2px; + --radius_tooltip: 2px; + + + + + +/*----------------VARIABLES--------------------*/ + --flyout_menu_closed_width: 0px; + --setting_menu_closed_width_no_pins_width: 0px; + --story_options_size: 30%; + --story_pinned_areas_left:"menuicon options gamescreen lefticon" + "menuicon theme theme lefticon" + "menuicon inputrow inputrow lefticon"; + --story_pinned_areas_right:"menuicon gamescreen options lefticon" + "menuicon theme theme lefticon" + "menuicon inputrow inputrow lefticon"; + --story_pinned_area_widths_left: 30pxvar(--story_options_size) auto 30px; + --story_pinned_area_widths_right: 30pxautovar(--story_options_size) 30px; + --story_pinned_areas:var(--story_pinned_areas_left); + --story_pinned_area_widths:var(--story_pinned_area_widths_left); + --font_size_adjustment: 0px; + --game_screen_font_size_adjustment: 1;} +} + +/*---------------------------------- Popups -------------------------------------------------*/ +@media only screen and (max-aspect-ratio: 7/5) { + .popup { + position: absolute; + top: 10vh; + left: 10%; + z-index: 999; + width: 80%; + height: 80vh; + border-radius: 15px; + box-shadow: var(--popup_shadow); + background-color: var(--popup_background_color); + display: flex; + flex-direction: column; + overflow: hidden; + } +} + +@media only screen and (min-aspect-ratio: 7/5) { + .popup { + position: absolute; + top: 10vh; + left: 25%; + z-index: 999; + width: 50%; + height: 80vh; + border-radius: 15px; + box-shadow: var(--popup_shadow); + background-color: var(--popup_background_color); + display: flex; + flex-direction: column; + overflow: hidden; + } +} + +.popup .title { + width: 100%; + background-color: var(--popup_title_bar_color); + color: var(--popup_title_bar_color_text); + text-align: center; + font-size: calc(1.3em + var(--font_size_adjustment)); +} + +.popup .action_button { + background-color: var(--popup_button_color); + color: var(--popup_button_color_text); +} + +.popup 
.popup_list_area { + overflow-x: hidden; + overflow-y: scroll; + flex-grow: 1; + flex-shrink: 1; + flex-basis: auto; + color: var(--popup_item_color_text); + +} + +#modelspecifier, .popup .model_item { + width: 98%; + background-color: var(--popup_item_color); + color: var(--popup_item_color_text); + margin: 5px 0 5px 1%; + border-radius: var(--radius_item_popup); + padding: 2px; + display: grid; + grid-template-areas: "folder_icon delete_icon edit_icon rename_icon file gpu_size warning_icon downloaded_icon"; + grid-template-columns: 30px 0px 0px 0px auto 50px 30px 30px; + +} + +.popup .model_item .folder_icon { + grid-area: folder_icon; +} + +.popup .model_item .edit_icon { + grid-area: edit_icon; +} + +.popup .model_item .rename_icon { + grid-area: rename_icon; +} + +.popup .model_item .delete_icon { + grid-area: delete_icon; +} + +.popup .model_item .model { + cursor: pointer; + grid-area: file; +} + +.popup .header { + width: 98%; + background-color: var(--popup_item_color); + color: var(--popup_item_color_text); + margin: 5px 0 5px 1%; + padding: 2px; +} + +.popup .item { + width: 98%; + background-color: var(--popup_item_color); + color: var(--popup_item_color_text); + margin: 5px 0 5px 1%; + border-radius: var(--radius_item_popup); + padding: 2px; +} + +.popup .item:hover { + background-color: var(--popup_hover_color); + color: var(--popup_hover_color_text); +} + +.popup .item.selected { + background: var(--popup_selected_color); + color: var(--popup_selected_color_text); +} + +.popup .popup_load_cancel { + text-align: center; + vertical-align: bottom; + color: var(--popup_title_bar_color_text); + background-color: var(--popup_title_bar_color); + padding: 0 10px 0 10px; +} + + +.popup_load_cancel_button { + color: var(--popup_cancel_button_color_text); + border-color: var(--popup_cancel_button_color_text); + background-color: var(--popup_cancel_button_color); + vertical-align: bottom; + display: inline; +} + +.table-header-container { + display: flex; + justify-content: space-between; + cursor: pointer; +} + +.table-header-sort-icon { + margin-right: 10px; + margin-top: 2px; +} + +.table-header-label { + margin-top: 4px; +} + +#error_message.popup { + background-color: var(--error); + color: var(--error_text); + overflow: hidden; +} + +#error_message .title { + width: 100%; + background-color: var(--error_title); + color: var(--error_title_text); + text-align: center; + font-size: calc(1.3em + var(--font_size_adjustment)); +} + +#error_message.popup .btn-primary { + background-color: var(--error); + color: var(--error_text); + border-color: var(--error_text); +} + + +#error_message .popup_load_cancel { + background-color: var(--error_title); + color: var(--error_title_text); +} + + +#error_message.popup .popup_list_area { + overflow-x: hidden; + overflow-y: scroll; + flex-grow: 1; + flex-shrink: 1; + flex-basis: auto; + background-color: var(--error); + color: var(--error_text); +} + +.breadcrumbitem { + padding: 5px 10px 5px 10px; + color: #ffffff; + background-color: transparent; + border: none; + + -moz-transition: background-color 0.25s ease-in; + -o-transition: background-color 0.25s ease-in; + -webkit-transition: background-color 0.25s ease-in; + transition: background-color 0.25s ease-in; +} + +.breadcrumbitem:hover { + cursor: pointer; + background-color: #688f1f; +} + +.loadmodelsettings { + overflow-y: auto; + max-height: 50%; +} + + +/*----------------------------- Model Load Popup ------------------------------------------*/ + +#specspan, .popup_list_area .model_item 
.model { + grid-area: file; + display: grid; + grid-template-areas: "item gpu_size"; + grid-template-columns: auto 95px; + cursor: pointer; +} + +#specspan { + grid-template-columns: auto 100px !important; + cursor: auto !important; +} + +#model-spec-usage { + position: relative; + left: -20px; +} + +.popup .model_item:hover { + background-color: var(--popup_hover_color); + color: var(--popup_hover_color_text); +} + +.popup .model_item .selected { + background: var(--popup_selected_color); + color: var(--popup_selected_color_text); +} + +.model_setting_container { + display: grid; + grid-template-areas: "label label" + "item item" + "minlabel maxlabel"; + grid-template-rows: 20px 16px 19px; + grid-template-columns: auto 40px; + row-gap: 0.2em; + border: 1px; + margin: 2px; +} + +.model_setting_minlabel { + grid-area: minlabel; + padding-top: 3px; + color: var(--popup_title_bar_color_text); + overflow: hidden; + text-align: left; + font-size: calc(0.8em + var(--font_size_adjustment)); +} + +.model_setting_maxlabel { + color: var(--popup_title_bar_color_text); + padding-top: 3px; + grid-area: maxlabel; + overflow: hidden; + text-align: right; + font-size: calc(0.8em + var(--font_size_adjustment)); +} + +.model_setting_label { + color: var(--popup_title_bar_color_text); + grid-area: label; + overflow: hidden; + text-align: left; +} + +.model_setting_value { + color: var(--popup_title_bar_color_text); + text-align: left; + grid-area: label; + background-color: inherit; + color: inherit; + border: none; + outline: none; +} + +.model_setting_value:focus { + color: var(--text_edit); +} + +.model_setting_item { + + grid-area: item; + overflow: hidden; +} + +.model_setting_item_input { + width:95%; +} + +@font-face { + font-family: 'Material Icons Outlined'; + font-style: normal; + src: url(/static/MaterialIconsOutlined-Regular.otf) format('opentype'); +} + +.material-icons-outlined { + font-family: 'Material Icons Outlined'; + font-weight: normal; + font-style: normal; + font-size: calc(24px + var(--font_size_adjustment)); /* Preferred icon size */ + display: inline-block; + line-height: 1; + text-transform: none; + letter-spacing: normal; + word-wrap: normal; + white-space: nowrap; + direction: ltr; + + /* Support for all WebKit browsers. */ + -webkit-font-smoothing: antialiased; + /* Support for Safari and Chrome. */ + text-rendering: optimizeLegibility; + + /* Support for Firefox. */ + -moz-osx-font-smoothing: grayscale; + + /* Support for IE. 
*/ + font-feature-settings: 'liga'; +} + +.material-icons-outlined.cursor:hover{ + filter: brightness(85%); +} + +.setting_label .helpicon { + color: var(--help_icon); + cursor: help; + font-size: calc(14px + var(--font_size_adjustment)) !important; + flex: auto; + width: 15px; + align-self: flex-end; + + line-height: inherit; + border-radius: inherit; + margin-right: inherit; + padding: inherit; + background: inherit; + border: inherit; + text-decoration: inherit; + +} + +#tooltip-text { + content: attr(tooltip); + position: fixed; + transition: opacity 0s linear 0.5s; + white-space: normal; + border-radius: var(--radius_tooltip); + + opacity: 1; + + padding: 7px; + color: var(--tooltip_text); + background-color: var(--tooltip_background); + + pointer-events: none; + z-index: 9999999; +} + +.tooltip-standard { + border: 1px ridge grey; + font-family: "Helvetica Neue",Helvetica,Arial,sans-serif; + width: min-context; + max-width: 25%; + /*margin-right: -3px;*/ +} + +.tooltip-context-token { + border: none; + font-family: monospace; + max-width: min-content; +} + + +/* Mobile tooltips */ +@media (pointer: coarse), (hover: none) { + [tooltip]:after { + opacity: 0; + content: ""; + } + + [tooltip]:hover::after { + content: attr(tooltip); + position: fixed; + + top: calc(var(--mouse-y) * 100vh); + left: calc(var(--mouse-x) * 100vw); + transform: translate(var(--tooltip_x), var(--tooltip-y)); + transition: opacity 0s linear 0.5s; + opacity: 1; + + + padding: 0px 2px; + background-color: rgba(0, 0, 0, 0.6); + + pointer-events: none; + z-index: 9999999; + } +} + +.popup .model_item .model_menu_selected { + color: var(--popup_selected_color); + background-color: var(--popup_selected_color_text); +} + +.settings_select { + color: var(--dropdown_text); + background: var(--dropdown_background); + margin-left: auto; + margin-right: 25px; +} + +.setting_value { + text-align: right; + grid-area: value; + font-size: calc(12px + var(--font_size_adjustment)); + padding: 2px; + padding-top: 0px; + background-color: inherit; + color: inherit; + border: none; + outline: none; +} + +.setting_value:focus { + color: var(--text_edit); +} + +.setting_container_model { + display: grid; + grid-template-areas: "label value" + "item item" + "minlabel maxlabel"; + grid-template-rows: 20px auto 20px; + grid-template-columns: auto 30px; + row-gap: 0.2em; + background-color: var(--setting_background); + color: var(--setting_text); + border-radius: var(--radius_settings_background); + padding: 2px; + margin: 2px; + width: calc(100%); +} + +.setting_container_model .setting_item{ + font-size: calc(0.93em + var(--font_size_adjustment)); + margin-left: 10px; +} + + +.setting_minlabel { + padding-top: 6px; + grid-area: minlabel; + overflow: hidden; + padding: 5px; + padding-top: 0px; + text-align: left; + font-size: calc(0.8em + var(--font_size_adjustment)); +} + +.setting_maxlabel { + padding-top: 6px; + grid-area: maxlabel; + overflow: hidden; + padding: 5px; + padding-top: 0px; + text-align: right; + font-size: calc(0.8em + var(--font_size_adjustment)); + text-align: left; +} + +.setting_label { + display: flex; + grid-area: label; + overflow: hidden; + padding: 5px; + padding-right: 0px; + padding-top: 0px; +} + +.input_error { + border: 5px solid red !important; + box-sizing: border-box !important; +} + +.popup .model_item.model_menu_selected { + color: var(--popup_selected_color); + background-color: var(--popup_selected_color_text); } \ No newline at end of file diff --git a/static/koboldai.css b/static/koboldai.css 
index 230f1cbf..3252c21a 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -347,6 +347,28 @@ border-top-right-radius: var(--tabs_rounding); } +.setting_container_model { + display: grid; + grid-template-areas: "label value" + "item item" + "minlabel maxlabel"; + grid-template-rows: 20px auto 20px; + grid-template-columns: auto 30px; + row-gap: 0.2em; + background-color: var(--setting_background); + color: var(--setting_text); + border-radius: var(--radius_settings_background); + padding: 2px; + margin: 2px; + width: calc(100%); +} + +.setting_container_model .setting_item{ + font-size: calc(0.93em + var(--font_size_adjustment)); + margin-left: 10px; +} + + .setting_minlabel { padding-top: 6px; grid-area: minlabel; @@ -364,6 +386,7 @@ border-top-right-radius: var(--tabs_rounding); padding-top: 0px; text-align: right; font-size: calc(0.8em + var(--font_size_adjustment)); + text-align: left; } .setting_label { @@ -1990,6 +2013,11 @@ body { grid-area: file; } +.popup .model_item.model_menu_selected { + color: var(--popup_selected_color); + background-color: var(--popup_selected_color_text); +} + .popup .header { width: 98%; background-color: var(--popup_item_color); @@ -2102,6 +2130,13 @@ body { cursor: pointer; background-color: #688f1f; } + +.loadmodelsettings { + overflow-y: auto; + max-height: 50%; +} + + /*----------------------------- Model Load Popup ------------------------------------------*/ #specspan, .popup_list_area .model_item .model { @@ -3370,6 +3405,23 @@ textarea { } } +@keyframes pulse-red { + 0% { + transform: scale(0.95); + box-shadow: 0 0 0 0 rgba(255, 0, 0, 0.7); + } + + 70% { + transform: scale(1); + box-shadow: 0 0 0 10px rgba(255, 0, 0, 0); + } + + 100% { + transform: scale(0.95); + box-shadow: 0 0 0 0 rgba(255, 0, 0, 0); + } +} + @keyframes pulse-text { 0% { filter: blur(3px); @@ -3391,6 +3443,11 @@ textarea { } } +.input_error { + border: 5px solid red !important; + box-sizing: border-box !important; +} + .single_pulse { animation: pulse-text 0.5s 1; } @@ -3495,7 +3552,7 @@ h2 .material-icons-outlined { } -.horde_trigger[model_model="ReadOnly"], +.horde_trigger[model_model="Read Only"], .horde_trigger[model_model="CLUSTER"] { display: none; } diff --git a/static/koboldai.js b/static/koboldai.js index 87beb954..99383728 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -14,7 +14,8 @@ socket.on('load_popup', function(data){load_popup(data);}); socket.on('popup_items', function(data){popup_items(data);}); socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);}); socket.on('popup_edit_file', function(data){popup_edit_file(data);}); -socket.on('show_model_menu', function(data){show_model_menu(data);}); +//socket.on('show_model_menu', function(data){show_model_menu(data);}); +socket.on('open_model_load_menu', function(data){show_model_menu(data);}); socket.on('selected_model_info', function(data){selected_model_info(data);}); socket.on('oai_engines', function(data){oai_engines(data);}); socket.on('buildload', function(data){buildload(data);}); @@ -81,6 +82,7 @@ const on_colab = $el("#on_colab").textContent == "true"; let story_id = -1; var dirty_chunks = []; var initial_socketio_connection_occured = false; +var selected_model_data; // Each entry into this array should be an object that looks like: // {class: "class", key: "key", func: callback} @@ -1501,48 +1503,50 @@ function getModelParameterCount(modelName) { } function show_model_menu(data) { - //clear old options - document.getElementById("modelkey").classList.add("hidden"); - 
document.getElementById("modelkey").value = ""; - document.getElementById("modelurl").classList.add("hidden"); - document.getElementById("use_gpu_div").classList.add("hidden"); - document.getElementById("use_8_bit_div").classList.add("hidden"); - document.getElementById("modellayers").classList.add("hidden"); - document.getElementById("oaimodel").classList.add("hidden"); - var model_layer_bars = document.getElementById('model_layer_bars'); - while (model_layer_bars.firstChild) { - model_layer_bars.removeChild(model_layer_bars.firstChild); + //clear out the loadmodelsettings + var loadmodelsettings = document.getElementById('loadmodelsettings') + while (loadmodelsettings.firstChild) { + loadmodelsettings.removeChild(loadmodelsettings.firstChild); } + //Clear out plugin selector + var model_plugin = document.getElementById('modelplugin'); + while (model_plugin.firstChild) { + model_plugin.removeChild(model_plugin.firstChild); + } + model_plugin.classList.add("hidden"); + var accept = document.getElementById("btn_loadmodelaccept"); + accept.disabled = false; //clear out the breadcrumbs var breadcrumbs = document.getElementById('loadmodellistbreadcrumbs') while (breadcrumbs.firstChild) { breadcrumbs.removeChild(breadcrumbs.firstChild); } - //add breadcrumbs - //console.log(data.breadcrumbs); - for (item of data.breadcrumbs) { - var button = document.createElement("button"); - button.classList.add("breadcrumbitem"); - button.setAttribute("model", data.menu); - button.setAttribute("folder", item[0]); - button.textContent = item[1]; - button.onclick = function () { - socket.emit('select_model', {'menu': "", 'model': this.getAttribute("model"), 'path': this.getAttribute("folder")}); - }; - breadcrumbs.append(button); - var span = document.createElement("span"); - span.textContent = "\\"; - breadcrumbs.append(span); - } + //add breadcrumbs + if ('breadcrumbs' in data) { + for (item of data.breadcrumbs) { + var button = document.createElement("button"); + button.classList.add("breadcrumbitem"); + button.setAttribute("model", data.menu); + button.setAttribute("folder", item[0]); + button.textContent = item[1]; + button.onclick = function () { + socket.emit('select_model', {'menu': "", 'name': this.getAttribute("model"), 'path': this.getAttribute("folder")}); + }; + breadcrumbs.append(button); + var span = document.createElement("span"); + span.textContent = "\\"; + breadcrumbs.append(span); + } + } //clear out the items var model_list = document.getElementById('loadmodellistcontent') while (model_list.firstChild) { model_list.removeChild(model_list.firstChild); } //add items - for (item of data.data) { + for (item of data.items) { var list_item = document.createElement("span"); list_item.classList.add("model_item"); @@ -1564,10 +1568,33 @@ function show_model_menu(data) { //create the actual item var popup_item = document.createElement("span"); popup_item.classList.add("model"); - popup_item.setAttribute("display_name", item.label); - popup_item.id = item.name; + for (const key in item) { + if (key == "name") { + popup_item.id = item[key]; + } + popup_item.setAttribute(key, item[key]); + } + + popup_item.onclick = function() { + var attributes = this.attributes; + var obj = {}; + + for (var i = 0, len = attributes.length; i < len; i++) { + obj[attributes[i].name] = attributes[i].value; + } + //put the model data on the accept button so we can send it to the server when you accept + var accept = document.getElementById("popup_accept"); + selected_model_data = obj; + //send the data to the server so it 
can figure out what data we need from the user for the model + socket.emit('select_model', obj); + + //clear out the selected item and select this one visually + for (const element of document.getElementsByClassName("model_menu_selected")) { + element.classList.remove("model_menu_selected"); + } + this.closest(".model_item").classList.add("model_menu_selected"); + } - popup_item.setAttribute("Menu", data.menu) //name text var text = document.createElement("span"); text.style="grid-area: item;"; @@ -1615,241 +1642,327 @@ function show_model_menu(data) { }); })(); - popup_item.onclick = function () { - var accept = document.getElementById("btn_loadmodelaccept"); - accept.classList.add("disabled"); - socket.emit("select_model", {"model": this.id, "menu": this.getAttribute("Menu"), "display_name": this.getAttribute("display_name")}); - var model_list = document.getElementById('loadmodellistcontent').getElementsByClassName("selected"); - for (model of model_list) { - model.classList.remove("selected"); - } - this.classList.add("selected"); - accept.setAttribute("selected_model", this.id); - accept.setAttribute("menu", this.getAttribute("Menu")); - accept.setAttribute("display_name", this.getAttribute("display_name")); - }; list_item.append(popup_item); - - model_list.append(list_item); } - var accept = document.getElementById("btn_loadmodelaccept"); - accept.disabled = true; - //finally, if they selected the custom hugging face menu we show the input box - if (data['menu'] == "customhuggingface") { - document.getElementById("custommodelname").classList.remove("hidden"); - } else { - document.getElementById("custommodelname").classList.add("hidden"); - } - - - // detect if we are in a model selection screen and show the reference - var refelement = document.getElementById("modelspecifier"); - var check = document.getElementById("mainmenu"); - if (check) { - refelement.classList.remove("hidden"); - } else { - refelement.classList.add("hidden"); - } openPopup("load-model"); + } -function selected_model_info(data) { +function model_settings_checker() { + //get check value: + missing_element = false; + if (this.check_data != null) { + if ('sum' in this.check_data) { + check_value = 0 + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value); + } else { + missing_element = true; + } + } + } else { + check_value = this.value + } + if (this.check_data['check'] == "=") { + valid = (check_value == this.check_data['value']); + } else if (this.check_data['check'] == "!=") { + valid = (check_value != this.check_data['value']); + } else if (this.check_data['check'] == ">=") { + valid = (check_value >= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value <= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value > this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value < this.check_data['value']); + } + if (valid || missing_element) { + //if we are supposed to refresh when this value changes we'll resubmit + if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) { + //get an object of all the input settings from the user + data = {} + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); + if (settings_area) { + for 
(const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; + } + } + data = {...data, ...selected_model_data}; + + data['plugin'] = document.getElementById("modelplugin").value; + + socket.emit("resubmit_model_info", data); + } + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } + } + } else { + this.closest(".setting_container_model").classList.remove('input_error'); + this.closest(".setting_container_model").removeAttribute("tooltip"); + } + } else { + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); + if (this.check_data['check_message']) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } else { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } + } + } + } else { + this.closest(".setting_container_model").classList.add('input_error'); + if (this.check_data['check_message']) { + this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } else { + this.closest(".setting_container_model").removeAttribute("tooltip"); + } + } + } + } var accept = document.getElementById("btn_loadmodelaccept"); - //hide or unhide key - if (data.key) { - document.getElementById("modelkey").classList.remove("hidden"); - document.getElementById("modelkey").value = data.key_value; - } else { - document.getElementById("modelkey").classList.add("hidden"); - document.getElementById("modelkey").value = ""; - } - //hide or unhide URL - if (data.url) { - document.getElementById("modelurl").classList.remove("hidden"); - } else { - document.getElementById("modelurl").classList.add("hidden"); - } - - //hide or unhide 8 bit mode - if (data.bit_8_available) { - document.getElementById("use_8_bit_div").classList.remove("hidden"); - } else { - document.getElementById("use_8_bit_div").classList.add("hidden"); - document.getElementById("use_8_bit").checked = false; - } - - //default URL loading - if (data.default_url != null) { - document.getElementById("modelurl").value = data.default_url; - } - - //change model loading on url if needed - if (data.models_on_url) { - document.getElementById("modelurl").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': document.getElementById("modelkey").value, 'url': this.value});}; - 
document.getElementById("modelkey").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value, 'url': document.getElementById("modelurl").value});}; - } else { - document.getElementById("modelkey").ochange = function () {socket.emit('OAI_Key_Update', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value});}; - document.getElementById("modelurl").ochange = null; - } - - //show model select for APIs - if (data.show_online_model_select) { - document.getElementById("oaimodel").classList.remove("hidden"); - } else { - document.getElementById("oaimodel").classList.add("hidden"); - } - - //Multiple Model Select? - if (data.multi_online_models) { - document.getElementById("oaimodel").setAttribute("multiple", ""); - document.getElementById("oaimodel").options[0].textContent = "All" - } else { - document.getElementById("oaimodel").removeAttribute("multiple"); - document.getElementById("oaimodel").options[0].textContent = "Select Model(s)" - } - - //hide or unhide the use gpu checkbox - if (data.gpu) { - document.getElementById("use_gpu_div").classList.remove("hidden"); - } else { - document.getElementById("use_gpu_div").classList.add("hidden"); - } - //setup breakmodel - if (data.breakmodel) { - document.getElementById("modellayers").classList.remove("hidden"); - //setup model layer count - document.getElementById("gpu_layers_current").textContent = data.break_values.reduce((a, b) => a + b, 0); - document.getElementById("gpu_layers_max").textContent = data.layer_count; - document.getElementById("gpu_count").value = data.gpu_count; - - //create the gpu load bars - var model_layer_bars = document.getElementById('model_layer_bars'); - while (model_layer_bars.firstChild) { - model_layer_bars.removeChild(model_layer_bars.firstChild); + ok_to_load = true; + for (const item of document.getElementsByClassName("input_error")) { + if (item.classList.contains("input_error") && !item.closest(".model_plugin_settings_area").classList.contains("hidden")) { + ok_to_load = false; + break; } - - //Add the bars - for (let i = 0; i < data.gpu_names.length; i++) { - var div = document.createElement("div"); - div.classList.add("model_setting_container"); - //build GPU text - var span = document.createElement("span"); - span.classList.add("model_setting_label"); - span.textContent = "GPU " + i + " " + data.gpu_names[i] + ": " - //build layer count box - var input = document.createElement("input"); - input.classList.add("model_setting_value"); - input.classList.add("setting_value"); - input.inputmode = "numeric"; - input.id = "gpu_layers_box_"+i; - input.value = data.break_values[i]; - input.onblur = function () { - document.getElementById(this.id.replace("_box", "")).value = this.value; - update_gpu_layers(); - } - span.append(input); - div.append(span); - //build layer count slider - var input = document.createElement("input"); - input.classList.add("model_setting_item"); - input.type = "range"; - input.min = 0; - input.max = data.layer_count; - input.step = 1; - input.value = data.break_values[i]; - input.id = "gpu_layers_" + i; - input.onchange = function () { - document.getElementById(this.id.replace("gpu_layers", "gpu_layers_box")).value = this.value; - update_gpu_layers(); - } - div.append(input); - //build slider bar #s - //min - var span = document.createElement("span"); - span.classList.add("model_setting_minlabel"); - var span2 = 
document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = 0; - span.append(span2); - div.append(span); - //max - var span = document.createElement("span"); - span.classList.add("model_setting_maxlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = data.layer_count; - span.append(span2); - div.append(span); - - model_layer_bars.append(div); - } - - //add the disk layers - if (data.disk_break) { - var div = document.createElement("div"); - div.classList.add("model_setting_container"); - //build GPU text - var span = document.createElement("span"); - span.classList.add("model_setting_label"); - span.textContent = "Disk cache: " - //build layer count box - var input = document.createElement("input"); - input.classList.add("model_setting_value"); - input.classList.add("setting_value"); - input.inputmode = "numeric"; - input.id = "disk_layers_box"; - input.value = data.disk_break_value; - input.onblur = function () { - document.getElementById(this.id.replace("_box", "")).value = this.value; - update_gpu_layers(); - } - span.append(input); - div.append(span); - //build layer count slider - var input = document.createElement("input"); - input.classList.add("model_setting_item"); - input.type = "range"; - input.min = 0; - input.max = data.layer_count; - input.step = 1; - input.value = data.disk_break_value; - input.id = "disk_layers"; - input.onchange = function () { - document.getElementById(this.id+"_box").value = this.value; - update_gpu_layers(); - } - div.append(input); - //build slider bar #s - //min - var span = document.createElement("span"); - span.classList.add("model_setting_minlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = 0; - span.append(span2); - div.append(span); - //max - var span = document.createElement("span"); - span.classList.add("model_setting_maxlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = data.layer_count; - span.append(span2); - div.append(span); - } - - model_layer_bars.append(div); - - update_gpu_layers(); - } else { - document.getElementById("modellayers").classList.add("hidden"); + } + + if (ok_to_load) { accept.classList.remove("disabled"); + accept.disabled = false; + } else { + accept.classList.add("disabled"); + accept.disabled = true; } + + + //We now have valid display boxes potentially. 
We'll go through them and update the display + for (const item of document.querySelectorAll(".model_settings_valid_display:not(#blank_model_settings_valid_display)")) { + check_value = 0 + missing_element = false; + for (const temp of item.check_data['sum']) { + if (document.getElementById(item.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(item.id.split("|")[0] +"|" + temp + "_value").value); + } else { + missing_element = true; + } + } + if (!missing_element) { + item.innerText = item.original_text.replace("%1", check_value); + } + + + } +} + +function selected_model_info(sent_data) { + const data = sent_data['model_backends']; + //clear out the loadmodelsettings + var loadmodelsettings = document.getElementById('loadmodelsettings') + while (loadmodelsettings.firstChild) { + loadmodelsettings.removeChild(loadmodelsettings.firstChild); + } + //Clear out plugin selector + var model_plugin = document.getElementById('modelplugin'); + while (model_plugin.firstChild) { + model_plugin.removeChild(model_plugin.firstChild); + } + + var accept = document.getElementById("btn_loadmodelaccept"); accept.disabled = false; + modelplugin = document.getElementById("modelplugin"); + modelplugin.classList.remove("hidden"); + modelplugin.onchange = function () { + for (const area of document.getElementsByClassName("model_plugin_settings_area")) { + area.classList.add("hidden"); + } + if (document.getElementById(this.value + "_settings_area")) { + document.getElementById(this.value + "_settings_area").classList.remove("hidden"); + } + model_settings_checker() + } + //create the content + for (const [loader, items] of Object.entries(data)) { + model_area = document.createElement("DIV"); + model_area.id = loader + "_settings_area"; + model_area.classList.add("model_plugin_settings_area"); + model_area.classList.add("hidden"); + modelpluginoption = document.createElement("option"); + modelpluginoption.innerText = loader; + modelpluginoption.value = loader; + modelplugin.append(modelpluginoption); + + //create the user input for each requested input + for (item of items) { + let new_setting = document.getElementById('blank_model_settings').cloneNode(true); + new_setting.id = loader; + new_setting.classList.remove("hidden"); + new_setting.querySelector('#blank_model_settings_label').innerText = item['label']; + new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']); + + onchange_event = model_settings_checker; + if (item['uitype'] == "slider") { + var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number'); + slider_number.value = item['default']; + slider_number.id = loader + "|" + item['id'] + "_value_text"; + slider_number.onchange = function() { document.getElementById(this.id.replace("_text", "")).value = this.value;}; + + var slider = new_setting.querySelector('#blank_model_settings_slider'); + slider.value = item['default']; + slider.min = item['min']; + slider.max = item['max']; + slider.setAttribute("data_type", item['unit']); + slider.id = loader + "|" + item['id'] + "_value"; + if ('check' in item) { + slider.check_data = item['check']; + slider_number.check_data = item['check']; + } else { + slider.check_data = null; + slider_number.check_data = null; + } + slider.oninput = function() { document.getElementById(this.id+"_text").value = this.value;}; + slider.onchange = onchange_event; + slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + 
new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min']; + new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max']; + slider.noresubmit = true; + slider.onchange(); + slider.noresubmit = false; + } else { + new_setting.querySelector('#blank_model_settings_slider').remove(); + } + if (item['uitype'] == "toggle") { + toggle = document.createElement("input"); + toggle.type='checkbox'; + toggle.classList.add("setting_item_input"); + toggle.classList.add("blank_model_settings_input"); + toggle.classList.add("model_settings_input"); + toggle.id = loader + "|" + item['id'] + "_value"; + toggle.checked = item['default']; + toggle.onclick = onchange_event; + toggle.setAttribute("data_type", item['unit']); + toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + toggle.check_data = item['check']; + } else { + toggle.check_data = null; + } + new_setting.querySelector('#blank_model_settings_toggle').append(toggle); + setTimeout(function() { + $('#'+loader + "\\|" + item['id'] + "_value").bootstrapToggle({size: "mini", onstyle: "success", toggle: "toggle"}); + }, 200); + toggle.noresubmit = true; + toggle.onclick(); + toggle.noresubmit = false; + } else { + new_setting.querySelector('#blank_model_settings_toggle').remove(); + } + if (item['uitype'] == "dropdown") { + var select_element = new_setting.querySelector('#blank_model_settings_dropdown'); + select_element.id = loader + "|" + item['id'] + "_value"; + for (const dropdown_value of item['children']) { + new_option = document.createElement("option"); + new_option.value = dropdown_value['value']; + new_option.innerText = dropdown_value['text']; + select_element.append(new_option); + } + select_element.value = item['default']; + select_element.setAttribute("data_type", item['unit']); + select_element.onchange = onchange_event; + select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if (('multiple' in item) && (item['multiple'])) { + select_element.multiple = true; + select_element.size = 10; + } + if ('check' in item) { + select_element.check_data = item['check']; + } else { + select_element.check_data = null; + } + select_element.noresubmit = true; + select_element.onchange(); + select_element.noresubmit = false; + } else { + new_setting.querySelector('#blank_model_settings_dropdown').remove(); + } + if (item['uitype'] == "password") { + var password_item = new_setting.querySelector('#blank_model_settings_password'); + password_item.id = loader + "|" + item['id'] + "_value"; + password_item.value = item['default']; + password_item.setAttribute("data_type", item['unit']); + password_item.onchange = onchange_event; + password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + password_item.check_data = item['check']; + } else { + password_item.check_data = null; + } + password_item.noresubmit = true; + password_item.onchange(); + password_item.noresubmit = false; + } else { + new_setting.querySelector('#blank_model_settings_password').remove(); + } + if (item['uitype'] == "text") { + var text_item = new_setting.querySelector('#blank_model_settings_text'); + text_item.id = loader + "|" + item['id'] + "_value"; + text_item.value = item['default']; + text_item.onchange = onchange_event; + text_item.setAttribute("data_type", item['unit']); + text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + text_item.check_data = 
item['check']; + } else { + text_item.check_data = null; + } + text_item.noresubmit = true; + text_item.onchange(); + text_item.noresubmit = false; + } else { + new_setting.querySelector('#blank_model_settings_text').remove(); + } + + if (item['uitype'] == "Valid Display") { + new_setting = document.createElement("DIV"); + new_setting.classList.add("model_settings_valid_display"); + new_setting.id = loader + "|" + item['id'] + "_value"; + new_setting.innerText = item['label']; + new_setting.check_data = item['check']; + new_setting.original_text = item['label']; + } + + model_area.append(new_setting); + loadmodelsettings.append(model_area); + } + } + + //unhide the first plugin settings + if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) { + document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); + } + + model_settings_checker() } @@ -1877,42 +1990,37 @@ function update_gpu_layers() { function load_model() { var accept = document.getElementById('btn_loadmodelaccept'); - gpu_layers = [] - disk_layers = 0; - if (!(document.getElementById("modellayers").classList.contains("hidden"))) { - for (let i=0; i < document.getElementById("gpu_count").value; i++) { - gpu_layers.push(document.getElementById("gpu_layers_"+i).value); - } - if (document.getElementById("disk_layers")) { - disk_layers = document.getElementById("disk_layers").value; + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); + + //get an object of all the input settings from the user + data = {} + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if ((element.tagName == "SELECT") && (element.multiple)) { + element_data = []; + for (var i=0, iLen=element.options.length; i - + @@ -283,59 +283,7 @@ - + + + {% include 'popups.html' %} + + diff --git a/templates/popups.html b/templates/popups.html index 12c4c27a..9c6b4a9e 100644 --- a/templates/popups.html +++ b/templates/popups.html @@ -46,35 +46,11 @@
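For reference, the model_backends payload that selected_model_info() walks above is a mapping of backend (loader) name to a list of requested inputs. The field names below are the ones the JavaScript reads (uitype, unit, label, id, default, min, max, tooltip, check, refresh_model_inputs, children, multiple); the backend name and concrete values are invented for illustration, so treat this as a hypothetical sketch rather than the exact payload any particular backend emits:

# Hypothetical payload shape consumed by selected_model_info(); only the field
# names are taken from the JavaScript above, the values are illustrative.
example_model_backends = {
    "Example Backend": [                    # one modelplugin option and settings area per backend
        {
            "uitype": "slider",             # rendered via #blank_model_settings_slider
            "unit": "int",                  # stored on the element as its data_type attribute
            "label": "GPU Layers",
            "id": "GPU_Layers",             # element id becomes "Example Backend|GPU_Layers_value"
            "default": 0,
            "min": 0,
            "max": 28,
            "tooltip": "Layers to place on the GPU",
            "check": {"sum": ["GPU_Layers", "Disk_Layers"]},  # consulted by model_settings_checker()
            "refresh_model_inputs": False,
        },
        {
            "uitype": "dropdown",
            "unit": "text",
            "label": "Precision",
            "id": "precision",
            "default": "16-bit",
            "tooltip": "Precision to load the model in",
            "children": [{"text": "16-bit", "value": "16-bit"},
                         {"text": "8-bit", "value": "8-bit"}],
            "refresh_model_inputs": False,
        },
    ],
}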
diff --git a/templates/popups.html b/templates/popups.html
index 12c4c27a..9c6b4a9e 100644
--- a/templates/popups.html
+++ b/templates/popups.html
@@ -46,35 +46,11 @@
 Usage (VRAM)
-
-
-
+
+
+
+
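The "Valid Display" branch earlier builds a plain div whose text is kept current by model_settings_checker(): the checker totals the current values of the inputs named in the item's check['sum'] list (resolved as "loader|name_value" elements) and substitutes the total for "%1" in the label. A hypothetical item of that kind, with ids and label invented for illustration, could look like:

# Hypothetical "Valid Display" item; model_settings_checker() replaces "%1" in
# the label with the sum of the named sibling "<loader>|<name>_value" inputs.
layers_readout = {
    "uitype": "Valid Display",
    "label": "Layers assigned: %1",
    "id": "assigned_layers",
    "check": {"sum": ["GPU_Layers", "Disk_Layers"]},
}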
diff --git a/templates/templates.html b/templates/templates.html
index 4f16ff66..926bf854 100644
--- a/templates/templates.html
+++ b/templates/templates.html
@@ -1,5 +1,4 @@
-
@@ -154,3 +153,21 @@
+
+
+
+ help_icon
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py
index 07261636..df37e0be 100644
--- a/tpu_mtj_backend.py
+++ b/tpu_mtj_backend.py
@@ -460,14 +460,14 @@ def sample_func(data, key, numseqs_aux, badwords, repetition_penalty, generated_
     return carry
 
 class PenalizingCausalTransformer(CausalTransformer):
-    def __init__(self, config, **kwargs):
+    def __init__(self, badwordsids, config, **kwargs):
         # Initialize
         super().__init__(config, **kwargs)
         def generate_static(state, key, ctx, ctx_length, gen_length, numseqs_aux, sampler_options, soft_embeddings=None):
            compiling_callback()
            numseqs = numseqs_aux.shape[0]
            # These are the tokens that we don't want the AI to ever write
-           badwords = jnp.array(koboldai_vars.badwordsids).squeeze()
+           badwords = jnp.array(badwordsids).squeeze()
            @hk.transform
            def generate_sample(context, ctx_length):
                # Give the initial context to the transformer
@@ -941,7 +941,9 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2):
     koboldai_vars.status_message = ""
 
-def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None:
+import koboldai_settings
+
+def load_model(path: str, model_type: str, badwordsids=koboldai_settings.badwordsids_default, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None:
     global thread_resources_env, seq, tokenizer, network, params, pad_token_id
 
     if kwargs.get("pad_token_id"):
@@ -989,9 +991,9 @@ def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=Fa
     # Try to convert HF config.json to MTJ config
     if hf_checkpoint:
-        spec_path = os.path.join("maps", koboldai_vars.model_type + ".json")
+        spec_path = os.path.join("maps", model_type + ".json")
         if not os.path.isfile(spec_path):
-            raise NotImplementedError(f"Unsupported model type {repr(koboldai_vars.model_type)}")
+            raise NotImplementedError(f"Unsupported model type {repr(model_type)}")
         with open(spec_path) as f:
             lazy_load_spec = json.load(f)
@@ -1119,12 +1121,12 @@ def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=Fa
     global badwords
     # These are the tokens that we don't want the AI to ever write
-    badwords = jnp.array(koboldai_vars.badwordsids).squeeze()
+    badwords = jnp.array(badwordsids).squeeze()
 
     if not path.endswith("/"):
         path += "/"
 
-    network = PenalizingCausalTransformer(params, dematerialized=True)
+    network = PenalizingCausalTransformer(badwordsids, params, dematerialized=True)
     if not hf_checkpoint and koboldai_vars.model != "TPUMeshTransformerGPTNeoX":
         network.state = read_ckpt_lowmem(network.state, path, devices.shape[1])
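With the tpu_mtj_backend.py changes above, load_model no longer reads the model type and bad-words list from koboldai_vars; callers pass them in explicitly. A minimal sketch of a call under the new signature follows; the path and model_type values are placeholder assumptions, and only the parameter names and the badwordsids default come from the diff:

# Sketch only: "models/example-model/" and "gptj" are placeholders; the keyword
# arguments mirror the new load_model signature shown in the diff above.
import koboldai_settings
import tpu_mtj_backend

tpu_mtj_backend.load_model(
    "models/example-model/",                            # hypothetical checkpoint directory
    "gptj",                                             # model_type, used to pick maps/<model_type>.json when hf_checkpoint is True
    badwordsids=koboldai_settings.badwordsids_default,  # default declared in the new signature
    hf_checkpoint=True,
)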