From 71aee4dbd8f1d429e0ebd27dbf98bfd6fcf6c52c Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 10 May 2023 16:30:46 -0400 Subject: [PATCH 01/68] First concept of model plugins with a conceptual UI. Completely breaks UI2 model loading. --- aiserver.py | 123 ++-- modeling/inference_model.py | 9 + modeling/inference_models/api.py | 26 +- modeling/inference_models/basic_api.py | 29 +- modeling/inference_models/generic_hf_torch.py | 8 +- modeling/inference_models/hf.py | 190 ------ modeling/inference_models/hf_mtj.py | 22 +- modeling/inference_models/horde.py | 88 ++- modeling/inference_models/openai.py | 85 ++- modeling/inference_models/parents/hf.py | 219 +++++++ .../{ => parents}/hf_torch.py | 56 +- modeling/inference_models/rwkv.py | 26 +- static/koboldai.css | 44 ++ static/koboldai.js | 548 +++++++++--------- templates/popups.html | 30 +- templates/templates.html | 19 + 16 files changed, 912 insertions(+), 610 deletions(-) delete mode 100644 modeling/inference_models/hf.py create mode 100644 modeling/inference_models/parents/hf.py rename modeling/inference_models/{ => parents}/hf_torch.py (94%) diff --git a/aiserver.py b/aiserver.py index e744d18e..e7227c81 100644 --- a/aiserver.py +++ b/aiserver.py @@ -168,6 +168,7 @@ class MenuFolder(MenuItem): "size": "", "isMenu": True, "isDownloaded": False, + "isDirectory": False } class MenuModel(MenuItem): @@ -200,8 +201,28 @@ class MenuModel(MenuItem): "size": self.vram_requirements, "isMenu": False, "isDownloaded": self.is_downloaded, + "isDirectory": False, } +class MenuPath(MenuItem): + def to_ui1(self) -> list: + return [ + self.label, + self.name, + "", + True, + ] + + def to_json(self) -> dict: + return { + "label": self.label, + "name": self.name, + "size": "", + "isMenu": True, + "isDownloaded": False, + "isDirectory": True, + "path": "./models" + } # AI models Menu # This is a dict of lists where they key is the menu name, and the list is the menu items. 
@@ -209,8 +230,8 @@ class MenuModel(MenuItem): # 3: the memory requirement for the model, 4: if the item is a menu or not (True/False) model_menu = { "mainmenu": [ - MenuModel("Load a model from its directory", "NeoCustom"), - MenuModel("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), + MenuPath("Load a model from its directory", "NeoCustom"), + MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), MenuFolder("Load custom model from Hugging Face", "customhuggingface"), MenuFolder("Adventure Models", "adventurelist"), MenuFolder("Novel Models", "novellist"), @@ -600,6 +621,15 @@ utils.socketio = socketio # Weird import position to steal koboldai_vars from utils from modeling.patches import patch_transformers +#Load all of the model importers +import importlib +model_loader_code = {} +model_loaders = {} +for module in os.listdir("./modeling/inference_models"): + if os.path.isfile(os.path.join("./modeling/inference_models",module)) and module[-3:] == '.py': + model_loader_code[module[:-3]] = importlib.import_module('modeling.inference_models.{}'.format(module[:-3])) + model_loaders[module[:-3]] = model_loader_code[module[:-3]].model_loader() + old_socketio_on = socketio.on def new_socketio_on(*a, **k): @@ -906,6 +936,8 @@ def sendModelSelection(menu="mainmenu", folder="./models"): ) def get_folder_path_info(base): + if base is None: + return [], [] if base == 'This PC': breadcrumbs = [['This PC', 'This PC']] paths = [["{}:\\".format(chr(i)), "{}:\\".format(chr(i))] for i in range(65, 91) if os.path.exists("{}:".format(chr(i)))] @@ -1932,25 +1964,25 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal koboldai_vars.breakmodel = False if koboldai_vars.model == "Colab": - from modeling.inference_models.basic_api import BasicAPIInferenceModel - model = BasicAPIInferenceModel() + from modeling.inference_models.basic_api import model_loader + model = model_loader() elif koboldai_vars.model == "API": - from modeling.inference_models.api import APIInferenceModel - model = APIInferenceModel(koboldai_vars.colaburl.replace("/request", "")) + from modeling.inference_models.api import model_loader + model = model_loader(koboldai_vars.colaburl.replace("/request", "")) elif koboldai_vars.model == "CLUSTER": - from modeling.inference_models.horde import HordeInferenceModel - model = HordeInferenceModel() + from modeling.inference_models.horde import model_loader + model = model_loader() elif koboldai_vars.model == "OAI": - from modeling.inference_models.openai import OpenAIAPIInferenceModel - model = OpenAIAPIInferenceModel() + from modeling.inference_models.openai import model_loader + model = model_loader() model.load(initial_load=initial_load) # TODO: This check sucks, make a model object or somethign elif "rwkv" in koboldai_vars.model: if koboldai_vars.use_colab_tpu: raise RuntimeError("RWKV is not supported on the TPU.") - from modeling.inference_models.rwkv import RWKVInferenceModel - model = RWKVInferenceModel(koboldai_vars.model) + from modeling.inference_models.rwkv import model_loader + model = model_loader(koboldai_vars.model) model.load() elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai: # HF Torch @@ -1961,8 +1993,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal except: pass - from modeling.inference_models.generic_hf_torch import GenericHFTorchInferenceModel - model = GenericHFTorchInferenceModel( + from modeling.inference_models.generic_hf_torch import model_loader + model = 
model_loader( koboldai_vars.model, lazy_load=koboldai_vars.lazy_load, low_mem=args.lowmem @@ -1975,8 +2007,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal logger.info(f"Pipeline created: {koboldai_vars.model}") else: # TPU - from modeling.inference_models.hf_mtj import HFMTJInferenceModel - model = HFMTJInferenceModel( + from modeling.inference_models.hf_mtj import model_loader + model = model_loader( koboldai_vars.model ) model.load( @@ -6430,7 +6462,9 @@ def UI_2_retry(data): @socketio.on('load_model_button') @logger.catch def UI_2_load_model_button(data): - sendModelSelection() + emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":"mainmenu"}} for item in model_menu['mainmenu'] if item.should_show()]}) + + #==================================================================# # Event triggered when user clicks the a model @@ -6438,6 +6472,38 @@ def UI_2_load_model_button(data): @socketio.on('select_model') @logger.catch def UI_2_select_model(data): + logger.debug("Clicked on model entry: {}".format(data)) + if data["name"] in model_menu and data['ismenu'] == "true": + emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) + else: + #Get load methods + logger.debug("Asking for model info on potential model: {}".format(data)) + valid = False + if 'path' not in data or data['path'] == "": + valid_loaders = {} + for model_loader in model_loaders: + logger.debug("Testing Loader {} for model {}: {}".format(model_loader, data["name"], model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]))) + if model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]): + valid_loaders[model_loader] = model_loaders[model_loader].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + valid = True + if valid: + logger.debug("Valid Loaders: {}".format(valid_loaders)) + emit("selected_model_info", valid_loaders) + if not valid: + #Get directories + paths, breadcrumbs = get_folder_path_info(data['path']) + output = [] + for path in paths: + valid=False + for model_loader in model_loaders: + if model_loaders[model_loader].is_valid(path[1], path[0], "Custom"): + valid=True + break + output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) + emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) + + return + #We've selected a menu if data['model'] in model_menu: @@ -6462,26 +6528,9 @@ def UI_2_select_model(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): - if not os.path.exists("settings/"): - os.mkdir("settings") - changed = True - if os.path.exists("settings/" + data['model'].replace('/', '_') + ".breakmodel"): - with open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "r") as file: - file_data = file.read().split('\n')[:2] - if len(file_data) < 2: - file_data.append("0") - gpu_layers, disk_layers = file_data - if gpu_layers == data['gpu_layers'] and disk_layers == data['disk_layers']: - changed = False - if changed: - f = open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "w") - f.write("{}\n{}".format(data['gpu_layers'], data['disk_layers'])) - f.close() - koboldai_vars.colaburl = data['url'] + 
"/request" - koboldai_vars.model = data['model'] - koboldai_vars.custmodpth = data['path'] - print("loading Model") - load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) + logger.info("loading Model") + logger.info(data) + #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) #==================================================================# # Event triggered when load story is clicked diff --git a/modeling/inference_model.py b/modeling/inference_model.py index b253c5bf..27ad46db 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -169,6 +169,15 @@ class InferenceModel: ] self.tokenizer = None self.capabilties = ModelCapabilities() + + def is_valid(self, model_name, model_path, menu_path, vram): + return True + + def requested_parameters(self, model_name, model_path, menu_path, vram): + return {} + + def define_input_parameters(self): + return def load(self, save_model: bool = False, initial_load: bool = False) -> None: """User-facing load function. Do not override this; try `_load()` instead.""" diff --git a/modeling/inference_models/api.py b/modeling/inference_models/api.py index d25505b0..41088bc7 100644 --- a/modeling/inference_models/api.py +++ b/modeling/inference_models/api.py @@ -22,9 +22,31 @@ class APIException(Exception): """To be used for errors when using the Kobold API as an interface.""" -class APIInferenceModel(InferenceModel): - def __init__(self, base_url: str) -> None: +class model_loader(InferenceModel): + def __init__(self) -> None: super().__init__() + #self.base_url = "" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "API" + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "base_url", + "default": False, + "check": {"value": "", 'check': "!="}, + "tooltip": "The URL of the KoboldAI API to connect to.", + "menu_path": "", + "extra_classes": "", + "refresh_model_inputs": False + }) + return requested_parameters + + def set_input_parameters(self, base_url=""): self.base_url = base_url.rstrip("/") def _load(self, save_model: bool, initial_load: bool) -> None: diff --git a/modeling/inference_models/basic_api.py b/modeling/inference_models/basic_api.py index c96eb42c..d7fc0863 100644 --- a/modeling/inference_models/basic_api.py +++ b/modeling/inference_models/basic_api.py @@ -19,12 +19,37 @@ class BasicAPIException(Exception): """To be used for errors when using the Basic API as an interface.""" -class BasicAPIInferenceModel(InferenceModel): +class model_loader(InferenceModel): def __init__(self) -> None: super().__init__() # Do not allow API to be served over the API self.capabilties = ModelCapabilities(api_host=False) + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "Colab" + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "colaburl", + "default": False, + "check": {"value": "", 'check': "!="}, + "tooltip": "The URL of the Colab KoboldAI API to connect to.", + "menu_path": "", + "extra_classes": "", + 
"refresh_model_inputs": False + }) + return requested_parameters + + def set_input_parameters(self, colaburl=""): + self.colaburl = colaburl + + def _initialize_model(self): + return def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer("EleutherAI/gpt-neo-2.7B") @@ -68,7 +93,7 @@ class BasicAPIInferenceModel(InferenceModel): } # Create request - req = requests.post(utils.koboldai_vars.colaburl, json=reqdata) + req = requests.post(self.colaburl, json=reqdata) if req.status_code != 200: raise BasicAPIException(f"Bad status code {req.status_code}") diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index aa602b1a..366fbbb7 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -20,10 +20,14 @@ except ModuleNotFoundError as e: if not utils.koboldai_vars.use_colab_tpu: raise e -from modeling.inference_models.hf_torch import HFTorchInferenceModel +from modeling.inference_models.parents.hf_torch import HFTorchInferenceModel -class GenericHFTorchInferenceModel(HFTorchInferenceModel): +class model_loader(HFTorchInferenceModel): + + def _initialize_model(self): + return + def _load(self, save_model: bool, initial_load: bool) -> None: utils.koboldai_vars.allowsp = True diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py deleted file mode 100644 index cd609fed..00000000 --- a/modeling/inference_models/hf.py +++ /dev/null @@ -1,190 +0,0 @@ -import os -from typing import Optional -from transformers import AutoConfig - -import utils -import koboldai_settings -from logger import logger -from modeling.inference_model import InferenceModel - - -class HFInferenceModel(InferenceModel): - def __init__(self, model_name: str) -> None: - super().__init__() - self.model_config = None - self.model_name = model_name - - self.model = None - self.tokenizer = None - - def _post_load(self) -> None: - # These are model specific tokenizer overrides if a model has bad defaults - if utils.koboldai_vars.model_type == "llama": - # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer - self.tokenizer.add_bos_token = False - - # HF transformers no longer supports decode_with_prefix_space - # We work around this by wrapping decode, encode, and __call__ - # with versions that work around the 'prefix space' misfeature - # of sentencepiece. - vocab = self.tokenizer.convert_ids_to_tokens(range(self.tokenizer.vocab_size)) - has_prefix_space = {i for i, tok in enumerate(vocab) if tok.startswith("▁")} - - # Wrap 'decode' with a method that always returns text starting with a space - # when the head token starts with a space. This is what 'decode_with_prefix_space' - # used to do, and we implement it using the same technique (building a cache of - # tokens that should have a prefix space, and then prepending a space if the first - # token is in this set.) We also work around a bizarre behavior in which decoding - # a single token 13 behaves differently than decoding a squence containing only [13]. - original_decode = type(self.tokenizer.tokenizer).decode - def decode_wrapper(self, token_ids, *args, **kwargs): - first = None - # Note, the code below that wraps single-value token_ids in a list - # is to work around this wonky behavior: - # >>> t.decode(13) - # '<0x0A>' - # >>> t.decode([13]) - # '\n' - # Not doing this causes token streaming to receive <0x0A> characters - # instead of newlines. 
- if isinstance(token_ids, int): - first = token_ids - token_ids = [first] - elif hasattr(token_ids, 'dim'): # Check for e.g. torch.Tensor - # Tensors don't support the Python standard of 'empty is False' - # and the special case of dimension 0 tensors also needs to be - # handled separately. - if token_ids.dim() == 0: - first = int(token_ids.item()) - token_ids = [first] - elif len(token_ids) > 0: - first = int(token_ids[0]) - elif token_ids: - first = token_ids[0] - result = original_decode(self, token_ids, *args, **kwargs) - if first is not None and first in has_prefix_space: - result = " " + result - return result - # GenericTokenizer overrides __setattr__ so we need to use object.__setattr__ to bypass it - object.__setattr__(self.tokenizer, 'decode', decode_wrapper.__get__(self.tokenizer)) - - # Wrap encode and __call__ to work around the 'prefix space' misfeature also. - # The problem is that "Bob" at the start of text is encoded as if it is - # " Bob". This creates a problem because it means you can't split text, encode - # the pieces, concatenate the tokens, decode them, and get the original text back. - # The workaround is to prepend a known token that (1) starts with a space; and - # (2) is not the prefix of any other token. After searching through the vocab - # " ," (space comma) is the only token containing only printable ascii characters - # that fits this bill. By prepending ',' to the text, the original encode - # method always returns [1919, ...], where the tail of the sequence is the - # actual encoded result we want without the prefix space behavior. - original_encode = type(self.tokenizer.tokenizer).encode - def encode_wrapper(self, text, *args, **kwargs): - if type(text) is str: - text = ',' + text - result = original_encode(self, text, *args, **kwargs) - result = result[1:] - else: - result = original_encode(self, text, *args, **kwargs) - return result - object.__setattr__(self.tokenizer, 'encode', encode_wrapper.__get__(self.tokenizer)) - - # Since 'encode' is documented as being deprecated, also override __call__. - # This doesn't appear to currently be used by KoboldAI, but doing so - # in case someone uses it in the future. 
- original_call = type(self.tokenizer.tokenizer).__call__ - def call_wrapper(self, text, *args, **kwargs): - if type(text) is str: - text = ',' + text - result = original_call(self, text, *args, **kwargs) - result = result[1:] - else: - result = original_call(self, text, *args, **kwargs) - return result - object.__setattr__(self.tokenizer, '__call__', call_wrapper.__get__(self.tokenizer)) - - elif utils.koboldai_vars.model_type == "opt": - self.tokenizer._koboldai_header = self.tokenizer.encode("") - self.tokenizer.add_bos_token = False - self.tokenizer.add_prefix_space = False - - # Change newline behavior to match model quirks - if utils.koboldai_vars.model_type == "xglm": - # Default to newline mode if using XGLM - utils.koboldai_vars.newlinemode = "s" - elif utils.koboldai_vars.model_type in ["opt", "bloom"]: - # Handle but don't convert newlines if using Fairseq models that have newlines trained in them - utils.koboldai_vars.newlinemode = "ns" - - # Clean up tokens that cause issues - if ( - utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default - and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj") - ): - utils.koboldai_vars.badwordsids = [ - [v] - for k, v in self.tokenizer.get_vocab().items() - if any(c in str(k) for c in "[]") - ] - - if utils.koboldai_vars.newlinemode == "n": - utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id]) - - return super()._post_load() - - def get_local_model_path( - self, legacy: bool = False, ignore_existance: bool = False - ) -> Optional[str]: - """ - Returns a string of the model's path locally, or None if it is not downloaded. - If ignore_existance is true, it will always return a path. - """ - - if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]: - model_path = utils.koboldai_vars.custmodpth - assert model_path - - # Path can be absolute or relative to models directory - if os.path.exists(model_path): - return model_path - - model_path = os.path.join("models", model_path) - - try: - assert os.path.exists(model_path) - except AssertionError: - logger.error(f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'.") - raise - - return model_path - - basename = utils.koboldai_vars.model.replace("/", "_") - if legacy: - ret = basename - else: - ret = os.path.join("models", basename) - - if os.path.isdir(ret) or ignore_existance: - return ret - return None - - def init_model_config(self) -> None: - # Get the model_type from the config or assume a model type if it isn't present - try: - self.model_config = AutoConfig.from_pretrained( - self.get_local_model_path() or self.model_name, - revision=utils.koboldai_vars.revision, - cache_dir="cache", - ) - utils.koboldai_vars.model_type = self.model_config.model_type - except ValueError: - utils.koboldai_vars.model_type = { - "NeoCustom": "gpt_neo", - "GPT2Custom": "gpt2", - }.get(utils.koboldai_vars.model) - - if not utils.koboldai_vars.model_type: - logger.warning( - "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)" - ) - utils.koboldai_vars.model_type = "gpt_neo" diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py index 7661a67f..c99e9a05 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj.py @@ -16,19 +16,17 @@ from modeling.inference_model import ( GenerationSettings, ModelCapabilities, ) -from modeling.inference_models.hf import 
HFInferenceModel - -# This file shouldn't be imported unless using the TPU -assert utils.koboldai_vars.use_colab_tpu -import tpu_mtj_backend +from modeling.inference_models.parents.hf import HFInferenceModel -class HFMTJInferenceModel(HFInferenceModel): + + +class model_loader(HFInferenceModel): def __init__( self, - model_name: str, + #model_name: str, ) -> None: - super().__init__(model_name) + super().__init__() self.model_config = None self.capabilties = ModelCapabilities( @@ -38,8 +36,13 @@ class HFMTJInferenceModel(HFInferenceModel): post_token_probs=False, uses_tpu=True, ) + + def is_valid(self, model_name, model_path, menu_path): + # This file shouldn't be imported unless using the TPU + return utils.koboldai_vars.use_colab_tpu and super().is_valid(model_name, model_path, menu_path) def setup_mtj(self) -> None: + import tpu_mtj_backend def mtj_warper_callback(scores) -> "np.array": scores_shape = scores.shape scores_list = scores.tolist() @@ -175,6 +178,7 @@ class HFMTJInferenceModel(HFInferenceModel): tpu_mtj_backend.settings_callback = mtj_settings_callback def _load(self, save_model: bool, initial_load: bool) -> None: + import tpu_mtj_backend self.setup_mtj() self.init_model_config() utils.koboldai_vars.allowsp = True @@ -207,6 +211,7 @@ class HFMTJInferenceModel(HFInferenceModel): ] def get_soft_tokens(self) -> np.array: + import tpu_mtj_backend soft_tokens = None if utils.koboldai_vars.sp is None: @@ -258,6 +263,7 @@ class HFMTJInferenceModel(HFInferenceModel): seed: Optional[int] = None, **kwargs, ) -> GenerationResult: + import tpu_mtj_backend warpers.update_settings() soft_tokens = self.get_soft_tokens() diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index c6294374..56e88205 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -21,13 +21,99 @@ class HordeException(Exception): """To be used for errors on server side of the Horde.""" -class HordeInferenceModel(InferenceModel): +class model_loader(InferenceModel): def __init__(self) -> None: super().__init__() + self.url = "https://horde.koboldai.net" + self.key = "0000000000" + self.models = self.get_cluster_models() + # Do not allow API to be served over the API self.capabilties = ModelCapabilities(api_host=False) + def is_valid(self, model_name, model_path, menu_path): + logger.debug("Horde Models: {}".format(self.models)) + return model_name == "CLUSTER" or model_name in [x['value'] for x in self.models] + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + requested_parameters.extend([{ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "url", + "default": self.url, + "tooltip": "URL to the horde.", + "menu_path": "", + "check": {"value": "", 'check': "!="}, + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "text", + "unit": "text", + "label": "Key", + "id": "key", + "default": self.key, + "check": {"value": "", 'check': "!="}, + "tooltip": "User Key to use when connecting to Horde (0000000000 is anonymous).", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "dropdown", + "unit": "text", + "label": "Model", + "id": "model", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Which model to use when running OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": False, + "extra_classes": "", + 'children': self.models, + + }]) + return requested_parameters + + def set_input_parameters(self, 
url="", key="", model=""): + self.key = key.strip() + self.model = model + self.url = url + + def get_cluster_models(self): + # Get list of models from public cluster + logger.info("Retrieving engine list...") + try: + req = requests.get(f"{self.url}/api/v2/status/models?type=text") + except: + logger.init_err("KAI Horde Models", status="Failed") + logger.error("Provided KoboldAI Horde URL unreachable") + emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"}) + return + if not req.ok: + # Something went wrong, print the message and quit since we can't initialize an engine + logger.init_err("KAI Horde Models", status="Failed") + logger.error(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1") + return + + engines = req.json() + try: + engines = [{"text": en["name"], "value": en["name"]} for en in engines] + except: + logger.error(engines) + raise + logger.debug(engines) + + online_model = "" + + logger.init_ok("KAI Horde Models", status="OK") + + return engines + def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer( utils.koboldai_vars.cluster_requested_models[0] diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index 1441ae2f..01c0c037 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -12,13 +12,96 @@ from modeling.inference_model import ( ) + class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") -class OpenAIAPIInferenceModel(InferenceModel): +class model_loader(InferenceModel): """InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.key = "" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "OAI" or model_name == "GooseAI" + + def get_requested_parameters(self, model_name, model_path, menu_path): + self.source = model_name + requested_parameters = [] + requested_parameters.extend([{ + "uitype": "text", + "unit": "text", + "label": "Key", + "id": "key", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "User Key to use when connecting to OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "dropdown", + "unit": "text", + "label": "Model", + "id": "model", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Which model to use when running OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": False, + "extra_classes": "", + 'children': self.get_oai_models(), + + }]) + return requested_parameters + + def set_input_parameters(self, key="", model=""): + self.key = key.strip() + self.model = model + + def get_oai_models(self): + if self.key == "": + return [] + if self.source == 'OAI': + url = "https://api.openai.com/v1/engines" + elif self.source == 'GooseAI': + url = "https://api.goose.ai/v1/engines" + else: + return + + # Get list of models from OAI + logger.init("OAI Engines", status="Retrieving") + req = requests.get( + url, + headers = { + 'Authorization': 'Bearer '+self.key + } + ) + if(req.status_code == 200): + r = req.json() + engines = r["data"] + try: + engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines] + except: + logger.error(engines) + raise + + online_model = "" + + + logger.init_ok("OAI Engines", status="OK") + 
return engines + else: + # Something went wrong, print the message and quit since we can't initialize an engine + logger.init_err("OAI Engines", status="Failed") + logger.error(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) + return [] + def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer("gpt2") diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py new file mode 100644 index 00000000..54781296 --- /dev/null +++ b/modeling/inference_models/parents/hf.py @@ -0,0 +1,219 @@ +import os +from typing import Optional +from transformers import AutoConfig + +import utils +import koboldai_settings +from logger import logger +from modeling.inference_model import InferenceModel +import torch + + +class HFInferenceModel(InferenceModel): + def __init__(self) -> None: + super().__init__() + self.model_config = None + #self.model_name = model_name + + self.model = None + self.tokenizer = None + + def is_valid(self, model_name, model_path, menu_path): + try: + if model_path is not None and os.path.exists(model_path): + model_config = AutoConfig.from_pretrained(model_path) + elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): + model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + else: + model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + return True + except: + return False + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + + if model_path is not None and os.path.exists(model_path): + model_config = AutoConfig.from_pretrained(model_path) + elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): + model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + else: + model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + layer_count = model_config["n_layer"] if isinstance(model_config, dict) else model_config.num_layers if hasattr(model_config, "num_layers") else model_config.n_layer if hasattr(model_config, "n_layer") else model_config.num_hidden_layers if hasattr(model_config, 'num_hidden_layers') else None + if layer_count is not None and layer_count >= 0: + if os.path.exists("settings/{}.breakmodel".format(model_name.replace("/", "_"))): + with open("settings/{}.breakmodel".format(model_name.replace("/", "_")), "r") as file: + data = [x for x in file.read().split("\n")[:2] if x != ''] + if len(data) < 2: + data.append("0") + break_values, disk_blocks = data + break_values = break_values.split(",") + else: + break_values = [layer_count] + disk_blocks = None + break_values = [int(x) for x in break_values if x != '' and x is not None] + gpu_count = torch.cuda.device_count() + break_values += [0] * (gpu_count - len(break_values)) + if disk_blocks is not None: + break_values += [disk_blocks] + for i in range(gpu_count): + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "{} Layers".format(torch.cuda.get_device_name(i)), + "id": "{} Layers".format(i), + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + 
"check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": break_values[i], + "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "CPU Layers", + "id": "CPU Layers", + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": layer_count - sum(break_values), + "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + if disk_blocks is not None: + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "Disk Layers", + "id": "Disk_Layers", + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": disk_blocks, + "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. Use as a last resort.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + else: + requested_parameters.append({ + "uitype": "toggle", + "unit": "bool", + "label": "Use GPU", + "id": "use_gpu", + "default": False, + "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. 
Use as a last resort.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + + + return requested_parameters + + def set_input_parameters(self, layers=[], disk_layers=0, use_gpu=False): + self.layers = layers + self.disk_layers = disk_layers + self.use_gpu = use_gpu + + def _post_load(self) -> None: + # These are model specific tokenizer overrides if a model has bad defaults + if utils.koboldai_vars.model_type == "llama": + self.tokenizer.decode_with_prefix_space = True + self.tokenizer.add_bos_token = False + elif utils.koboldai_vars.model_type == "opt": + self.tokenizer._koboldai_header = self.tokenizer.encode("") + self.tokenizer.add_bos_token = False + self.tokenizer.add_prefix_space = False + + # Change newline behavior to match model quirks + if utils.koboldai_vars.model_type == "xglm": + # Default to newline mode if using XGLM + utils.koboldai_vars.newlinemode = "s" + elif utils.koboldai_vars.model_type in ["opt", "bloom"]: + # Handle but don't convert newlines if using Fairseq models that have newlines trained in them + utils.koboldai_vars.newlinemode = "ns" + + # Clean up tokens that cause issues + if ( + utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default + and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj") + ): + utils.koboldai_vars.badwordsids = [ + [v] + for k, v in self.tokenizer.get_vocab().items() + if any(c in str(k) for c in "[]") + ] + + if utils.koboldai_vars.newlinemode == "n": + utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id]) + + return super()._post_load() + + def get_local_model_path( + self, legacy: bool = False, ignore_existance: bool = False + ) -> Optional[str]: + """ + Returns a string of the model's path locally, or None if it is not downloaded. + If ignore_existance is true, it will always return a path. 
+ """ + + if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]: + model_path = utils.koboldai_vars.custmodpth + assert model_path + + # Path can be absolute or relative to models directory + if os.path.exists(model_path): + return model_path + + model_path = os.path.join("models", model_path) + + try: + assert os.path.exists(model_path) + except AssertionError: + logger.error(f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'.") + raise + + return model_path + + basename = utils.koboldai_vars.model.replace("/", "_") + if legacy: + ret = basename + else: + ret = os.path.join("models", basename) + + if os.path.isdir(ret) or ignore_existance: + return ret + return None + + def init_model_config(self) -> None: + # Get the model_type from the config or assume a model type if it isn't present + try: + self.model_config = AutoConfig.from_pretrained( + self.get_local_model_path() or self.model_name, + revision=utils.koboldai_vars.revision, + cache_dir="cache", + ) + utils.koboldai_vars.model_type = self.model_config.model_type + except ValueError: + utils.koboldai_vars.model_type = { + "NeoCustom": "gpt_neo", + "GPT2Custom": "gpt2", + }.get(utils.koboldai_vars.model) + + if not utils.koboldai_vars.model_type: + logger.warning( + "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)" + ) + utils.koboldai_vars.model_type = "gpt_neo" diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/parents/hf_torch.py similarity index 94% rename from modeling/inference_models/hf_torch.py rename to modeling/inference_models/parents/hf_torch.py index 990fabfc..d8afafb1 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -31,7 +31,7 @@ from modeling import warpers from modeling.warpers import Warper from modeling.stoppers import Stoppers from modeling.post_token_hooks import PostTokenHooks -from modeling.inference_models.hf import HFInferenceModel +from modeling.inference_models.parents.hf import HFInferenceModel from modeling.inference_model import ( GenerationResult, GenerationSettings, @@ -55,13 +55,13 @@ LOG_SAMPLER_NO_EFFECT = False class HFTorchInferenceModel(HFInferenceModel): def __init__( self, - model_name: str, - lazy_load: bool, - low_mem: bool, + #model_name: str, + #lazy_load: bool, + #low_mem: bool, ) -> None: - super().__init__(model_name) - self.lazy_load = lazy_load - self.low_mem = low_mem + super().__init__() + #self.lazy_load = lazy_load + #self.low_mem = low_mem self.post_token_hooks = [ PostTokenHooks.stream_tokens, @@ -211,40 +211,6 @@ class HFTorchInferenceModel(HFInferenceModel): new_sample.old_sample = transformers.GenerationMixin.sample use_core_manipulations.sample = new_sample - # PEFT Loading. This MUST be done after all save_pretrained calls are - # finished on the main model. 
- if utils.args.peft: - from peft import PeftModel, PeftConfig - local_peft_dir = os.path.join(m_self.get_local_model_path(), "peft") - - # Make PEFT dir if it doesn't exist - try: - os.makedirs(local_peft_dir) - except FileExistsError: - pass - - peft_local_path = os.path.join(local_peft_dir, utils.args.peft.replace("/", "_")) - logger.debug(f"Loading PEFT '{utils.args.peft}', possible local path is '{peft_local_path}'.") - - peft_installed_locally = True - possible_peft_locations = [peft_local_path, utils.args.peft] - - for i, location in enumerate(possible_peft_locations): - try: - m_self.model = PeftModel.from_pretrained(m_self.model, location) - logger.debug(f"Loaded PEFT at '{location}'") - break - except ValueError: - peft_installed_locally = False - if i == len(possible_peft_locations) - 1: - raise RuntimeError(f"Unable to load PeftModel for given name '{utils.args.peft}'. Does it exist?") - except RuntimeError: - raise RuntimeError("Error while loading PeftModel. Are you using the correct model?") - - if not peft_installed_locally: - logger.debug(f"PEFT not saved to models folder; saving to '{peft_local_path}'") - m_self.model.save_pretrained(peft_local_path) - return super()._post_load() def _raw_generate( @@ -272,13 +238,8 @@ class HFTorchInferenceModel(HFInferenceModel): with torch.no_grad(): start_time = time.time() - - # HEED & BEWARE: All arguments passed to self.model.generate MUST be - # kwargs; see https://github.com/huggingface/peft/issues/232. If they - # aren't, PeftModel will EXPLODE!!!! But nothing will happen without - # a PEFT loaded so it's sneaky. genout = self.model.generate( - input_ids=gen_in, + gen_in, do_sample=True, max_length=min( len(prompt_tokens) + max_new, utils.koboldai_vars.max_length @@ -304,7 +265,6 @@ class HFTorchInferenceModel(HFInferenceModel): def _get_model(self, location: str, tf_kwargs: Dict): tf_kwargs["revision"] = utils.koboldai_vars.revision tf_kwargs["cache_dir"] = "cache" - tf_kwargs["trust_remote_code"] = utils.koboldai_vars.trust_remote_code # If we have model hints for legacy model, use them rather than fall back. try: diff --git a/modeling/inference_models/rwkv.py b/modeling/inference_models/rwkv.py index 006bb8fd..d14d8c81 100644 --- a/modeling/inference_models/rwkv.py +++ b/modeling/inference_models/rwkv.py @@ -17,7 +17,7 @@ from torch.nn import functional as F os.environ["RWKV_JIT_ON"] = "1" # TODO: Include compiled kernel os.environ["RWKV_CUDA_ON"] = "1" -from rwkv.model import RWKV + import utils from logger import logger @@ -55,13 +55,13 @@ MODEL_FILES = { } -class RWKVInferenceModel(InferenceModel): +class model_loader(InferenceModel): def __init__( self, - model_name: str, + #model_name: str, ) -> None: super().__init__() - self.model_name = model_name + #self.model_name = model_name self.post_token_hooks = [ PostTokenHooks.stream_tokens, @@ -83,6 +83,23 @@ class RWKVInferenceModel(InferenceModel): ) self._old_stopping_criteria = None + def is_valid(self, model_name, model_path, menu_path): + try: + from rwkv.model import RWKV + valid = True + except: + valid = False + return valid and "rwkv" in model_name.lower() + + def get_requested_parameters(self, model_name, model_path, menu_path): + self.source = model_name + requested_parameters = [] + return requested_parameters + + def set_input_parameters(self): + return + + def _ensure_directory_structure(self) -> None: for path in ["models/rwkv", "models/rwkv/models"]: try: @@ -145,6 +162,7 @@ class RWKVInferenceModel(InferenceModel): # Now we load! 
# TODO: Breakmodel to strat + from rwkv.model import RWKV self.model = RWKV(model=model_path, strategy="cuda:0 fp16") def _apply_warpers( diff --git a/static/koboldai.css b/static/koboldai.css index 230f1cbf..f3dde4b7 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -347,6 +347,28 @@ border-top-right-radius: var(--tabs_rounding); } +.setting_container_model { + display: grid; + grid-template-areas: "label value" + "item item" + "minlabel maxlabel"; + grid-template-rows: 20px 23px 20px; + grid-template-columns: auto 30px; + row-gap: 0.2em; + background-color: var(--setting_background); + color: var(--setting_text); + border-radius: var(--radius_settings_background); + padding: 2px; + margin: 2px; + width: calc(100%); +} + +.setting_container_model .setting_item{ + font-size: calc(0.93em + var(--font_size_adjustment)); + margin-left: 10px; +} + + .setting_minlabel { padding-top: 6px; grid-area: minlabel; @@ -3370,6 +3392,23 @@ textarea { } } +@keyframes pulse-red { + 0% { + transform: scale(0.95); + box-shadow: 0 0 0 0 rgba(255, 0, 0, 0.7); + } + + 70% { + transform: scale(1); + box-shadow: 0 0 0 10px rgba(255, 0, 0, 0); + } + + 100% { + transform: scale(0.95); + box-shadow: 0 0 0 0 rgba(255, 0, 0, 0); + } +} + @keyframes pulse-text { 0% { filter: blur(3px); @@ -3391,6 +3430,11 @@ textarea { } } +.input_error { + border: 5px solid red !important; + box-sizing: border-box !important; +} + .single_pulse { animation: pulse-text 0.5s 1; } diff --git a/static/koboldai.js b/static/koboldai.js index cfc32d21..0656253f 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -15,6 +15,7 @@ socket.on('popup_items', function(data){popup_items(data);}); socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);}); socket.on('popup_edit_file', function(data){popup_edit_file(data);}); socket.on('show_model_menu', function(data){show_model_menu(data);}); +socket.on('open_model_load_menu', function(data){new_show_model_menu(data);}); socket.on('selected_model_info', function(data){selected_model_info(data);}); socket.on('oai_engines', function(data){oai_engines(data);}); socket.on('buildload', function(data){buildload(data);}); @@ -81,6 +82,7 @@ const on_colab = $el("#on_colab").textContent == "true"; let story_id = -1; var dirty_chunks = []; var initial_socketio_connection_occured = false; +var selected_model_data; // Each entry into this array should be an object that looks like: // {class: "class", key: "key", func: callback} @@ -1500,49 +1502,46 @@ function getModelParameterCount(modelName) { return base * multiplier; } -function show_model_menu(data) { - //clear old options - document.getElementById("modelkey").classList.add("hidden"); - document.getElementById("modelkey").value = ""; - document.getElementById("modelurl").classList.add("hidden"); - document.getElementById("use_gpu_div").classList.add("hidden"); - document.getElementById("use_8_bit_div").classList.add("hidden"); - document.getElementById("modellayers").classList.add("hidden"); - document.getElementById("oaimodel").classList.add("hidden"); - var model_layer_bars = document.getElementById('model_layer_bars'); - while (model_layer_bars.firstChild) { - model_layer_bars.removeChild(model_layer_bars.firstChild); +function new_show_model_menu(data) { + //clear out the loadmodelsettings + var loadmodelsettings = document.getElementById('loadmodelsettings') + while (loadmodelsettings.firstChild) { + loadmodelsettings.removeChild(loadmodelsettings.firstChild); } + 
document.getElementById("modelplugin").classList.add("hidden"); + var accept = document.getElementById("btn_loadmodelaccept"); + accept.disabled = false; //clear out the breadcrumbs var breadcrumbs = document.getElementById('loadmodellistbreadcrumbs') while (breadcrumbs.firstChild) { breadcrumbs.removeChild(breadcrumbs.firstChild); } - //add breadcrumbs - //console.log(data.breadcrumbs); - for (item of data.breadcrumbs) { - var button = document.createElement("button"); - button.classList.add("breadcrumbitem"); - button.setAttribute("model", data.menu); - button.setAttribute("folder", item[0]); - button.textContent = item[1]; - button.onclick = function () { - socket.emit('select_model', {'menu': "", 'model': this.getAttribute("model"), 'path': this.getAttribute("folder")}); - }; - breadcrumbs.append(button); - var span = document.createElement("span"); - span.textContent = "\\"; - breadcrumbs.append(span); - } + //add breadcrumbs + if ('breadcrumbs' in data) { + for (item of data.breadcrumbs) { + var button = document.createElement("button"); + button.classList.add("breadcrumbitem"); + button.setAttribute("model", data.menu); + button.setAttribute("folder", item[0]); + button.textContent = item[1]; + button.onclick = function () { + socket.emit('select_model', {'menu': "", 'name': this.getAttribute("model"), 'path': this.getAttribute("folder")}); + }; + breadcrumbs.append(button); + var span = document.createElement("span"); + span.textContent = "\\"; + breadcrumbs.append(span); + } + } //clear out the items var model_list = document.getElementById('loadmodellistcontent') while (model_list.firstChild) { model_list.removeChild(model_list.firstChild); } //add items - for (item of data.data) { + for (item of data.items) { var list_item = document.createElement("span"); list_item.classList.add("model_item"); @@ -1564,10 +1563,27 @@ function show_model_menu(data) { //create the actual item var popup_item = document.createElement("span"); popup_item.classList.add("model"); - popup_item.setAttribute("display_name", item.label); - popup_item.id = item.name; + for (const key in item) { + if (key == "name") { + popup_item.id = item[key]; + } + popup_item.setAttribute(key, item[key]); + } + + popup_item.onclick = function() { + var attributes = this.attributes; + var obj = {}; + + for (var i = 0, len = attributes.length; i < len; i++) { + obj[attributes[i].name] = attributes[i].value; + } + //put the model data on the accept button so we can send it to the server when you accept + var accept = document.getElementById("popup_accept"); + selected_model_data = obj; + //send the data to the server so it can figure out what data we need from the user for the model + socket.emit('select_model', obj); + } - popup_item.setAttribute("Menu", data.menu) //name text var text = document.createElement("span"); text.style="grid-area: item;"; @@ -1615,241 +1631,223 @@ function show_model_menu(data) { }); })(); - popup_item.onclick = function () { - var accept = document.getElementById("btn_loadmodelaccept"); - accept.classList.add("disabled"); - socket.emit("select_model", {"model": this.id, "menu": this.getAttribute("Menu"), "display_name": this.getAttribute("display_name")}); - var model_list = document.getElementById('loadmodellistcontent').getElementsByClassName("selected"); - for (model of model_list) { - model.classList.remove("selected"); - } - this.classList.add("selected"); - accept.setAttribute("selected_model", this.id); - accept.setAttribute("menu", this.getAttribute("Menu")); - 
accept.setAttribute("display_name", this.getAttribute("display_name")); - }; list_item.append(popup_item); - - model_list.append(list_item); } - var accept = document.getElementById("btn_loadmodelaccept"); - accept.disabled = true; - //finally, if they selected the custom hugging face menu we show the input box - if (data['menu'] == "customhuggingface") { - document.getElementById("custommodelname").classList.remove("hidden"); - } else { - document.getElementById("custommodelname").classList.add("hidden"); - } - - - // detect if we are in a model selection screen and show the reference - var refelement = document.getElementById("modelspecifier"); - var check = document.getElementById("mainmenu"); - if (check) { - refelement.classList.remove("hidden"); - } else { - refelement.classList.add("hidden"); - } openPopup("load-model"); + } + function selected_model_info(data) { + //clear out the loadmodelsettings + var loadmodelsettings = document.getElementById('loadmodelsettings') + while (loadmodelsettings.firstChild) { + loadmodelsettings.removeChild(loadmodelsettings.firstChild); + } var accept = document.getElementById("btn_loadmodelaccept"); - //hide or unhide key - if (data.key) { - document.getElementById("modelkey").classList.remove("hidden"); - document.getElementById("modelkey").value = data.key_value; - } else { - document.getElementById("modelkey").classList.add("hidden"); - document.getElementById("modelkey").value = ""; - } - //hide or unhide URL - if (data.url) { - document.getElementById("modelurl").classList.remove("hidden"); - } else { - document.getElementById("modelurl").classList.add("hidden"); - } - - //hide or unhide 8 bit mode - if (data.bit_8_available) { - document.getElementById("use_8_bit_div").classList.remove("hidden"); - } else { - document.getElementById("use_8_bit_div").classList.add("hidden"); - document.getElementById("use_8_bit").checked = false; - } - - //default URL loading - if (data.default_url != null) { - document.getElementById("modelurl").value = data.default_url; - } - - //change model loading on url if needed - if (data.models_on_url) { - document.getElementById("modelurl").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': document.getElementById("modelkey").value, 'url': this.value});}; - document.getElementById("modelkey").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value, 'url': document.getElementById("modelurl").value});}; - } else { - document.getElementById("modelkey").ochange = function () {socket.emit('OAI_Key_Update', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value});}; - document.getElementById("modelurl").ochange = null; - } - - //show model select for APIs - if (data.show_online_model_select) { - document.getElementById("oaimodel").classList.remove("hidden"); - } else { - document.getElementById("oaimodel").classList.add("hidden"); - } - - //Multiple Model Select? 
- if (data.multi_online_models) { - document.getElementById("oaimodel").setAttribute("multiple", ""); - document.getElementById("oaimodel").options[0].textContent = "All" - } else { - document.getElementById("oaimodel").removeAttribute("multiple"); - document.getElementById("oaimodel").options[0].textContent = "Select Model(s)" - } - - //hide or unhide the use gpu checkbox - if (data.gpu) { - document.getElementById("use_gpu_div").classList.remove("hidden"); - } else { - document.getElementById("use_gpu_div").classList.add("hidden"); - } - //setup breakmodel - if (data.breakmodel) { - document.getElementById("modellayers").classList.remove("hidden"); - //setup model layer count - document.getElementById("gpu_layers_current").textContent = data.break_values.reduce((a, b) => a + b, 0); - document.getElementById("gpu_layers_max").textContent = data.layer_count; - document.getElementById("gpu_count").value = data.gpu_count; - - //create the gpu load bars - var model_layer_bars = document.getElementById('model_layer_bars'); - while (model_layer_bars.firstChild) { - model_layer_bars.removeChild(model_layer_bars.firstChild); - } - - //Add the bars - for (let i = 0; i < data.gpu_names.length; i++) { - var div = document.createElement("div"); - div.classList.add("model_setting_container"); - //build GPU text - var span = document.createElement("span"); - span.classList.add("model_setting_label"); - span.textContent = "GPU " + i + " " + data.gpu_names[i] + ": " - //build layer count box - var input = document.createElement("input"); - input.classList.add("model_setting_value"); - input.classList.add("setting_value"); - input.inputmode = "numeric"; - input.id = "gpu_layers_box_"+i; - input.value = data.break_values[i]; - input.onblur = function () { - document.getElementById(this.id.replace("_box", "")).value = this.value; - update_gpu_layers(); - } - span.append(input); - div.append(span); - //build layer count slider - var input = document.createElement("input"); - input.classList.add("model_setting_item"); - input.type = "range"; - input.min = 0; - input.max = data.layer_count; - input.step = 1; - input.value = data.break_values[i]; - input.id = "gpu_layers_" + i; - input.onchange = function () { - document.getElementById(this.id.replace("gpu_layers", "gpu_layers_box")).value = this.value; - update_gpu_layers(); - } - div.append(input); - //build slider bar #s - //min - var span = document.createElement("span"); - span.classList.add("model_setting_minlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = 0; - span.append(span2); - div.append(span); - //max - var span = document.createElement("span"); - span.classList.add("model_setting_maxlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = data.layer_count; - span.append(span2); - div.append(span); - - model_layer_bars.append(div); - } - - //add the disk layers - if (data.disk_break) { - var div = document.createElement("div"); - div.classList.add("model_setting_container"); - //build GPU text - var span = document.createElement("span"); - span.classList.add("model_setting_label"); - span.textContent = "Disk cache: " - //build layer count box - var input = document.createElement("input"); - input.classList.add("model_setting_value"); - input.classList.add("setting_value"); - input.inputmode = "numeric"; - input.id = "disk_layers_box"; - input.value = data.disk_break_value; - input.onblur = function () { - 
document.getElementById(this.id.replace("_box", "")).value = this.value; - update_gpu_layers(); - } - span.append(input); - div.append(span); - //build layer count slider - var input = document.createElement("input"); - input.classList.add("model_setting_item"); - input.type = "range"; - input.min = 0; - input.max = data.layer_count; - input.step = 1; - input.value = data.disk_break_value; - input.id = "disk_layers"; - input.onchange = function () { - document.getElementById(this.id+"_box").value = this.value; - update_gpu_layers(); - } - div.append(input); - //build slider bar #s - //min - var span = document.createElement("span"); - span.classList.add("model_setting_minlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = 0; - span.append(span2); - div.append(span); - //max - var span = document.createElement("span"); - span.classList.add("model_setting_maxlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = data.layer_count; - span.append(span2); - div.append(span); - } - - model_layer_bars.append(div); - - update_gpu_layers(); - } else { - document.getElementById("modellayers").classList.add("hidden"); - accept.classList.remove("disabled"); - } accept.disabled = false; + modelplugin = document.getElementById("modelplugin"); + modelplugin.classList.remove("hidden"); + modelplugin.onchange = function () { + for (const area of document.getElementsByClassName("model_plugin_settings_area")) { + area.classList.add("hidden"); + } + document.getElementById(this.value + "_settings_area").classList.remove("hidden"); + } + //create the content + for (const [loader, items] of Object.entries(data)) { + model_area = document.createElement("DIV"); + model_area.id = loader + "_settings_area"; + model_area.classList.add("model_plugin_settings_area"); + model_area.classList.add("hidden"); + modelpluginoption = document.createElement("option"); + modelpluginoption.innerText = loader; + modelpluginoption.value = loader; + modelplugin.append(modelpluginoption); + + for (item of items) { + let new_setting = document.getElementById('blank_model_settings').cloneNode(true); + new_setting.id = loader; + new_setting.classList.remove("hidden"); + new_setting.querySelector('#blank_model_settings_label').innerText = item['label']; + new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']); + + onchange_event = function () { + //get check value: + if ('sum' in this.check_data) { + check_value = 0 + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value); + } + } + } else { + check_value = this.value + } + if (this.check_data['check'] == "=") { + valid = (check_value == this.check_data['value']); + } else if (this.check_data['check'] == "!=") { + valid = (check_value != this.check_data['value']); + } else if (this.check_data['check'] == ">=") { + valid = (check_value >= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value <= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value > this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value < this.check_data['value']); + } + if (valid) { + //if we are supposed to refresh when this value 
changes we'll resubmit + if (this.getAttribute("refresh_model_inputs") == "true") { + console.log("resubmit"); + } + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } + } + } else { + this.closest(".setting_container_model").classList.remove('input_error'); + this.closest(".setting_container_model").removeAttribute("tooltip"); + } + var accept = document.getElementById("btn_loadmodelaccept"); + if (document.getElementsByClassName("input_error").length) + accept.disabled = true; + } else { + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + } else { + this.closest(".setting_container_model").classList.add('input_error'); + this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + var accept = document.getElementById("btn_loadmodelaccept"); + if (document.getElementsByClassName("input_error").length > 0) { + accept.classList.add("disabled"); + accept.disabled = true; + } else { + accept.classList.remove("disabled"); + accept.disabled = false; + } + + } + if (item['uitype'] == "slider") { + var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number'); + slider_number.value = item['default']; + slider_number.id = loader + "|" + item['id'] + "_value_text"; + slider_number.onchange = function() { document.getElementById(this.id.replace("_text", "")).value = this.value;}; + + var slider = new_setting.querySelector('#blank_model_settings_slider'); + slider.value = item['default']; + slider.min = item['min']; + slider.max = item['max']; + slider.id = loader + "|" + item['id'] + "_value"; + if ('check' in item) { + slider.check_data = item['check']; + slider_number.check_data = item['check']; + } else { + slider.check_data = null; + slider_number.check_data = null; + } + slider.oninput = function() { document.getElementById(this.id+"_text").value = this.value;}; + slider.onchange = onchange_event; + slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min']; + new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max']; + slider.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_slider').classList.add("hidden"); + } + if (item['uitype'] == "toggle") { + var toggle = new_setting.querySelector('#blank_model_settings_toggle'); + toggle.id = loader + "|" + item['id'] + "_value"; + toggle.checked = item['default']; + toggle.onchange = onchange_event; + toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + toggle.check_data = item['check']; + } else { + toggle.check_data = null; + } + toggle.onchange(); + } else { + 
new_setting.querySelector('#blank_model_settings_checkbox_container').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_toggle').classList.add("hidden"); + } + if (item['uitype'] == "dropdown") { + var select_element = new_setting.querySelector('#blank_model_settings_dropdown'); + select_element.id = loader + "|" + item['id'] + "_value"; + for (const dropdown_value of item['children']) { + new_option = document.createElement("option"); + new_option.value = dropdown_value['value']; + new_option.innerText = dropdown_value['text']; + select_element.append(new_option); + } + select_element.value = item['default']; + select_element.onchange = onchange_event; + select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + select_element.check_data = item['check']; + } else { + select_element.check_data = null; + } + select_element.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_dropdown').classList.add("hidden"); + } + if (item['uitype'] == "password") { + var password_item = new_setting.querySelector('#blank_model_settings_password'); + password_item.id = loader + "|" + item['id'] + "_value"; + password_item.value = item['default']; + password_item.onchange = onchange_event; + password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + password_item.check_data = item['check']; + } else { + password_item.check_data = null; + } + password_item.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_password').classList.add("hidden"); + } + if (item['uitype'] == "text") { + var text_item = new_setting.querySelector('#blank_model_settings_text'); + text_item.id = loader + "|" + item['id'] + "_value"; + text_item.value = item['default']; + text_item.onchange = onchange_event; + text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + text_item.check_data = item['check']; + } else { + text_item.check_data = null; + } + text_item.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_text').classList.add("hidden"); + } + + model_area.append(new_setting); + loadmodelsettings.append(model_area); + } + } + + //unhide the first plugin settings + console.log(document.getElementById("modelplugin").value + "_settings_area"); + if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) { + document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); + } } @@ -1877,42 +1875,16 @@ function update_gpu_layers() { function load_model() { var accept = document.getElementById('btn_loadmodelaccept'); - gpu_layers = [] - disk_layers = 0; - if (!(document.getElementById("modellayers").classList.contains("hidden"))) { - for (let i=0; i < document.getElementById("gpu_count").value; i++) { - gpu_layers.push(document.getElementById("gpu_layers_"+i).value); - } - if (document.getElementById("disk_layers")) { - disk_layers = document.getElementById("disk_layers").value; - } - } - //Need to do different stuff with custom models - if ((accept.getAttribute('menu') == 'GPT2Custom') || (accept.getAttribute('menu') == 'NeoCustom')) { - var model = document.getElementById("btn_loadmodelaccept").getAttribute("menu"); - var path = document.getElementById("btn_loadmodelaccept").getAttribute("display_name"); - } else { - var model = document.getElementById("btn_loadmodelaccept").getAttribute("selected_model"); - var 
path = ""; - } + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); - let selected_models = []; - for (item of document.getElementById("oaimodel").selectedOptions) { - selected_models.push(item.value); - } - if (selected_models == ['']) { - - selected_models = []; - } else if (selected_models.length == 1) { - selected_models = selected_models[0]; + //get an object of all the input settings from the user + data = {} + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + data[element.id.split("|")[1].replace("_value", "")] = element.value; } + data = {...data, ...selected_model_data} - message = {'model': model, 'path': path, 'use_gpu': document.getElementById("use_gpu").checked, - 'key': document.getElementById('modelkey').value, 'gpu_layers': gpu_layers.join(), - 'disk_layers': disk_layers, 'url': document.getElementById("modelurl").value, - 'online_model': selected_models, - 'use_8_bit': document.getElementById('use_8_bit').checked}; - socket.emit("load_model", message); + socket.emit("load_model", data); closePopups(); } diff --git a/templates/popups.html b/templates/popups.html index 12c4c27a..59f07e70 100644 --- a/templates/popups.html +++ b/templates/popups.html @@ -46,35 +46,11 @@
[popups.html hunk garbled during extraction; only the "Usage (VRAM)" column header and bare -/+ line markers survive. The hunk replaces the old model-key/URL/GPU-layer widgets with the "modelplugin" selector and "loadmodelsettings" container used by the JavaScript above.]
diff --git a/templates/templates.html b/templates/templates.html
index 4f16ff66..49cd3e5b 100644
--- a/templates/templates.html
+++ b/templates/templates.html
@@ -154,3 +154,22 @@
[templates.html hunk garbled during extraction; only the "help_icon" label and bare + line markers survive. It adds the hidden "blank_model_settings" template (label, tooltip, slider, toggle, dropdown, password and text inputs) that the popup JavaScript clones for each loader setting.]
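[Illustrative note, not part of the patch: the popup JavaScript above builds its form from plain parameter descriptors returned by each loader's get_requested_parameters() and gates the Load button on the attached "check" rule. The standalone Python sketch below mirrors the "Key" descriptor defined in the horde/openai loaders in this series; passes_check is a hypothetical helper that reproduces the comparison the UI performs client-side, and it does not cover the additional {"sum": [...]} form used for the GPU/CPU/disk layer split.]

import operator

# Comparison operators understood by a descriptor's "check" field
# (same set the popup's onchange handler evaluates).
_OPS = {"=": operator.eq, "!=": operator.ne, ">=": operator.ge, "<=": operator.le}

def passes_check(value, check):
    """Return True when `value` satisfies a descriptor's 'check' rule."""
    return _OPS[check["check"]](value, check["value"])

# One descriptor of the shape get_requested_parameters() returns; the popup
# renders a text box for it and disables Load until the check passes.
api_key_parameter = {
    "uitype": "text",
    "unit": "text",
    "label": "Key",
    "id": "key",
    "default": "",
    "check": {"value": "", "check": "!="},  # key must not be empty
    "tooltip": "User key used when connecting to the service.",
    "menu_path": "",
    "refresh_model_inputs": True,
    "extra_classes": "",
}

if __name__ == "__main__":
    print(passes_check("my-secret-key", api_key_parameter["check"]))  # True
    print(passes_check("", api_key_parameter["check"]))               # False

[End of illustrative note.]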
\ No newline at end of file From 77dd5aa7259f65262f6077957b493c74d98eaa24 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 09:09:09 -0400 Subject: [PATCH 02/68] Minor update --- aiserver.py | 7 +++++-- modeling/inference_models/horde.py | 2 +- static/koboldai.js | 4 +++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index e7227c81..ac90d6f4 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6489,7 +6489,7 @@ def UI_2_select_model(data): if valid: logger.debug("Valid Loaders: {}".format(valid_loaders)) emit("selected_model_info", valid_loaders) - if not valid: + if not valid and 'path' in data: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) output = [] @@ -6501,7 +6501,9 @@ def UI_2_select_model(data): break output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) - + elif not valid: + logger.error("Nothing can load the model: {}".format(valid_loaders)) + return @@ -6530,6 +6532,7 @@ def UI_2_select_model(data): def UI_2_load_model(data): logger.info("loading Model") logger.info(data) + model_loaders[data['plugin']].set_input_parameters(**data) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) #==================================================================# diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 56e88205..f02cf265 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -67,7 +67,7 @@ class model_loader(InferenceModel): "unit": "text", "label": "Model", "id": "model", - "default": "", + "default": model_name, "check": {"value": "", 'check': "!="}, "tooltip": "Which model to use when running OpenAI/GooseAI.", "menu_path": "", diff --git a/static/koboldai.js b/static/koboldai.js index 0656253f..1907add8 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1882,7 +1882,9 @@ function load_model() { for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { data[element.id.split("|")[1].replace("_value", "")] = element.value; } - data = {...data, ...selected_model_data} + data = {...data, ...selected_model_data}; + + data['plugin'] = document.getElementById("modelplugin").value; socket.emit("load_model", data); closePopups(); From 4605d10c370b994cfbd1d27891ccae6ade8b9c6b Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 12:08:35 -0400 Subject: [PATCH 03/68] Next iteration. 
Model Loading is broken completely now :) --- aiserver.py | 180 +++--------------- modeling/inference_model.py | 6 +- modeling/inference_models/api.py | 4 +- modeling/inference_models/basic_api.py | 4 +- modeling/inference_models/generic_hf_torch.py | 1 + modeling/inference_models/horde.py | 8 +- modeling/inference_models/openai.py | 6 +- modeling/inference_models/parents/hf.py | 56 ++++-- modeling/inference_models/parents/hf_torch.py | 2 +- modeling/inference_models/readonly.py | 77 ++++++++ static/koboldai.js | 13 +- 11 files changed, 170 insertions(+), 187 deletions(-) create mode 100644 modeling/inference_models/readonly.py diff --git a/aiserver.py b/aiserver.py index ac90d6f4..f9e60641 100644 --- a/aiserver.py +++ b/aiserver.py @@ -645,10 +645,14 @@ def new_socketio_on(*a, **k): socketio.on = new_socketio_on def emit(*args, **kwargs): - try: - return _emit(*args, **kwargs) - except AttributeError: - return socketio.emit(*args, **kwargs) + if has_request_context(): + try: + return _emit(*args, **kwargs) + except AttributeError: + return socketio.emit(*args, **kwargs) + else: #We're trying to send data outside of the http context. This won't work. Try the relay + if koboldai_settings.queue is not None: + koboldai_settings.queue.put([args[0], args[1], kwargs]) utils.emit = emit #replacement for tpool.execute to maintain request contexts @@ -1780,10 +1784,6 @@ def get_cluster_models(msg): emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1") emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2") - -def reset_model_settings(): - koboldai_vars.reset_for_model_load() - def unload_model(): global model @@ -1816,7 +1816,7 @@ def unload_model(): koboldai_vars.badwordsids = koboldai_settings.badwordsids_default -def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=False, online_model="", use_breakmodel_args=False, breakmodel_args_default_to_cpu=False, url=None, use_8_bit=False): +def load_model(plugin, initial_load=False): global model global tokenizer global model_config @@ -1827,79 +1827,18 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal if initial_load: use_breakmodel_args = True - reset_model_settings() koboldai_vars.reset_model() - koboldai_vars.cluster_requested_models = [online_model] if isinstance(online_model, str) else online_model - if koboldai_vars.cluster_requested_models == [""]: - koboldai_vars.cluster_requested_models = [] - koboldai_vars.noai = False - if not use_breakmodel_args: - set_aibusy(True) - if koboldai_vars.model != 'ReadOnly': - emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True) - #Have to add a sleep so the server will send the emit for some reason - time.sleep(0.1) + set_aibusy(True) + if koboldai_vars.model != 'ReadOnly': + emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True) + #Have to add a sleep so the server will send the emit for some reason + time.sleep(0.1) - if gpu_layers is not None: - args.breakmodel_gpulayers = gpu_layers - elif use_breakmodel_args: - gpu_layers = args.breakmodel_gpulayers - if breakmodel_args_default_to_cpu and gpu_layers is None: - gpu_layers = args.breakmodel_gpulayers = [] - if disk_layers is not None: - args.breakmodel_disklayers = int(disk_layers) - elif use_breakmodel_args: - disk_layers = args.breakmodel_disklayers - if 
breakmodel_args_default_to_cpu and disk_layers is None: - disk_layers = args.breakmodel_disklayers = 0 + if 'model' in globals(): + model.unload() - unload_model() - - if online_model == "": - koboldai_vars.configname = getmodelname() - #Let's set the GooseAI or OpenAI server URLs if that's applicable - else: - koboldai_vars.online_model = online_model - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - koboldai_vars.configname = f"GooseAI_{online_model.replace('/', '_')}" - elif koboldai_vars.model == "CLUSTER" and isinstance(online_model, list): - if len(online_model) != 1: - koboldai_vars.configname = koboldai_vars.model - else: - koboldai_vars.configname = f"{koboldai_vars.model}_{online_model[0].replace('/', '_')}" - else: - koboldai_vars.configname = f"{koboldai_vars.model}_{online_model.replace('/', '_')}" - - if path.exists(get_config_filename()): - changed=False - with open(get_config_filename(), "r") as file: - # Check if API key exists - js = json.load(file) - if 'online_model' in js: - if js['online_model'] != online_model: - changed=True - js['online_model'] = online_model - else: - changed=True - js['online_model'] = online_model - - if changed: - with open("settings/{}.v2_settings".format(koboldai_vars.model), "w") as file: - file.write(json.dumps(js, indent=3)) - - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - args.configname = "GooseAI" + "/" + online_model - elif koboldai_vars.model != "CLUSTER": - args.configname = koboldai_vars.model + "/" + online_model - koboldai_vars.oaiurl = koboldai_vars.oaiengines + "/{0}/completions".format(online_model) # If transformers model was selected & GPU available, ask to use CPU or GPU if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): @@ -1937,84 +1876,9 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal else: koboldai_vars.default_preset = koboldai_settings.default_preset - - # Ask for API key if InferKit was selected - if koboldai_vars.model == "InferKit": - koboldai_vars.apikey = koboldai_vars.oaiapikey - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - koboldai_vars.configname = "GooseAI" - - # Ask for API key if OpenAI was selected - if koboldai_vars.model == "OAI" and not koboldai_vars.configname: - koboldai_vars.configname = "OAI" - - if koboldai_vars.model == "ReadOnly": - koboldai_vars.noai = True - - # TODO: InferKit - if koboldai_vars.model == "ReadOnly" or koboldai_vars.noai: - pass - elif koboldai_vars.model in ["Colab", "API", "CLUSTER", "OAI"]: - koboldai_vars.colaburl = url or koboldai_vars.colaburl - koboldai_vars.usegpu = False - koboldai_vars.breakmodel = False - - if koboldai_vars.model == "Colab": - from modeling.inference_models.basic_api import model_loader - model = model_loader() - elif koboldai_vars.model == "API": - from modeling.inference_models.api import model_loader - model = model_loader(koboldai_vars.colaburl.replace("/request", "")) - elif koboldai_vars.model == "CLUSTER": - from modeling.inference_models.horde import model_loader - model = 
model_loader() - elif koboldai_vars.model == "OAI": - from modeling.inference_models.openai import model_loader - model = model_loader() - - model.load(initial_load=initial_load) - # TODO: This check sucks, make a model object or somethign - elif "rwkv" in koboldai_vars.model: - if koboldai_vars.use_colab_tpu: - raise RuntimeError("RWKV is not supported on the TPU.") - from modeling.inference_models.rwkv import model_loader - model = model_loader(koboldai_vars.model) - model.load() - elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai: - # HF Torch - logger.init("Transformers", status='Starting') - for m in ("GPTJModel", "XGLMModel"): - try: - globals()[m] = getattr(__import__("transformers"), m) - except: - pass - - from modeling.inference_models.generic_hf_torch import model_loader - model = model_loader( - koboldai_vars.model, - lazy_load=koboldai_vars.lazy_load, - low_mem=args.lowmem - ) - - model.load( - save_model=not (args.colab or args.cacheonly) or args.savemodel, - initial_load=initial_load, - ) - logger.info(f"Pipeline created: {koboldai_vars.model}") - else: - # TPU - from modeling.inference_models.hf_mtj import model_loader - model = model_loader( - koboldai_vars.model - ) - model.load( - save_model=not (args.colab or args.cacheonly) or args.savemodel, - initial_load=initial_load, - ) + model = model_loaders[plugin] + model.load(initial_load=initial_load) # TODO: Convert everywhere to use model.tokenizer if model: @@ -6532,7 +6396,8 @@ def UI_2_select_model(data): def UI_2_load_model(data): logger.info("loading Model") logger.info(data) - model_loaders[data['plugin']].set_input_parameters(**data) + model_loaders[data['plugin']].set_input_parameters(data) + load_model(data['plugin']) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) #==================================================================# @@ -8155,7 +8020,8 @@ def send_one_time_messages(data, wait_time=0): # Test #==================================================================# def model_info(): - if model_config is not None: + global model_config + if 'model_config' in globals() and model_config is not None: if isinstance(model_config, dict): if 'model_type' in model_config: model_type = str(model_config['model_type']) @@ -11045,7 +10911,7 @@ for schema in config_endpoint_schemas: def startup(): if koboldai_vars.model == "" or koboldai_vars.model is None: koboldai_vars.model = "ReadOnly" - socketio.start_background_task(load_model, **{'initial_load':True}) + socketio.start_background_task(load_model, *('readonly',), **{'initial_load':True}) print("", end="", flush=True) diff --git a/modeling/inference_model.py b/modeling/inference_model.py index 27ad46db..343eb39a 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -169,6 +169,7 @@ class InferenceModel: ] self.tokenizer = None self.capabilties = ModelCapabilities() + self.model_name = "Not Defined" def is_valid(self, model_name, model_path, menu_path, vram): return True @@ -176,7 +177,7 @@ class InferenceModel: def requested_parameters(self, model_name, model_path, menu_path, vram): return {} - def define_input_parameters(self): + def set_input_parameters(self, parameters): return def load(self, save_model: bool = False, initial_load: bool = False) -> None: @@ -186,6 +187,9 @@ class InferenceModel: self._load(save_model=save_model, initial_load=initial_load) self._post_load() + def 
unload(self): + return + def _pre_load(self) -> None: """Pre load hook. Called before `_load()`.""" diff --git a/modeling/inference_models/api.py b/modeling/inference_models/api.py index 41088bc7..5bddd714 100644 --- a/modeling/inference_models/api.py +++ b/modeling/inference_models/api.py @@ -46,8 +46,8 @@ class model_loader(InferenceModel): }) return requested_parameters - def set_input_parameters(self, base_url=""): - self.base_url = base_url.rstrip("/") + def set_input_parameters(self, parameters): + self.base_url = parameters['base_url'].rstrip("/") def _load(self, save_model: bool, initial_load: bool) -> None: tokenizer_id = requests.get(f"{self.base_url}/api/v1/model").json()["result"] diff --git a/modeling/inference_models/basic_api.py b/modeling/inference_models/basic_api.py index d7fc0863..5666ba8e 100644 --- a/modeling/inference_models/basic_api.py +++ b/modeling/inference_models/basic_api.py @@ -45,8 +45,8 @@ class model_loader(InferenceModel): }) return requested_parameters - def set_input_parameters(self, colaburl=""): - self.colaburl = colaburl + def set_input_parameters(self, parameters): + self.colaburl = parameters['colaburl'] def _initialize_model(self): return diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index 366fbbb7..b542c712 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -30,6 +30,7 @@ class model_loader(HFTorchInferenceModel): def _load(self, save_model: bool, initial_load: bool) -> None: utils.koboldai_vars.allowsp = True + self.lazy_load = utils.koboldai_vars.lazy_load # Make model path the same as the model name to make this consistent # with the other loading method if it isn't a known model type. 
This diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index f02cf265..057669d7 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -78,10 +78,10 @@ class model_loader(InferenceModel): }]) return requested_parameters - def set_input_parameters(self, url="", key="", model=""): - self.key = key.strip() - self.model = model - self.url = url + def set_input_parameters(self, parameters): + self.key = parameters['key'].strip() + self.model = parameters['model'] + self.url = parameters['url'] def get_cluster_models(self): # Get list of models from public cluster diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index 01c0c037..efbb01d3 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -59,9 +59,9 @@ class model_loader(InferenceModel): }]) return requested_parameters - def set_input_parameters(self, key="", model=""): - self.key = key.strip() - self.model = model + def set_input_parameters(self, parameters): + self.key = parameters['key'].strip() + self.model = parameters['model'] def get_oai_models(self): if self.key == "": diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 54781296..3099feaf 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -34,12 +34,12 @@ class HFInferenceModel(InferenceModel): requested_parameters = [] if model_path is not None and os.path.exists(model_path): - model_config = AutoConfig.from_pretrained(model_path) + self.model_config = AutoConfig.from_pretrained(model_path) elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): - model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") else: - model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") - layer_count = model_config["n_layer"] if isinstance(model_config, dict) else model_config.num_layers if hasattr(model_config, "num_layers") else model_config.n_layer if hasattr(model_config, "n_layer") else model_config.num_hidden_layers if hasattr(model_config, 'num_hidden_layers') else None + self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None if layer_count is not None and layer_count >= 0: if os.path.exists("settings/{}.breakmodel".format(model_name.replace("/", "_"))): with open("settings/{}.breakmodel".format(model_name.replace("/", "_")), "r") as file: @@ -61,11 +61,11 @@ class HFInferenceModel(InferenceModel): "uitype": "slider", "unit": "int", "label": "{} Layers".format(torch.cuda.get_device_name(i)), - "id": "{} Layers".format(i), + "id": "{}_Layers".format(i), "min": 0, "max": layer_count, "step": 1, - "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": 
layer_count, 'check': "="}, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), "default": break_values[i], "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), @@ -77,11 +77,11 @@ class HFInferenceModel(InferenceModel): "uitype": "slider", "unit": "int", "label": "CPU Layers", - "id": "CPU Layers", + "id": "CPU_Layers", "min": 0, "max": layer_count, "step": 1, - "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), "default": layer_count - sum(break_values), "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.", @@ -98,7 +98,7 @@ class HFInferenceModel(InferenceModel): "min": 0, "max": layer_count, "step": 1, - "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), "default": disk_blocks, "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. 
Use as a last resort.", @@ -122,10 +122,40 @@ class HFInferenceModel(InferenceModel): return requested_parameters - def set_input_parameters(self, layers=[], disk_layers=0, use_gpu=False): + def set_input_parameters(self, parameters): + gpu_count = torch.cuda.device_count() + layers = [] + for i in range(gpu_count): + layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None self.layers = layers - self.disk_layers = disk_layers - self.use_gpu = use_gpu + self.disk_layers = parameters['disk_layers'] if 'disk_layers' in parameters else None + self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.model_name = parameters['id'] + self.path = parameters['path'] if 'path' in parameters else None + + def unload(self): + if hasattr(self, 'model'): + self.model = None + if hasattr(self, 'tokenizer'): + self.tokenizer = None + if hasattr(self, 'model_config'): + self.model_config = None + with torch.no_grad(): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="torch.distributed.reduce_op is deprecated") + for tensor in gc.get_objects(): + try: + if torch.is_tensor(tensor): + tensor.set_(torch.tensor((), device=tensor.device, dtype=tensor.dtype)) + except: + pass + gc.collect() + try: + with torch.no_grad(): + torch.cuda.empty_cache() + except: + pass def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults @@ -187,7 +217,7 @@ class HFInferenceModel(InferenceModel): return model_path - basename = utils.koboldai_vars.model.replace("/", "_") + basename = self.model_name.replace("/", "_") if legacy: ret = basename else: diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index d8afafb1..4de13d7b 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -398,7 +398,7 @@ class HFTorchInferenceModel(HFInferenceModel): Embedding._koboldai_patch_causallm_model = self.model def _get_lazy_load_callback(self, n_layers: int, convert_to_float16: bool = True): - if not self.lazy_load: + if not utils.koboldai_vars.lazy_load: return if utils.args.breakmodel_disklayers is not None: diff --git a/modeling/inference_models/readonly.py b/modeling/inference_models/readonly.py new file mode 100644 index 00000000..c642c05a --- /dev/null +++ b/modeling/inference_models/readonly.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +import torch +import requests +import numpy as np +from typing import List, Optional, Union + +import utils +from logger import logger +from modeling.inference_model import ( + GenerationResult, + GenerationSettings, + InferenceModel, + ModelCapabilities, +) + + +class BasicAPIException(Exception): + """To be used for errors when using the Basic API as an interface.""" + + +class model_loader(InferenceModel): + def __init__(self) -> None: + super().__init__() + + # Do not allow API to be served over the API + self.capabilties = ModelCapabilities(api_host=False) + self.tokenizer = self._tokenizer() + self.model = None + self.model_name = "Read Only" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "ReadOnly" + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + return requested_parameters + + def set_input_parameters(self, parameters): + return + + def unload(self): 
+ utils.koboldai_vars.noai = False + + def _initialize_model(self): + return + + class _tokenizer(): + def __init__(self): + self._koboldai_header = [] + def decode(self, _input): + return "" + def encode(self, input_text): + return [] + + def _load(self, save_model: bool = False, initial_load: bool = False) -> None: + self.tokenizer = self.tokenizer + self.model = None + utils.koboldai_vars.noai = True + + def _raw_generate( + self, + prompt_tokens: Union[List[int], torch.Tensor], + max_new: int, + gen_settings: GenerationSettings, + single_line: bool = False, + batch_count: int = 1, + seed: Optional[int] = None, + **kwargs, + ): + return GenerationResult( + model=self, + out_batches=np.array([]), + prompt=prompt_tokens, + is_whole_generation=True, + single_line=single_line, + ) diff --git a/static/koboldai.js b/static/koboldai.js index 1907add8..7f004ff2 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -14,8 +14,8 @@ socket.on('load_popup', function(data){load_popup(data);}); socket.on('popup_items', function(data){popup_items(data);}); socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);}); socket.on('popup_edit_file', function(data){popup_edit_file(data);}); -socket.on('show_model_menu', function(data){show_model_menu(data);}); -socket.on('open_model_load_menu', function(data){new_show_model_menu(data);}); +//socket.on('show_model_menu', function(data){show_model_menu(data);}); +socket.on('open_model_load_menu', function(data){show_model_menu(data);}); socket.on('selected_model_info', function(data){selected_model_info(data);}); socket.on('oai_engines', function(data){oai_engines(data);}); socket.on('buildload', function(data){buildload(data);}); @@ -1502,13 +1502,18 @@ function getModelParameterCount(modelName) { return base * multiplier; } -function new_show_model_menu(data) { +function show_model_menu(data) { //clear out the loadmodelsettings var loadmodelsettings = document.getElementById('loadmodelsettings') while (loadmodelsettings.firstChild) { loadmodelsettings.removeChild(loadmodelsettings.firstChild); } - document.getElementById("modelplugin").classList.add("hidden"); + //Clear out plugin selector + var model_plugin = document.getElementById('modelplugin'); + while (model_plugin.firstChild) { + model_plugin.removeChild(model_plugin.firstChild); + } + model_plugin.classList.add("hidden"); var accept = document.getElementById("btn_loadmodelaccept"); accept.disabled = false; From e9c845dc2a1eae4927ed2a7417c6aa6969329bb9 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 14:14:52 -0400 Subject: [PATCH 04/68] Fix for badwordIDs --- modeling/inference_models/generic_hf_torch.py | 2 +- modeling/inference_models/parents/hf.py | 1 + modeling/inference_models/parents/hf_torch.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index b542c712..d5cf6397 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -43,7 +43,7 @@ class model_loader(HFTorchInferenceModel): self.model_name = os.path.basename( os.path.normpath(utils.koboldai_vars.custmodpth) ) - utils.koboldai_vars.model = self.model_name + utils.koboldai_vars.model = self.model_name # If we specify a model and it's in the root directory, we need to move # it to the models directory (legacy folder structure to new) diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 
3099feaf..1941a12e 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -17,6 +17,7 @@ class HFInferenceModel(InferenceModel): self.model = None self.tokenizer = None + self.badwordsids = koboldai_settings.badwordsids_default def is_valid(self, model_name, model_path, menu_path): try: diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index 4de13d7b..7cc16ad5 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -245,7 +245,7 @@ class HFTorchInferenceModel(HFInferenceModel): len(prompt_tokens) + max_new, utils.koboldai_vars.max_length ), repetition_penalty=1.0, - bad_words_ids=utils.koboldai_vars.badwordsids + bad_words_ids=self.badwordsids + additional_bad_words_ids, use_cache=True, num_return_sequences=batch_count, From a9c785d0f0020847e342f18f9910f1ed9c4871dd Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 14:20:14 -0400 Subject: [PATCH 05/68] Fix for Horde --- modeling/inference_models/horde.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 057669d7..bd457197 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -116,9 +116,9 @@ class model_loader(InferenceModel): def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer( - utils.koboldai_vars.cluster_requested_models[0] - if len(utils.koboldai_vars.cluster_requested_models) > 0 - else "gpt2", + self.model + #if len(self.model) > 0 + #else "gpt2", ) def _raw_generate( @@ -166,14 +166,14 @@ class model_loader(InferenceModel): client_agent = "KoboldAI:2.0.0:koboldai.org" cluster_headers = { - "apikey": utils.koboldai_vars.horde_api_key, + "apikey": self.key, "Client-Agent": client_agent, } try: # Create request req = requests.post( - f"{utils.koboldai_vars.horde_url}/api/v2/generate/text/async", + f"{self.url}/api/v2/generate/text/async", json=cluster_metadata, headers=cluster_headers, ) @@ -211,7 +211,7 @@ class model_loader(InferenceModel): while not finished: try: req = requests.get( - f"{utils.koboldai_vars.horde_url}/api/v2/generate/text/status/{request_id}", + f"{self.url}/api/v2/generate/text/status/{request_id}", headers=cluster_agent_headers, ) except requests.exceptions.ConnectionError: From 69d942c00cfd16708f82826fcc0d50355e322c0f Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 20:22:30 -0400 Subject: [PATCH 06/68] Kind of working breakmodel --- aiserver.py | 256 +----------------- koboldai_settings.py | 3 +- modeling/inference_models/generic_hf_torch.py | 7 +- modeling/inference_models/gooseai.py | 31 +++ modeling/inference_models/hf_mtj.py | 2 +- modeling/inference_models/openai.py | 168 +----------- modeling/inference_models/parents/hf.py | 35 ++- modeling/inference_models/parents/hf_torch.py | 27 +- .../parents/openai_gooseai.py | 189 +++++++++++++ static/koboldai.js | 6 + 10 files changed, 281 insertions(+), 443 deletions(-) create mode 100644 modeling/inference_models/gooseai.py create mode 100644 modeling/inference_models/parents/openai_gooseai.py diff --git a/aiserver.py b/aiserver.py index f9e60641..158a6699 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1473,7 +1473,7 @@ def general_startup(override_args=None): koboldai_vars.quiet = True if args.nobreakmodel: - koboldai_vars.nobreakmodel = True + model_loaders['generic_hf_torch'].nobreakmodel 
= True if args.remote: koboldai_vars.host = True; @@ -1484,6 +1484,9 @@ def general_startup(override_args=None): if args.localtunnel: koboldai_vars.host = True; + if args.lowmem: + model_loaders['generic_hf_torch'].low_mem = True + if args.host != "Disabled": # This means --host option was submitted without an argument # Enable all LAN IPs (0.0.0.0/0) @@ -1516,6 +1519,9 @@ def general_startup(override_args=None): koboldai_vars.trust_remote_code = True if args.cpu: koboldai_vars.use_colab_tpu = False + koboldai_vars.hascuda = False + koboldai_vars.usegpu = False + model_loaders['generic_hf_torch'].nobreakmodel = True koboldai_vars.smandelete = koboldai_vars.host == args.override_delete koboldai_vars.smanrename = koboldai_vars.host == args.override_rename @@ -1545,245 +1551,6 @@ def general_startup(override_args=None): socketio.start_background_task(socket_io_relay, koboldai_settings.queue, socketio) -#==================================================================# -# Load Model -#==================================================================# - -@socketio.on("get_model_info") -def get_model_info(model, directory=""): - logger.info("Selected: {}, {}".format(model, directory)) - # if the model is in the api list - disk_blocks = 0 - key = False - breakmodel = False - gpu = False - layer_count = None - key_value = "" - break_values = [] - url = False - default_url = None - models_on_url = False - multi_online_models = False - show_online_model_select=False - gpu_count = torch.cuda.device_count() - gpu_names = [] - send_horde_models = False - show_custom_model_box = False - for i in range(gpu_count): - gpu_names.append(torch.cuda.get_device_name(i)) - if model in ['Colab', 'API']: - url = True - elif model == 'CLUSTER': - models_on_url = True - show_online_model_select=True - url = True - key = True - default_url = koboldai_vars.horde_url - multi_online_models = True - key_value = koboldai_vars.horde_api_key - url = koboldai_vars.horde_url - if key_value: - send_horde_models = True - elif model in [x.name for x in model_menu['apilist']]: - show_online_model_select=True - if path.exists("settings/{}.v2_settings".format(model)): - with open("settings/{}.v2_settings".format(model), "r") as file: - # Check if API key exists - try: - js = json.load(file) - - if("apikey" in js and js["apikey"] != ""): - # API key exists, grab it and close the file - key_value = js["apikey"] - elif 'oaiapikey' in js and js['oaiapikey'] != "": - key_value = js["oaiapikey"] - if model in ('GooseAI', 'OAI'): - get_oai_models({'model': model, 'key': key_value}) - except json.decoder.JSONDecodeError: - print(":(") - pass - key = True - elif "rwkv" in model.lower(): - pass - elif model == 'ReadOnly': - pass - #elif model == 'customhuggingface': - # show_custom_model_box = True - elif args.cpu: - pass - else: - layer_count = get_layer_count(model, directory=directory) - if layer_count is None: - breakmodel = False - gpu = True - else: - breakmodel = True - if model in ["NeoCustom", "GPT2Custom", "customhuggingface"]: - filename = "settings/{}.breakmodel".format(os.path.basename(os.path.normpath(directory))) - else: - filename = "settings/{}.breakmodel".format(model.replace("/", "_")) - if path.exists(filename): - with open(filename, "r") as file: - data = [x for x in file.read().split("\n")[:2] if x != ''] - if len(data) < 2: - data.append("0") - break_values, disk_blocks = data - break_values = break_values.split(",") - else: - break_values = [layer_count] - break_values = [int(x) for x in break_values if x != ''] - 
break_values += [0] * (gpu_count - len(break_values)) - emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key, 'multi_online_models': multi_online_models, 'default_url': default_url, - 'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, - 'disk_break_value': disk_blocks, 'accelerate': True, - 'break_values': break_values, 'gpu_count': gpu_count, - 'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, - 'show_custom_model_box': show_custom_model_box}, broadcast=True, room="UI_1") - emit('selected_model_info', {'key_value': key_value, 'key':key, - 'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, 'multi_online_models': multi_online_models, 'default_url': default_url, - 'disk_break_value': disk_blocks, 'disk_break': True, - 'break_values': break_values, 'gpu_count': gpu_count, - 'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, 'show_online_model_select': show_online_model_select, - 'bit_8_available': koboldai_vars.bit_8_available if koboldai_vars.experimental_features else False, - 'show_custom_model_box': show_custom_model_box}) - if send_horde_models: - get_cluster_models({'key': key_value, 'url': default_url}) - elif key_value != "" and model in [x.name for x in model_menu['apilist']] and model != 'CLUSTER': - get_oai_models(key_value) - - - -def get_layer_count(model, directory=""): - if(model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]): - if(model == "GPT2Custom"): - with open(os.path.join(directory, "config.json"), "r") as f: - model_config = json.load(f) - # Get the model_type from the config or assume a model type if it isn't present - else: - if(directory): - model = directory - from transformers import AutoConfig - if(os.path.isdir(model.replace('/', '_'))): - model_config = AutoConfig.from_pretrained(model.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache") - elif(is_model_downloaded(model)): - model_config = AutoConfig.from_pretrained("models/{}".format(model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache") - elif(os.path.isdir(directory)): - model_config = AutoConfig.from_pretrained(directory, revision=koboldai_vars.revision, cache_dir="cache") - elif(os.path.isdir(koboldai_vars.custmodpth.replace('/', '_'))): - model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache") - else: - model_config = AutoConfig.from_pretrained(model, revision=koboldai_vars.revision, cache_dir="cache") - try: - if (model_config.model_type != 'gpt2' or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel: - return utils.num_layers(model_config) - else: - return None - except: - return None - else: - return None - -@socketio.on('OAI_Key_Update') -def get_oai_models(data): - key = data['key'] - model = data['model'] - koboldai_vars.oaiapikey = key - if model == 'OAI': - url = "https://api.openai.com/v1/engines" - elif model == 'GooseAI': - url = "https://api.goose.ai/v1/engines" - else: - return - - # Get list of models from OAI - logger.init("OAI Engines", status="Retrieving") - req = requests.get( - url, - headers = { - 'Authorization': 'Bearer '+key - } - ) - if(req.status_code == 200): - r = req.json() - engines = r["data"] - try: - engines = [[en["id"], "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")] for en in engines] - except: - logger.error(engines) - raise - - 
online_model = "" - changed=False - - #Save the key - if not path.exists("settings"): - # If the client settings file doesn't exist, create it - # Write API key to file - os.makedirs('settings', exist_ok=True) - if path.exists("settings/{}.v2_settings".format(model)): - with open("settings/{}.v2_settings".format(model), "r") as file: - js = json.load(file) - if 'online_model' in js: - online_model = js['online_model'] - if "apikey" in js: - if js['apikey'] != key: - changed=True - else: - js = {} - changed=True - - if changed: - with open("settings/{}.v2_settings".format(model), "w") as file: - js["apikey"] = key - file.write(json.dumps(js, indent=3)) - - logger.init_ok("OAI Engines", status="OK") - emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1") - emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2") - else: - # Something went wrong, print the message and quit since we can't initialize an engine - logger.init_err("OAI Engines", status="Failed") - logger.error(req.json()) - emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) - -@socketio.on("get_cluster_models") -def get_cluster_models(msg): - koboldai_vars.horde_api_key = msg['key'] or koboldai_vars.horde_api_key - url = msg['url'] or koboldai_vars.horde_url - koboldai_vars.horde_url = url - # Get list of models from public cluster - print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="") - try: - req = requests.get(f"{url}/api/v2/status/models?type=text") - except: - logger.init_err("KAI Horde Models", status="Failed") - logger.error("Provided KoboldAI Horde URL unreachable") - emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"}) - return - if not req.ok: - # Something went wrong, print the message and quit since we can't initialize an engine - logger.init_err("KAI Horde Models", status="Failed") - logger.error(req.json()) - emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1") - return - - engines = req.json() - logger.debug(engines) - try: - engines = [[en["name"], en["name"]] for en in engines] - except: - logger.error(engines) - raise - logger.debug(engines) - - online_model = "" - savesettings() - - logger.init_ok("KAI Horde Models", status="OK") - - emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1") - emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2") - def unload_model(): global model @@ -1845,7 +1612,6 @@ def load_model(plugin, initial_load=False): # loadmodelsettings() # loadsettings() logger.init("GPU support", status="Searching") - koboldai_vars.hascuda = torch.cuda.is_available() and not args.cpu koboldai_vars.bmsupported = ((koboldai_vars.model_type != 'gpt2') or koboldai_vars.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel if(args.breakmodel is not None and args.breakmodel): logger.warning("--breakmodel is no longer supported. 
Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).") @@ -1861,12 +1627,7 @@ def load_model(plugin, initial_load=False): else: logger.init_warn("GPU support", status="Not Found") - if args.cpu: - koboldai_vars.usegpu = False - gpu_layers = None - disk_layers = None - koboldai_vars.breakmodel = False - elif koboldai_vars.hascuda: + if koboldai_vars.hascuda: if(koboldai_vars.bmsupported): koboldai_vars.usegpu = False koboldai_vars.breakmodel = True @@ -1879,6 +1640,7 @@ def load_model(plugin, initial_load=False): model = model_loaders[plugin] model.load(initial_load=initial_load) + logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer if model: diff --git a/koboldai_settings.py b/koboldai_settings.py index d8416df2..e9562ffc 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -710,7 +710,6 @@ class model_settings(settings): self.modeldim = -1 # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B) self.sampler_order = [6, 0, 1, 2, 3, 4, 5] self.newlinemode = "n" - self.lazy_load = True # Whether or not to use torch_lazy_loader.py for transformers models in order to reduce CPU memory usage self.presets = [] # Holder for presets self.selected_preset = "" self.uid_presets = [] @@ -1236,7 +1235,7 @@ class system_settings(settings): self.corescript = "default.lua" # Filename of corescript to load self.gpu_device = 0 # Which PyTorch device to use when using pure GPU generation self.savedir = os.getcwd()+"\\stories" - self.hascuda = False # Whether torch has detected CUDA on the system + self.hascuda = torch.cuda.is_available() # Whether torch has detected CUDA on the system self.usegpu = False # Whether to launch pipeline with GPU support self.splist = [] self.spselect = "" # Temporary storage for soft prompt filename to load diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index d5cf6397..c228e2ee 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -30,7 +30,6 @@ class model_loader(HFTorchInferenceModel): def _load(self, save_model: bool, initial_load: bool) -> None: utils.koboldai_vars.allowsp = True - self.lazy_load = utils.koboldai_vars.lazy_load # Make model path the same as the model name to make this consistent # with the other loading method if it isn't a known model type. 
This @@ -69,12 +68,14 @@ class model_loader(HFTorchInferenceModel): # If we're using torch_lazy_loader, we need to get breakmodel config # early so that it knows where to load the individual model tensors + logger.debug("lazy_load: {} hascuda: {} breakmodel: {} nobreakmode: {}".format(self.lazy_load, utils.koboldai_vars.hascuda, self.breakmodel, self.nobreakmodel)) if ( self.lazy_load and utils.koboldai_vars.hascuda - and utils.koboldai_vars.breakmodel - and not utils.koboldai_vars.nobreakmodel + and self.breakmodel + and not self.nobreakmodel ): + logger.debug("loading breakmodel") self.breakmodel_device_config(self.model_config) if self.lazy_load: diff --git a/modeling/inference_models/gooseai.py b/modeling/inference_models/gooseai.py new file mode 100644 index 00000000..08d8ea06 --- /dev/null +++ b/modeling/inference_models/gooseai.py @@ -0,0 +1,31 @@ +import torch +import requests +import numpy as np +from typing import List, Optional, Union + +import utils +from logger import logger +from modeling.inference_model import ( + GenerationResult, + GenerationSettings, + InferenceModel, +) + +from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader + + + +class OpenAIAPIError(Exception): + def __init__(self, error_type: str, error_message) -> None: + super().__init__(f"{error_type}: {error_message}") + + +class model_loader(openai_gooseai_model_loader): + """InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.url = "https://api.goose.ai/v1/engines" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "GooseAI" \ No newline at end of file diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py index c99e9a05..759feb65 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj.py @@ -27,7 +27,7 @@ class model_loader(HFInferenceModel): #model_name: str, ) -> None: super().__init__() - + self.hf_torch = False self.model_config = None self.capabilties = ModelCapabilities( embedding_manipulation=False, diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index efbb01d3..cad2a7f2 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -11,6 +11,8 @@ from modeling.inference_model import ( InferenceModel, ) +from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader + class OpenAIAPIError(Exception): @@ -18,172 +20,12 @@ class OpenAIAPIError(Exception): super().__init__(f"{error_type}: {error_message}") -class model_loader(InferenceModel): +class model_loader(openai_gooseai_model_loader): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): super().__init__() - self.key = "" + self.url = "https://api.openai.com/v1/engines" def is_valid(self, model_name, model_path, menu_path): - return model_name == "OAI" or model_name == "GooseAI" - - def get_requested_parameters(self, model_name, model_path, menu_path): - self.source = model_name - requested_parameters = [] - requested_parameters.extend([{ - "uitype": "text", - "unit": "text", - "label": "Key", - "id": "key", - "default": "", - "check": {"value": "", 'check': "!="}, - "tooltip": "User Key to use when connecting to OpenAI/GooseAI.", - "menu_path": "", - "refresh_model_inputs": True, - "extra_classes": "" - }, - { - "uitype": "dropdown", - "unit": "text", - "label": "Model", - "id": "model", - 
"default": "", - "check": {"value": "", 'check': "!="}, - "tooltip": "Which model to use when running OpenAI/GooseAI.", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "", - 'children': self.get_oai_models(), - - }]) - return requested_parameters - - def set_input_parameters(self, parameters): - self.key = parameters['key'].strip() - self.model = parameters['model'] - - def get_oai_models(self): - if self.key == "": - return [] - if self.source == 'OAI': - url = "https://api.openai.com/v1/engines" - elif self.source == 'GooseAI': - url = "https://api.goose.ai/v1/engines" - else: - return - - # Get list of models from OAI - logger.init("OAI Engines", status="Retrieving") - req = requests.get( - url, - headers = { - 'Authorization': 'Bearer '+self.key - } - ) - if(req.status_code == 200): - r = req.json() - engines = r["data"] - try: - engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines] - except: - logger.error(engines) - raise - - online_model = "" - - - logger.init_ok("OAI Engines", status="OK") - return engines - else: - # Something went wrong, print the message and quit since we can't initialize an engine - logger.init_err("OAI Engines", status="Failed") - logger.error(req.json()) - emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) - return [] - - - def _load(self, save_model: bool, initial_load: bool) -> None: - self.tokenizer = self._get_tokenizer("gpt2") - - def _raw_generate( - self, - prompt_tokens: Union[List[int], torch.Tensor], - max_new: int, - gen_settings: GenerationSettings, - single_line: bool = False, - batch_count: int = 1, - seed: Optional[int] = None, - **kwargs, - ) -> GenerationResult: - - if seed is not None: - logger.warning( - "Seed is unsupported on the OpenAIAPIInferenceModel. Seed will be ignored." - ) - - decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens)) - - # Store context in memory to use it for comparison with generated content - utils.koboldai_vars.lastctx = decoded_prompt - - # Build request JSON data - # GooseAI is a subntype of OAI. 
So to check if it's this type, we check the configname as a workaround - # as the koboldai_vars.model will always be OAI - if "GooseAI" in utils.koboldai_vars.configname: - reqdata = { - "prompt": decoded_prompt, - "max_tokens": max_new, - "temperature": gen_settings.temp, - "top_a": gen_settings.top_a, - "top_p": gen_settings.top_p, - "top_k": gen_settings.top_k, - "tfs": gen_settings.tfs, - "typical_p": gen_settings.typical, - "repetition_penalty": gen_settings.rep_pen, - "repetition_penalty_slope": gen_settings.rep_pen_slope, - "repetition_penalty_range": gen_settings.rep_pen_range, - "n": batch_count, - # TODO: Implement streaming - "stream": False, - } - else: - reqdata = { - "prompt": decoded_prompt, - "max_tokens": max_new, - "temperature": gen_settings.temp, - "top_p": gen_settings.top_p, - "frequency_penalty": gen_settings.rep_pen, - "n": batch_count, - "stream": False, - } - - req = requests.post( - utils.koboldai_vars.oaiurl, - json=reqdata, - headers={ - "Authorization": "Bearer " + utils.koboldai_vars.oaiapikey, - "Content-Type": "application/json", - }, - ) - - j = req.json() - - if not req.ok: - # Send error message to web client - if "error" in j: - error_type = j["error"]["type"] - error_message = j["error"]["message"] - else: - error_type = "Unknown" - error_message = "Unknown" - raise OpenAIAPIError(error_type, error_message) - - outputs = [out["text"] for out in j["choices"]] - return GenerationResult( - model=self, - out_batches=np.array([self.tokenizer.encode(x) for x in outputs]), - prompt=prompt_tokens, - is_whole_generation=True, - single_line=single_line, - ) + return model_name == "OAI" \ No newline at end of file diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 1941a12e..c7a781d7 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -22,18 +22,19 @@ class HFInferenceModel(InferenceModel): def is_valid(self, model_name, model_path, menu_path): try: if model_path is not None and os.path.exists(model_path): - model_config = AutoConfig.from_pretrained(model_path) + self.model_config = AutoConfig.from_pretrained(model_path) elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): - model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") else: - model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") return True except: return False def get_requested_parameters(self, model_name, model_path, menu_path): requested_parameters = [] - + if not self.hf_torch: + return [] if model_path is not None and os.path.exists(model_path): self.model_config = AutoConfig.from_pretrained(model_path) elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): @@ -124,14 +125,20 @@ class HFInferenceModel(InferenceModel): return requested_parameters def set_input_parameters(self, parameters): - gpu_count = torch.cuda.device_count() - layers = [] - for i in range(gpu_count): - layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) - self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' 
in parameters else None - self.layers = layers - self.disk_layers = parameters['disk_layers'] if 'disk_layers' in parameters else None - self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + if self.hf_torch: + import breakmodel + gpu_count = torch.cuda.device_count() + layers = [] + for i in range(gpu_count): + layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + self.layers = layers + self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0 + breakmodel.gpu_blocks = layers + breakmodel.disk_blocks = self.disk_layers + self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.model_type = self.get_model_type() + self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel self.model_name = parameters['id'] self.path = parameters['path'] if 'path' in parameters else None @@ -157,6 +164,10 @@ class HFInferenceModel(InferenceModel): torch.cuda.empty_cache() except: pass + if self.hf_torch: + breakmodel.breakmodel = True + breakmodel.gpu_blocks = [] + breakmodel.disk_blocks = 0 def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index 7cc16ad5..84c60a6c 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -53,15 +53,12 @@ LOG_SAMPLER_NO_EFFECT = False class HFTorchInferenceModel(HFInferenceModel): - def __init__( - self, - #model_name: str, - #lazy_load: bool, - #low_mem: bool, - ) -> None: + def __init__(self) -> None: super().__init__() - #self.lazy_load = lazy_load - #self.low_mem = low_mem + self.hf_torch = True + self.lazy_load = True + self.low_mem = False + self.nobreakmodel = False self.post_token_hooks = [ PostTokenHooks.stream_tokens, @@ -398,7 +395,7 @@ class HFTorchInferenceModel(HFInferenceModel): Embedding._koboldai_patch_causallm_model = self.model def _get_lazy_load_callback(self, n_layers: int, convert_to_float16: bool = True): - if not utils.koboldai_vars.lazy_load: + if not self.lazy_load: return if utils.args.breakmodel_disklayers is not None: @@ -819,14 +816,14 @@ class HFTorchInferenceModel(HFInferenceModel): elif ( utils.args.breakmodel_gpulayers is not None or utils.args.breakmodel_disklayers is not None + or breakmodel.gpu_blocks != [] ): try: - if not utils.args.breakmodel_gpulayers: - breakmodel.gpu_blocks = [] - else: - breakmodel.gpu_blocks = list( - map(int, utils.args.breakmodel_gpulayers.split(",")) - ) + if breakmodel.gpu_blocks == []: + if utils.args.breakmodel_gpulayers: + breakmodel.gpu_blocks = list( + map(int, utils.args.breakmodel_gpulayers.split(",")) + ) assert len(breakmodel.gpu_blocks) <= torch.cuda.device_count() s = n_layers for i in range(len(breakmodel.gpu_blocks)): diff --git a/modeling/inference_models/parents/openai_gooseai.py b/modeling/inference_models/parents/openai_gooseai.py new file mode 100644 index 00000000..621ccbad --- /dev/null +++ b/modeling/inference_models/parents/openai_gooseai.py @@ -0,0 +1,189 @@ +import torch +import requests +import numpy as np +from typing import List, Optional, Union + +import utils +from logger import logger +from modeling.inference_model import ( + 
GenerationResult, + GenerationSettings, + InferenceModel, +) + + + +class OpenAIAPIError(Exception): + def __init__(self, error_type: str, error_message) -> None: + super().__init__(f"{error_type}: {error_message}") + + +class model_loader(InferenceModel): + """InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.key = "" + self.url = "https://api.goose.ai/v1/engines" + #if self.source == 'OAI': + # url = "https://api.openai.com/v1/engines" + #elif self.source == 'GooseAI': + # url = "https://api.goose.ai/v1/engines" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "OAI" or model_name == "GooseAI" + + def get_requested_parameters(self, model_name, model_path, menu_path): + self.source = model_name + requested_parameters = [] + requested_parameters.extend([{ + "uitype": "text", + "unit": "text", + "label": "Key", + "id": "key", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "User Key to use when connecting to OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "dropdown", + "unit": "text", + "label": "Model", + "id": "model", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Which model to use when running OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": False, + "extra_classes": "", + 'children': self.get_oai_models(), + + }]) + return requested_parameters + + def set_input_parameters(self, parameters): + self.key = parameters['key'].strip() + self.model = parameters['model'] + + def get_oai_models(self): + if self.key == "": + return [] + + + # Get list of models from OAI + logger.init("OAI Engines", status="Retrieving") + req = requests.get( + self.url, + headers = { + 'Authorization': 'Bearer '+self.key + } + ) + if(req.status_code == 200): + r = req.json() + engines = r["data"] + try: + engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines] + except: + logger.error(engines) + raise + + online_model = "" + + + logger.init_ok("OAI Engines", status="OK") + return engines + else: + # Something went wrong, print the message and quit since we can't initialize an engine + logger.init_err("OAI Engines", status="Failed") + logger.error(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) + return [] + + + def _load(self, save_model: bool, initial_load: bool) -> None: + self.tokenizer = self._get_tokenizer("gpt2") + + def _raw_generate( + self, + prompt_tokens: Union[List[int], torch.Tensor], + max_new: int, + gen_settings: GenerationSettings, + single_line: bool = False, + batch_count: int = 1, + seed: Optional[int] = None, + **kwargs, + ) -> GenerationResult: + + if seed is not None: + logger.warning( + "Seed is unsupported on the OpenAIAPIInferenceModel. Seed will be ignored." + ) + + decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens)) + + # Store context in memory to use it for comparison with generated content + utils.koboldai_vars.lastctx = decoded_prompt + + # Build request JSON data + # GooseAI is a subntype of OAI. 
So to check if it's this type, we check the configname as a workaround + # as the koboldai_vars.model will always be OAI + if "GooseAI" in utils.koboldai_vars.configname: + reqdata = { + "prompt": decoded_prompt, + "max_tokens": max_new, + "temperature": gen_settings.temp, + "top_a": gen_settings.top_a, + "top_p": gen_settings.top_p, + "top_k": gen_settings.top_k, + "tfs": gen_settings.tfs, + "typical_p": gen_settings.typical, + "repetition_penalty": gen_settings.rep_pen, + "repetition_penalty_slope": gen_settings.rep_pen_slope, + "repetition_penalty_range": gen_settings.rep_pen_range, + "n": batch_count, + # TODO: Implement streaming + "stream": False, + } + else: + reqdata = { + "prompt": decoded_prompt, + "max_tokens": max_new, + "temperature": gen_settings.temp, + "top_p": gen_settings.top_p, + "frequency_penalty": gen_settings.rep_pen, + "n": batch_count, + "stream": False, + } + + req = requests.post( + self.url, + json=reqdata, + headers={ + "Authorization": "Bearer " + self.key, + "Content-Type": "application/json", + }, + ) + + j = req.json() + + if not req.ok: + # Send error message to web client + if "error" in j: + error_type = j["error"]["type"] + error_message = j["error"]["message"] + else: + error_type = "Unknown" + error_message = "Unknown" + raise OpenAIAPIError(error_type, error_message) + + outputs = [out["text"] for out in j["choices"]] + return GenerationResult( + model=self, + out_batches=np.array([self.tokenizer.encode(x) for x in outputs]), + prompt=prompt_tokens, + is_whole_generation=True, + single_line=single_line, + ) diff --git a/static/koboldai.js b/static/koboldai.js index 7f004ff2..ab7f7832 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1652,6 +1652,12 @@ function selected_model_info(data) { while (loadmodelsettings.firstChild) { loadmodelsettings.removeChild(loadmodelsettings.firstChild); } + //Clear out plugin selector + var model_plugin = document.getElementById('modelplugin'); + while (model_plugin.firstChild) { + model_plugin.removeChild(model_plugin.firstChild); + } + var accept = document.getElementById("btn_loadmodelaccept"); accept.disabled = false; From a6f0e97ba0ecf17b558e7577834ed9cff964be00 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 20:40:05 -0400 Subject: [PATCH 07/68] Working(?) 
breakmodel --- modeling/inference_models/parents/hf.py | 3 +- modeling/inference_models/parents/hf_torch.py | 52 ++++++++++--------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index c7a781d7..67fd8b15 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -18,6 +18,7 @@ class HFInferenceModel(InferenceModel): self.model = None self.tokenizer = None self.badwordsids = koboldai_settings.badwordsids_default + self.usegpu = False def is_valid(self, model_name, model_path, menu_path): try: @@ -136,7 +137,7 @@ class HFInferenceModel(InferenceModel): self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0 breakmodel.gpu_blocks = layers breakmodel.disk_blocks = self.disk_layers - self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel self.model_name = parameters['id'] diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index 84c60a6c..d942a572 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -289,6 +289,7 @@ class HFTorchInferenceModel(HFInferenceModel): raise logger.warning(f"Fell back to GPT2LMHeadModel due to {e}") + logger.debug(traceback_string) try: return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs) except Exception as e: @@ -437,10 +438,10 @@ class HFTorchInferenceModel(HFInferenceModel): ): device_map[key] = ( utils.koboldai_vars.gpu_device - if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu + if utils.koboldai_vars.hascuda and self.usegpu else "cpu" if not utils.koboldai_vars.hascuda - or not utils.koboldai_vars.breakmodel + or not self.breakmodel else breakmodel.primary_device ) else: @@ -456,12 +457,12 @@ class HFTorchInferenceModel(HFInferenceModel): ) device = ( utils.koboldai_vars.gpu_device - if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu + if utils.koboldai_vars.hascuda and self.usegpu else "disk" if layer < disk_blocks and layer < ram_blocks else "cpu" if not utils.koboldai_vars.hascuda - or not utils.koboldai_vars.breakmodel + or not self.breakmodel else "shared" if layer < ram_blocks else bisect.bisect_right( @@ -566,15 +567,15 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) and model_dict[key].dtype is torch.float32 ): model_dict[key] = model_dict[key].to(torch.float16) if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel and model_dict[key].dtype is torch.float16 ): model_dict[key] = model_dict[key].to(torch.float32) @@ -612,14 +613,14 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) ): dtype = torch.float16 if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu 
- and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel ): dtype = torch.float32 if ( @@ -675,16 +676,16 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) and model_dict[key].dtype is torch.float32 ): model_dict[key] = model_dict[key].to(torch.float16) if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel and model_dict[key].dtype is torch.float16 ): model_dict[key] = model_dict[key].to(torch.float32) @@ -723,14 +724,14 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) ): dtype = torch.float16 if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel ): dtype = torch.float32 if ( @@ -764,7 +765,7 @@ class HFTorchInferenceModel(HFInferenceModel): if always_use or ( utils.koboldai_vars.hascuda and self.low_mem - and (utils.koboldai_vars.usegpu or utils.koboldai_vars.breakmodel) + and (self.usegpu or self.breakmodel) ): original_dtype = torch.get_default_dtype() torch.set_default_dtype(torch.float16) @@ -956,8 +957,9 @@ class HFTorchInferenceModel(HFInferenceModel): -1, utils.num_layers(config), ): - utils.koboldai_vars.breakmodel = False - utils.koboldai_vars.usegpu = True + logger.debug("All layers on same GPU. Breakmodel disabled") + self.breakmodel = False + self.usegpu = True utils.koboldai_vars.gpu_device = len(breakmodel.gpu_blocks) - 1 return @@ -966,6 +968,6 @@ class HFTorchInferenceModel(HFInferenceModel): import breakmodel breakmodel.primary_device = "cpu" - utils.koboldai_vars.breakmodel = False - utils.koboldai_vars.usegpu = False + self.breakmodel = False + self.usegpu = False return From aaa91338996a652960bfa8b9461c2f0de8d82bee Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 21:22:33 -0400 Subject: [PATCH 08/68] Disk Cache working UI valid marker broken for disk cache --- aiserver.py | 4 +--- modeling/inference_models/parents/hf.py | 6 +++--- modeling/inference_models/parents/hf_torch.py | 8 ++++---- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/aiserver.py b/aiserver.py index 158a6699..a306449e 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1639,7 +1639,7 @@ def load_model(plugin, initial_load=False): model = model_loaders[plugin] - model.load(initial_load=initial_load) + model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer @@ -6156,8 +6156,6 @@ def UI_2_select_model(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): - logger.info("loading Model") - logger.info(data) model_loaders[data['plugin']].set_input_parameters(data) load_model(data['plugin']) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 67fd8b15..03955d88 100644 --- 
a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -53,12 +53,12 @@ class HFInferenceModel(InferenceModel): break_values = break_values.split(",") else: break_values = [layer_count] - disk_blocks = None + disk_blocks = 0 break_values = [int(x) for x in break_values if x != '' and x is not None] gpu_count = torch.cuda.device_count() break_values += [0] * (gpu_count - len(break_values)) if disk_blocks is not None: - break_values += [disk_blocks] + break_values += [int(disk_blocks)] for i in range(gpu_count): requested_parameters.append({ "uitype": "slider", @@ -134,7 +134,7 @@ class HFInferenceModel(InferenceModel): layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None self.layers = layers - self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0 + self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 breakmodel.gpu_blocks = layers breakmodel.disk_blocks = self.disk_layers self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index d942a572..aae3ada3 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -780,6 +780,7 @@ class HFTorchInferenceModel(HFInferenceModel): device_count = torch.cuda.device_count() if device_count < 2: primary = None + logger.debug("n_layers: {}".format(n_layers)) gpu_blocks = breakmodel.gpu_blocks + ( device_count - len(breakmodel.gpu_blocks) ) * [0] @@ -835,10 +836,7 @@ class HFTorchInferenceModel(HFInferenceModel): s -= breakmodel.gpu_blocks[i] assert sum(breakmodel.gpu_blocks) <= n_layers n_layers -= sum(breakmodel.gpu_blocks) - if utils.args.breakmodel_disklayers is not None: - assert utils.args.breakmodel_disklayers <= n_layers - breakmodel.disk_blocks = utils.args.breakmodel_disklayers - n_layers -= utils.args.breakmodel_disklayers + n_layers -= breakmodel.disk_blocks except: logger.warning( "--breakmodel_gpulayers is malformatted. Please use the --help option to see correct usage of --breakmodel_gpulayers. Defaulting to all layers on device 0." 
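
The hunk above reworks the breakmodel accounting so the GPU block counts can come either from --breakmodel_gpulayers or from values the backend already stored in breakmodel.gpu_blocks, with disk blocks then subtracted from whatever remains. A minimal, self-contained sketch of that bookkeeping and of the settings file written in the hunk that follows; the helper names and example numbers are illustrative, not taken from the patch:

# Illustrative sketch of the breakmodel layer accounting (hypothetical helpers, not in the patch).
def split_layers(n_layers, gpu_blocks, disk_blocks):
    # The patch asserts the per-GPU totals fit inside the model, then subtracts
    # GPU and disk blocks; whatever is left stays on the CPU/"shared" side.
    assert sum(gpu_blocks) <= n_layers
    remaining = n_layers - sum(gpu_blocks) - disk_blocks
    return {"gpu": gpu_blocks, "disk": disk_blocks, "cpu": remaining}

def breakmodel_settings_text(gpu_blocks, disk_blocks):
    # Mirrors the "settings/<model>.breakmodel" format written in the next hunk:
    # comma-joined GPU blocks on the first line, disk block count on the second.
    return "{}\n{}".format(",".join(map(str, gpu_blocks)), disk_blocks)

# Example: a 32-layer model split across two GPUs with 4 layers cached on disk.
print(split_layers(32, [16, 8], 4))          # {'gpu': [16, 8], 'disk': 4, 'cpu': 4}
print(breakmodel_settings_text([16, 8], 4))  # "16,8" then "4" on the next line
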
@@ -949,6 +947,8 @@ class HFTorchInferenceModel(HFInferenceModel): logger.init_ok("Final device configuration:", status="Info") self.breakmodel_device_list(n_layers, primary=breakmodel.primary_device) + with open("settings/{}.breakmodel".format(self.model_name.replace("/", "_")), "w") as file: + file.write("{}\n{}".format(",".join(map(str, breakmodel.gpu_blocks)), breakmodel.disk_blocks)) # If all layers are on the same device, use the old GPU generation mode while len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0: From f027d8b6e56393c12b8cd1611a3c0b7cc90802c9 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 17 May 2023 21:15:31 -0400 Subject: [PATCH 09/68] Better working valid detection and named model backends for UI --- aiserver.py | 53 +++-- modeling/inference_models/api.py | 3 +- modeling/inference_models/basic_api.py | 4 +- modeling/inference_models/generic_hf_torch.py | 3 +- modeling/inference_models/gooseai.py | 5 +- modeling/inference_models/hf_mtj.py | 4 +- modeling/inference_models/horde.py | 3 +- modeling/inference_models/openai.py | 6 +- modeling/inference_models/parents/hf.py | 24 +- .../parents/openai_gooseai.py | 2 +- modeling/inference_models/readonly.py | 3 +- modeling/inference_models/rwkv.py | 5 +- static/koboldai.js | 206 +++++++++++------- templates/templates.html | 5 +- 14 files changed, 191 insertions(+), 135 deletions(-) diff --git a/aiserver.py b/aiserver.py index 92dde7f4..314fb512 100644 --- a/aiserver.py +++ b/aiserver.py @@ -622,12 +622,12 @@ from modeling.patches import patch_transformers #Load all of the model importers import importlib -model_loader_code = {} -model_loaders = {} +model_backend_code = {} +model_backends = {} for module in os.listdir("./modeling/inference_models"): if os.path.isfile(os.path.join("./modeling/inference_models",module)) and module[-3:] == '.py': - model_loader_code[module[:-3]] = importlib.import_module('modeling.inference_models.{}'.format(module[:-3])) - model_loaders[module[:-3]] = model_loader_code[module[:-3]].model_loader() + model_backend_code[module[:-3]] = importlib.import_module('modeling.inference_models.{}'.format(module[:-3])) + model_backends[model_backend_code[module[:-3]].model_backend_name] = model_backend_code[module[:-3]].model_backend() old_socketio_on = socketio.on @@ -1354,6 +1354,7 @@ def general_startup(override_args=None): parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable") parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") + parser.add_argument("--model_backend", help="Specify the model backend you want to use") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") parser.add_argument("--sh_apikey", help="Specify the API key to use for txt2img from the Stable Horde. Get a key from https://horde.koboldai.net/register") @@ -1447,6 +1448,12 @@ def general_startup(override_args=None): args.max_summary_length = int(args.max_summary_length) if args.model: + # At this point we have to try to load the model through the selected backend + if not args.model_backend: + logger.error("Didn't select a model backend. 
Please enter one through the --model_backend or remove the --model from the run command") + exit() + #if + koboldai_vars.model = args.model; koboldai_vars.revision = args.revision koboldai_settings.multi_story = args.multi_story @@ -1472,7 +1479,7 @@ def general_startup(override_args=None): koboldai_vars.quiet = True if args.nobreakmodel: - model_loaders['generic_hf_torch'].nobreakmodel = True + model_backends['Huggingface'].nobreakmodel = True if args.remote: koboldai_vars.host = True; @@ -1484,7 +1491,7 @@ def general_startup(override_args=None): koboldai_vars.host = True; if args.lowmem: - model_loaders['generic_hf_torch'].low_mem = True + model_backends['Huggingface'].low_mem = True if args.host != "Disabled": # This means --host option was submitted without an argument @@ -1520,7 +1527,7 @@ def general_startup(override_args=None): koboldai_vars.use_colab_tpu = False koboldai_vars.hascuda = False koboldai_vars.usegpu = False - model_loaders['generic_hf_torch'].nobreakmodel = True + model_backends['Huggingface'].nobreakmodel = True koboldai_vars.smandelete = koboldai_vars.host == args.override_delete koboldai_vars.smanrename = koboldai_vars.host == args.override_rename @@ -1582,7 +1589,7 @@ def unload_model(): koboldai_vars.badwordsids = koboldai_settings.badwordsids_default -def load_model(plugin, initial_load=False): +def load_model(model_backend, initial_load=False): global model global tokenizer global model_config @@ -1637,7 +1644,7 @@ def load_model(plugin, initial_load=False): koboldai_vars.default_preset = koboldai_settings.default_preset - model = model_loaders[plugin] + model = model_backends[model_backend] model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) logger.debug("Model Type: {}".format(koboldai_vars.model_type)) @@ -6103,33 +6110,23 @@ def UI_2_select_model(data): emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) else: #Get load methods - logger.debug("Asking for model info on potential model: {}".format(data)) - valid = False if 'path' not in data or data['path'] == "": valid_loaders = {} - for model_loader in model_loaders: - logger.debug("Testing Loader {} for model {}: {}".format(model_loader, data["name"], model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]))) - if model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]): - valid_loaders[model_loader] = model_loaders[model_loader].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) - valid = True - if valid: - logger.debug("Valid Loaders: {}".format(valid_loaders)) - emit("selected_model_info", valid_loaders) - if not valid and 'path' in data: + for model_backend in model_backends: + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + emit("selected_model_info", {"model_backends": valid_loaders, "preselected": "Huggingface"}) + else: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) output = [] for path in paths: valid=False - for model_loader in model_loaders: - if model_loaders[model_loader].is_valid(path[1], path[0], "Custom"): + for model_backend in model_backends: + if model_backends[model_backend].is_valid(path[1], path[0], "Custom"): valid=True break output.append({'label': path[1], 'name': 
path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) - emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) - elif not valid: - logger.error("Nothing can load the model: {}".format(valid_loaders)) - + emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return @@ -6156,7 +6153,7 @@ def UI_2_select_model(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): - model_loaders[data['plugin']].set_input_parameters(data) + model_backends[data['plugin']].set_input_parameters(data) load_model(data['plugin']) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) @@ -10671,7 +10668,7 @@ for schema in config_endpoint_schemas: def startup(): if koboldai_vars.model == "" or koboldai_vars.model is None: koboldai_vars.model = "ReadOnly" - socketio.start_background_task(load_model, *('readonly',), **{'initial_load':True}) + socketio.start_background_task(load_model, *('Read Only',), **{'initial_load':True}) print("", end="", flush=True) diff --git a/modeling/inference_models/api.py b/modeling/inference_models/api.py index 5bddd714..409158f5 100644 --- a/modeling/inference_models/api.py +++ b/modeling/inference_models/api.py @@ -17,12 +17,13 @@ from modeling.inference_model import ( ModelCapabilities, ) +model_backend_name = "KoboldAI API" class APIException(Exception): """To be used for errors when using the Kobold API as an interface.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() #self.base_url = "" diff --git a/modeling/inference_models/basic_api.py b/modeling/inference_models/basic_api.py index 5666ba8e..cca9652b 100644 --- a/modeling/inference_models/basic_api.py +++ b/modeling/inference_models/basic_api.py @@ -15,11 +15,13 @@ from modeling.inference_model import ( ) +model_backend_name = "KoboldAI Old Colab Method" + class BasicAPIException(Exception): """To be used for errors when using the Basic API as an interface.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index c228e2ee..f7a00f45 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -22,8 +22,9 @@ except ModuleNotFoundError as e: from modeling.inference_models.parents.hf_torch import HFTorchInferenceModel +model_backend_name = "Huggingface" -class model_loader(HFTorchInferenceModel): +class model_backend(HFTorchInferenceModel): def _initialize_model(self): return diff --git a/modeling/inference_models/gooseai.py b/modeling/inference_models/gooseai.py index 08d8ea06..9d6e8771 100644 --- a/modeling/inference_models/gooseai.py +++ b/modeling/inference_models/gooseai.py @@ -11,16 +11,17 @@ from modeling.inference_model import ( InferenceModel, ) -from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader +from modeling.inference_models.parents.openai_gooseai import model_backend as openai_gooseai_model_backend +model_backend_name = "GooseAI" class 
OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") -class model_loader(openai_gooseai_model_loader): +class model_backend(openai_gooseai_model_backend): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py index 4e82d348..6351eca2 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj.py @@ -19,10 +19,10 @@ from modeling.inference_model import ( from modeling.inference_models.parents.hf import HFInferenceModel from modeling.tokenizer import GenericTokenizer +model_backend_name = "Huggingface MTJ" - -class model_loader(HFInferenceModel): +class model_backend(HFInferenceModel): def __init__( self, #model_name: str, diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index bd457197..6c880bbe 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -16,12 +16,13 @@ from modeling.inference_model import ( ModelCapabilities, ) +model_backend_name = "Horde" class HordeException(Exception): """To be used for errors on server side of the Horde.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() self.url = "https://horde.koboldai.net" diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index cad2a7f2..19a7d1e6 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -11,16 +11,16 @@ from modeling.inference_model import ( InferenceModel, ) -from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader - +from modeling.inference_models.parents.openai_gooseai import model_backend as openai_gooseai_model_backend +model_backend_name = "OpenAI" class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") -class model_loader(openai_gooseai_model_loader): +class model_backend(openai_gooseai_model_backend): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index ba291c3f..69549bd5 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -1,7 +1,7 @@ import os from typing import Optional from transformers import AutoConfig - +import warnings import utils import koboldai_settings from logger import logger @@ -43,7 +43,7 @@ class HFInferenceModel(InferenceModel): else: self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None - if layer_count is not None and layer_count >= 0: + if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: if os.path.exists("settings/{}.breakmodel".format(model_name.replace("/", "_"))): with open("settings/{}.breakmodel".format(model_name.replace("/", "_")), "r") as file: data = [x for x in file.read().split("\n")[:2] if x != ''] @@ 
-128,15 +128,17 @@ class HFInferenceModel(InferenceModel): def set_input_parameters(self, parameters): if self.hf_torch: import breakmodel - gpu_count = torch.cuda.device_count() - layers = [] - for i in range(gpu_count): - layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) - self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None - self.layers = layers - self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 - breakmodel.gpu_blocks = layers - breakmodel.disk_blocks = self.disk_layers + layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None + if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: + gpu_count = torch.cuda.device_count() + layers = [] + for i in range(gpu_count): + layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + self.layers = layers + self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 + breakmodel.gpu_blocks = layers + breakmodel.disk_blocks = self.disk_layers self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel diff --git a/modeling/inference_models/parents/openai_gooseai.py b/modeling/inference_models/parents/openai_gooseai.py index 621ccbad..871ea5ce 100644 --- a/modeling/inference_models/parents/openai_gooseai.py +++ b/modeling/inference_models/parents/openai_gooseai.py @@ -18,7 +18,7 @@ class OpenAIAPIError(Exception): super().__init__(f"{error_type}: {error_message}") -class model_loader(InferenceModel): +class model_backend(InferenceModel): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): diff --git a/modeling/inference_models/readonly.py b/modeling/inference_models/readonly.py index c642c05a..92531af4 100644 --- a/modeling/inference_models/readonly.py +++ b/modeling/inference_models/readonly.py @@ -14,12 +14,13 @@ from modeling.inference_model import ( ModelCapabilities, ) +model_backend_name = "Read Only" class BasicAPIException(Exception): """To be used for errors when using the Basic API as an interface.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() diff --git a/modeling/inference_models/rwkv.py b/modeling/inference_models/rwkv.py index d14d8c81..fa6497b7 100644 --- a/modeling/inference_models/rwkv.py +++ b/modeling/inference_models/rwkv.py @@ -55,7 +55,10 @@ MODEL_FILES = { } -class model_loader(InferenceModel): +model_backend_name = "RWKV" + + +class model_backend(InferenceModel): def __init__( self, #model_name: str, diff --git a/static/koboldai.js b/static/koboldai.js index de3ab324..905403c1 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1645,8 +1645,85 @@ function show_model_menu(data) { } +function model_settings_checker() { + //get check value: + 
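// For reference: the check_data read below is the "check" descriptor a model backend
// attaches to each requested parameter (see the get_requested_parameters hunks above).
// Two illustrative shapes; the ids, values and messages here are hypothetical examples:
//   {"value": "", "check": "!=", "check_message": "Please enter a key"}
//   {"sum": ["0_Layers", "CPU_Layers", "Disk_Layers"], "value": 28,
//    "check": "=", "check_message": "All layers must be assigned"}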
missing_element = false; + if (this.check_data != null) { + if ('sum' in this.check_data) { + check_value = 0 + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value); + } else { + missing_element = true; + } + } + } else { + check_value = this.value + } + if (this.check_data['check'] == "=") { + valid = (check_value == this.check_data['value']); + } else if (this.check_data['check'] == "!=") { + valid = (check_value != this.check_data['value']); + } else if (this.check_data['check'] == ">=") { + valid = (check_value >= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value <= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value > this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value < this.check_data['value']); + } + if (valid || missing_element) { + //if we are supposed to refresh when this value changes we'll resubmit + if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) { + console.log("resubmit"); + } + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } + } + } else { + this.closest(".setting_container_model").classList.remove('input_error'); + this.closest(".setting_container_model").removeAttribute("tooltip"); + } + } else { + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + } else { + this.closest(".setting_container_model").classList.add('input_error'); + this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + } + var accept = document.getElementById("btn_loadmodelaccept"); + ok_to_load = true; + for (const item of document.getElementsByClassName("input_error")) { + if (item.classList.contains("input_error") && !item.closest(".model_plugin_settings_area").classList.contains("hidden")) { + ok_to_load = false; + break; + } + } + + if (ok_to_load) { + accept.classList.remove("disabled"); + accept.disabled = false; + } else { + accept.classList.add("disabled"); + accept.disabled = true; + } +} -function selected_model_info(data) { +function selected_model_info(sent_data) { + const data = sent_data['model_backends']; //clear out the loadmodelsettings var loadmodelsettings = document.getElementById('loadmodelsettings') while (loadmodelsettings.firstChild) { @@ -1667,7 +1744,10 @@ function selected_model_info(data) { for (const area of document.getElementsByClassName("model_plugin_settings_area")) { area.classList.add("hidden"); } - document.getElementById(this.value + 
"_settings_area").classList.remove("hidden"); + if (document.getElementById(this.value + "_settings_area")) { + document.getElementById(this.value + "_settings_area").classList.remove("hidden"); + } + model_settings_checker() } //create the content for (const [loader, items] of Object.entries(data)) { @@ -1679,7 +1759,11 @@ function selected_model_info(data) { modelpluginoption.innerText = loader; modelpluginoption.value = loader; modelplugin.append(modelpluginoption); + if (loader == sent_data['preselected']) { + modelplugin.value = sent_data['preselected']; + } + //create the user input for each requested input for (item of items) { let new_setting = document.getElementById('blank_model_settings').cloneNode(true); new_setting.id = loader; @@ -1687,73 +1771,7 @@ function selected_model_info(data) { new_setting.querySelector('#blank_model_settings_label').innerText = item['label']; new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']); - onchange_event = function () { - //get check value: - if ('sum' in this.check_data) { - check_value = 0 - for (const temp of this.check_data['sum']) { - if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { - check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value); - } - } - } else { - check_value = this.value - } - if (this.check_data['check'] == "=") { - valid = (check_value == this.check_data['value']); - } else if (this.check_data['check'] == "!=") { - valid = (check_value != this.check_data['value']); - } else if (this.check_data['check'] == ">=") { - valid = (check_value >= this.check_data['value']); - } else if (this.check_data['check'] == "<=") { - valid = (check_value <= this.check_data['value']); - } else if (this.check_data['check'] == "<=") { - valid = (check_value > this.check_data['value']); - } else if (this.check_data['check'] == "<=") { - valid = (check_value < this.check_data['value']); - } - if (valid) { - //if we are supposed to refresh when this value changes we'll resubmit - if (this.getAttribute("refresh_model_inputs") == "true") { - console.log("resubmit"); - } - if ('sum' in this.check_data) { - for (const temp of this.check_data['sum']) { - if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error'); - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); - } - } - } else { - this.closest(".setting_container_model").classList.remove('input_error'); - this.closest(".setting_container_model").removeAttribute("tooltip"); - } - var accept = document.getElementById("btn_loadmodelaccept"); - if (document.getElementsByClassName("input_error").length) - accept.disabled = true; - } else { - if ('sum' in this.check_data) { - for (const temp of this.check_data['sum']) { - if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); - } - } - } else { - this.closest(".setting_container_model").classList.add('input_error'); - 
this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); - } - } - var accept = document.getElementById("btn_loadmodelaccept"); - if (document.getElementsByClassName("input_error").length > 0) { - accept.classList.add("disabled"); - accept.disabled = true; - } else { - accept.classList.remove("disabled"); - accept.disabled = false; - } - - } + onchange_event = model_settings_checker; if (item['uitype'] == "slider") { var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number'); slider_number.value = item['default']; @@ -1764,6 +1782,7 @@ function selected_model_info(data) { slider.value = item['default']; slider.min = item['min']; slider.max = item['max']; + slider.setAttribute("data_type", item['unit']); slider.id = loader + "|" + item['id'] + "_value"; if ('check' in item) { slider.check_data = item['check']; @@ -1777,25 +1796,37 @@ function selected_model_info(data) { slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min']; new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max']; + slider.noresubmit = true; slider.onchange(); + slider.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_slider').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_slider').remove(); } if (item['uitype'] == "toggle") { - var toggle = new_setting.querySelector('#blank_model_settings_toggle'); + toggle = document.createElement("input"); + toggle.type='checkbox'; + toggle.classList.add("setting_item_input"); + toggle.classList.add("blank_model_settings_input"); + toggle.classList.add("model_settings_input"); toggle.id = loader + "|" + item['id'] + "_value"; toggle.checked = item['default']; - toggle.onchange = onchange_event; + toggle.onclick = onchange_event; + toggle.setAttribute("data_type", item['unit']); toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { toggle.check_data = item['check']; } else { toggle.check_data = null; } - toggle.onchange(); + new_setting.querySelector('#blank_model_settings_toggle').append(toggle); + setTimeout(function() { + $('#'+loader + "\\|" + item['id'] + "_value").bootstrapToggle({size: "mini", onstyle: "success", toggle: "toggle"}); + }, 200); + toggle.noresubmit = true; + toggle.onclick(); + toggle.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_checkbox_container').classList.add("hidden"); - new_setting.querySelector('#blank_model_settings_toggle').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_toggle').remove(); } if (item['uitype'] == "dropdown") { var select_element = new_setting.querySelector('#blank_model_settings_dropdown'); @@ -1807,6 +1838,7 @@ function selected_model_info(data) { select_element.append(new_option); } select_element.value = item['default']; + select_element.setAttribute("data_type", item['unit']); select_element.onchange = onchange_event; select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { @@ -1814,14 +1846,17 @@ function selected_model_info(data) { } else { select_element.check_data = null; } + select_element.noresubmit = true; select_element.onchange(); + select_element.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_dropdown').classList.add("hidden"); + 
new_setting.querySelector('#blank_model_settings_dropdown').remove(); } if (item['uitype'] == "password") { var password_item = new_setting.querySelector('#blank_model_settings_password'); password_item.id = loader + "|" + item['id'] + "_value"; password_item.value = item['default']; + password_item.setAttribute("data_type", item['unit']); password_item.onchange = onchange_event; password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { @@ -1829,24 +1864,29 @@ function selected_model_info(data) { } else { password_item.check_data = null; } + password_item.noresubmit = true; password_item.onchange(); + password_item.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_password').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_password').remove(); } if (item['uitype'] == "text") { var text_item = new_setting.querySelector('#blank_model_settings_text'); text_item.id = loader + "|" + item['id'] + "_value"; text_item.value = item['default']; text_item.onchange = onchange_event; + text_item.setAttribute("data_type", item['unit']); text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { text_item.check_data = item['check']; } else { text_item.check_data = null; } + text_item.noresubmit = true; text_item.onchange(); + text_item.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_text').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_text').remove(); } model_area.append(new_setting); @@ -1891,7 +1931,15 @@ function load_model() { //get an object of all the input settings from the user data = {} for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - data[element.id.split("|")[1].replace("_value", "")] = element.value; + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; diff --git a/templates/templates.html b/templates/templates.html index 49cd3e5b..49fa99f6 100644 --- a/templates/templates.html +++ b/templates/templates.html @@ -162,9 +162,8 @@ - - - + + From 182ecff20273b4921f4cefa04f7a845d22fc58ac Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 18 May 2023 16:01:17 -0400 Subject: [PATCH 10/68] Added in model backend to the command line arguments --- aiserver.py | 70 ++++++++++++++++--------- modeling/inference_model.py | 2 + modeling/inference_models/horde.py | 1 - modeling/inference_models/parents/hf.py | 8 ++- 4 files changed, 54 insertions(+), 27 deletions(-) diff --git a/aiserver.py b/aiserver.py index 314fb512..235732ec 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1355,6 +1355,7 @@ def general_startup(override_args=None): parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") parser.add_argument("--model_backend", help="Specify the model backend you want to use") + parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input to 
the model loading process (leave blank to get required parameters)") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") parser.add_argument("--sh_apikey", help="Specify the API key to use for txt2img from the Stable Horde. Get a key from https://horde.koboldai.net/register") @@ -1447,14 +1448,6 @@ def general_startup(override_args=None): args.max_summary_length = int(args.max_summary_length) - if args.model: - # At this point we have to try to load the model through the selected backend - if not args.model_backend: - logger.error("Didn't select a model backend. Please enter one through the --model_backend or remove the --model from the run command") - exit() - #if - - koboldai_vars.model = args.model; koboldai_vars.revision = args.revision koboldai_settings.multi_story = args.multi_story @@ -1556,6 +1549,37 @@ def general_startup(override_args=None): socketio.start_background_task(socket_io_relay, koboldai_settings.queue, socketio) + if args.model: + # At this point we have to try to load the model through the selected backend + if not args.model_backend: + logger.error("Didn't select a model backend. Please enter one through the --model_backend or remove the --model from the run command") + logger.error("Possible model backends are: {}".format(", ".join([x for x in model_backends]))) + exit() + if args.model_backend not in model_backends: + logger.error("Your selected model backend ({}) isn't in the model backends we know about ({})".format(args.model_backend, ", ".join([x for x in model_backends]))) + exit() + #OK, we've been given a model to load and a backend to load it through. Now we need to get a list of parameters and make sure we get what we need to actually load it + parameters = model_backends[args.model_backend].get_requested_parameters(args.model, args.path, "") + ok_to_load = True + arg_parameters = json.loads(args.model_parameters.replace("'", "\"")) if args.model_parameters != "" else {} + for parameter in parameters: + if parameter['default'] == "" or parameter['id'] not in arg_parameters: + ok_to_load = False + elif parameter['id'] not in arg_parameters: + arg_parameters[parameter] = parameter['default'] + if not ok_to_load: + logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} (required parameters shown below)") + logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) + exit() + arg_parameters['id'] = args.model + arg_parameters['model_path'] = args.path + arg_parameters['menu_path'] = "" + model_backends[args.model_backend].set_input_parameters(arg_parameters) + koboldai_vars.model = args.model + return args.model_backend + else: + return "Read Only" + def unload_model(): @@ -1633,13 +1657,13 @@ def load_model(model_backend, initial_load=False): else: logger.init_warn("GPU support", status="Not Found") - if koboldai_vars.hascuda: - if(koboldai_vars.bmsupported): - koboldai_vars.usegpu = False - koboldai_vars.breakmodel = True - else: - koboldai_vars.breakmodel = False - koboldai_vars.usegpu = use_gpu + #if koboldai_vars.hascuda: + # if(koboldai_vars.bmsupported): + # koboldai_vars.usegpu = False + # koboldai_vars.breakmodel = True + # else: + # koboldai_vars.breakmodel = False + # koboldai_vars.usegpu = use_gpu else: koboldai_vars.default_preset = koboldai_settings.default_preset @@ -10665,10 +10689,8 @@ for schema in config_endpoint_schemas: #==================================================================# # Final startup commands to launch Flask app #==================================================================# -def startup(): - if koboldai_vars.model == "" or koboldai_vars.model is None: - koboldai_vars.model = "ReadOnly" - socketio.start_background_task(load_model, *('Read Only',), **{'initial_load':True}) +def startup(command_line_backend): + socketio.start_background_task(load_model, *(command_line_backend,), **{'initial_load':True}) print("", end="", flush=True) @@ -10677,7 +10699,7 @@ def run(): global app global tpu_mtj_backend - general_startup() + command_line_backend = general_startup() # Start flask & SocketIO logger.init("Flask", status="Starting") if koboldai_vars.host: @@ -10725,7 +10747,7 @@ def run(): cloudflare = _run_cloudflared(port) koboldai_vars.cloudflare_link = cloudflare - startup() + startup(command_line_backend) if(args.localtunnel or args.ngrok or args.remote): with open('cloudflare.log', 'w') as cloudflarelog: @@ -10745,7 +10767,7 @@ def run(): else: socketio.run(app, port=port) else: - startup() + startup(command_line_backend) if args.unblock: if not args.no_ui: try: @@ -10773,13 +10795,13 @@ def run(): if __name__ == "__main__": run() else: - general_startup() + command_line_backend = general_startup() # Start flask & SocketIO logger.init("Flask", status="Starting") Session(app) logger.init_ok("Flask", status="OK") patch_transformers() - startup() + startup(command_line_backend) koboldai_settings.port = args.port if "port" in args and args.port is not None else 5000 print("{0}\nServer started in WSGI mode!{1}".format(colors.GREEN, colors.END), flush=True) diff --git a/modeling/inference_model.py b/modeling/inference_model.py index 4a29a027..c3fff46f 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -178,6 +178,8 @@ class InferenceModel: return {} def set_input_parameters(self, parameters): + for parameter in parameters: + setattr(self, parameter, parameters[parameter]) return def load(self, save_model: bool = False, initial_load: bool = False) -> None: diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 6c880bbe..5d8552fb 100644 --- a/modeling/inference_models/horde.py +++ 
b/modeling/inference_models/horde.py @@ -86,7 +86,6 @@ class model_backend(InferenceModel): def get_cluster_models(self): # Get list of models from public cluster - logger.info("Retrieving engine list...") try: req = requests.get(f"{self.url}/api/v2/status/models?type=text") except: diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 69549bd5..70143b69 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -133,10 +133,14 @@ class HFInferenceModel(InferenceModel): gpu_count = torch.cuda.device_count() layers = [] for i in range(gpu_count): - layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + layers.append(int(parameters["{}_Layers".format(i)]) if isinstance(parameters["{}_Layers".format(i)], str) and parameters["{}_Layers".format(i)].isnumeric() else None) self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + if isinstance(self.cpu_layers, str): + self.cpu_layers = int(self.cpu_layers) if self.cpu_layers.isnumeric() else 0 self.layers = layers - self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 + self.disk_layers = parameters['Disk_Layers'] if 'Disk_Layers' in parameters else 0 + if isinstance(self.disk_layers, str): + self.disk_layers = int(self.disk_layers) if self.disk_layers.isnumeric() else 0 breakmodel.gpu_blocks = layers breakmodel.disk_blocks = self.disk_layers self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None From 4040538d3438acd56e4a9121708a79b6d0d5da83 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 18 May 2023 18:34:00 -0400 Subject: [PATCH 11/68] Model Backends now defined in the menu --- aiserver.py | 38 ++++++++++++++++-------------- modeling/inference_models/horde.py | 3 ++- static/koboldai.css | 11 +++++++-- static/koboldai.js | 16 +++++++++++-- templates/popups.html | 2 +- 5 files changed, 46 insertions(+), 24 deletions(-) diff --git a/aiserver.py b/aiserver.py index 235732ec..aeebdbc1 100644 --- a/aiserver.py +++ b/aiserver.py @@ -178,11 +178,13 @@ class MenuModel(MenuItem): vram_requirements: str = "", model_type: MenuModelType = MenuModelType.HUGGINGFACE, experimental: bool = False, + model_backend: str = "Huggingface", ) -> None: super().__init__(label, name, experimental) self.model_type = model_type self.vram_requirements = vram_requirements self.is_downloaded = is_model_downloaded(self.name) + self.model_backend = model_backend def to_ui1(self) -> list: return [ @@ -245,7 +247,7 @@ model_menu = { MenuFolder("Official RWKV-4", "rwkvlist"), MenuFolder("Untuned GPT2", "gpt2list"), MenuFolder("Online Services", "apilist"), - MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER), + MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER, model_backend="Read Only"), ], 'adventurelist': [ MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "64GB"), @@ -369,25 +371,24 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'rwkvlist': [ - MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", ""), - MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", ""), - MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", ""), - MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", ""), - MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", ""), - MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", ""), - MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", ""), - 
MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", ""), - MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", ""), - MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", ""), + MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", "", model_backend="RWKV"), + MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", "", model_backend="RWKV"), + MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", "", model_backend="RWKV"), + MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", "", model_backend="RWKV"), + MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", "", model_backend="RWKV"), + MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", "", model_backend="RWKV"), + MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", "", model_backend="RWKV"), MenuFolder("Return to Main Menu", "mainmenu"), ], 'apilist': [ - MenuModel("GooseAI API (requires API key)", "GooseAI", model_type=MenuModelType.ONLINE_API), - MenuModel("OpenAI API (requires API key)", "OAI", model_type=MenuModelType.ONLINE_API), - MenuModel("InferKit API (requires API key)", "InferKit", model_type=MenuModelType.ONLINE_API), - MenuModel("KoboldAI API", "API", model_type=MenuModelType.ONLINE_API), - MenuModel("Basic Model API", "Colab", model_type=MenuModelType.ONLINE_API), - MenuModel("KoboldAI Horde", "CLUSTER", model_type=MenuModelType.ONLINE_API), + MenuModel("GooseAI API (requires API key)", "GooseAI", model_type=MenuModelType.ONLINE_API, model_backend="GooseAI"), + MenuModel("OpenAI API (requires API key)", "OAI", model_type=MenuModelType.ONLINE_API, model_backend="OpenAI"), + MenuModel("KoboldAI API", "API", model_type=MenuModelType.ONLINE_API, model_backend="KoboldAI API"), + MenuModel("Basic Model API", "Colab", model_type=MenuModelType.ONLINE_API, model_backend="KoboldAI Old Colab Method"), + MenuModel("KoboldAI Horde", "CLUSTER", model_type=MenuModelType.ONLINE_API, model_backend="Horde"), MenuFolder("Return to Main Menu", "mainmenu"), ] } @@ -1670,6 +1671,7 @@ def load_model(model_backend, initial_load=False): model = model_backends[model_backend] model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) + koboldai_vars.model = model.model_name if "model_name" in vars(model) else model.id #Should have model_name, but it could be set to id depending on how it's setup logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer @@ -6136,7 +6138,7 @@ def UI_2_select_model(data): #Get load methods if 'path' not in data or data['path'] == "": valid_loaders = {} - for model_backend in model_backends: + for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) emit("selected_model_info", {"model_backends": valid_loaders, "preselected": "Huggingface"}) else: diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 5d8552fb..8e05fbbd 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -70,6 +70,7 @@ class model_backend(InferenceModel): "id": "model", "default": model_name, "check": {"value": "", 'check': "!="}, + 
'multiple': True, "tooltip": "Which model to use when running OpenAI/GooseAI.", "menu_path": "", "refresh_model_inputs": False, @@ -102,7 +103,7 @@ class model_backend(InferenceModel): engines = req.json() try: - engines = [{"text": en["name"], "value": en["name"]} for en in engines] + engines = [{"text": "all", "value": "all"}] + [{"text": en["name"], "value": en["name"]} for en in engines] except: logger.error(engines) raise diff --git a/static/koboldai.css b/static/koboldai.css index f3dde4b7..b70c6877 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -352,7 +352,7 @@ border-top-right-radius: var(--tabs_rounding); grid-template-areas: "label value" "item item" "minlabel maxlabel"; - grid-template-rows: 20px 23px 20px; + grid-template-rows: 20px auto 20px; grid-template-columns: auto 30px; row-gap: 0.2em; background-color: var(--setting_background); @@ -2124,6 +2124,13 @@ body { cursor: pointer; background-color: #688f1f; } + +.loadmodelsettings { + overflow-y: auto; + max-height: 50%; +} + + /*----------------------------- Model Load Popup ------------------------------------------*/ #specspan, .popup_list_area .model_item .model { @@ -3539,7 +3546,7 @@ h2 .material-icons-outlined { } -.horde_trigger[model_model="ReadOnly"], +.horde_trigger[model_model="Read Only"], .horde_trigger[model_model="CLUSTER"] { display: none; } diff --git a/static/koboldai.js b/static/koboldai.js index 905403c1..399e52cf 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1695,12 +1695,20 @@ function model_settings_checker() { for (const temp of this.check_data['sum']) { if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + if (this.check_data['check_message']) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } else { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } } } } else { this.closest(".setting_container_model").classList.add('input_error'); - this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + if (this.check_data['check_message']) { + this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } else { + this.closest(".setting_container_model").removeAttribute("tooltip"); + } } } } @@ -1841,6 +1849,10 @@ function selected_model_info(sent_data) { select_element.setAttribute("data_type", item['unit']); select_element.onchange = onchange_event; select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if (('multiple' in item) && (item['multiple'])) { + select_element.multiple = true; + select_element.size = 10; + } if ('check' in item) { select_element.check_data = item['check']; } else { diff --git a/templates/popups.html b/templates/popups.html index 59f07e70..9c6b4a9e 100644 --- a/templates/popups.html +++ b/templates/popups.html @@ -48,7 +48,7 @@
- + - + + + {% include 'popups.html' %} + + From a1036465af02cefda32af06d4d3a04b0161aa118 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 12:46:02 -0400 Subject: [PATCH 17/68] Add warning about command line changes and new modular backend --- data/one_time_messages.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/data/one_time_messages.json b/data/one_time_messages.json index 3062827b..7485fd15 100644 --- a/data/one_time_messages.json +++ b/data/one_time_messages.json @@ -8,5 +8,10 @@ "id": 2, "title": "Changes since last version", "message": "

New Features

\n

Phrase Biasing

\nThere is now a Phrase Biasing implementation under Settings -> Biasing. You can now nudge the AI toward or away from generating specific words or phrases (without needing to use a userscript)\n
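One way to picture the idea (a minimal sketch only, not KoboldAI's actual implementation; the token IDs, vocab size, and bias value below are placeholders) is a bias added to the next-token logits for the tokens that make up a phrase before sampling:

import torch

def apply_phrase_bias(logits: torch.Tensor, phrase_token_ids, bias: float) -> torch.Tensor:
    # logits: next-token scores over the whole vocabulary.
    # A positive bias makes the phrase's tokens more likely, a negative bias less likely.
    biased = logits.clone()
    biased[phrase_token_ids] += bias
    return biased

# Example with made-up token IDs for an encouraged phrase and an assumed vocab size of 50257.
next_token_logits = torch.zeros(50257)
next_token_logits = apply_phrase_bias(next_token_logits, [42, 1337], 2.0)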

Context viewer

\nThe Context Viewer allows you to see what is sent to the AI. Given that only so much text can be read by the AI at a time, it's useful to know exactly what it's looking at.\n

Story Commentary

\nStory Commentary allows custom characters to speak their mind on your story. This can be configured under Settings -> Story Commentary. Characters can be added as World Info entries with a type of \"Commentator\".\n

New Chat UI (Experimental)

\nThis new interface for Chat Mode is more visually \"chat like\" in comparison to the old text-based mode. To activate it, ensure both Chat Mode (Home -> Game Mode) and Experimental UI (Interface -> Experimental UI) are enabled, then change the Chat Style (Interface -> Chat Style) to \"Messages\". Similarly to the story commentators, characters can be defined in the World Info menu; if a character's name matches a chat character defined in the World Info menu, the image on that character's entry will be used as an icon.\n

Tweaks

\nTweaks allow small UI changes to be mixed and matched to create a more personalized interface.\n

Attention Bias (Experimental)

\nAttention Bias hopes to cause some parts of the context to be internally weighed more than others in self attention. This is very experimental, and only works on OPT-based models for now.\n
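As a toy illustration of the underlying idea (not the OPT-specific hook in this patch series; shapes and names are illustrative), a per-position bias is added to the raw attention scores before the softmax so that biased context positions receive more attention weight:

import torch

def biased_attention(query, key, value, position_bias):
    # query/key/value: (seq_len, head_dim); position_bias: (seq_len,), with larger
    # values making those context positions count for more in the softmax.
    scores = query @ key.transpose(-1, -2) / key.shape[-1] ** 0.5
    scores = scores + position_bias
    weights = torch.softmax(scores, dim=-1)
    return weights @ value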

Genre

\nThe genre menu (Author's Note -> Genre) prepends genre information to the context. You can either choose from preset genres or write your own. Works better on models trained with genre/tag information, including most new models in the model picker.\n

World Info generation

\nWorld Info entries can now have their text generated automatically from a title and type. Powered by whatever model you have active, so effectiveness will vary with model.\n

Drag and drop import

\nImportable files can now be dragged into the UI to load them.\n

NovelAI lorebook/card support

\nNovelAI lorebooks and cards can now be imported as World Info. If a card is uploaded, the PNG will be used as the World Info image.\n

Finder (Ctrl+K)

\nAllows jumping to various UI elements and performing actions quickly. The mode can be adjusted by clicking the mode icon or with hotkeys on an empty search box (Search: '#', World Info: '>', Inference Scratchpad: '!', Image Prompting: '?').\n

Club import wizard

\nPrompts imported from aetherroom.club with placeholders will now show a setup prompt where you can input the value of these placeholders.\n

Context menu

\nA context menu has been added and is available in several areas. Give it a try by right-clicking on the main text area.\n

Substitutions

\nSubstitutions allow phrases to be replaced if you or the AI input them into the story. The default Substitutions are disabled and can be enabled with the pencil icon to the right of the entry.\n
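Conceptually (a simplified sketch only; the real feature also tracks which substitutions are enabled and handles word boundaries), a substitution is just a trigger string mapped to its replacement, applied to both user input and AI output:

def apply_substitutions(text: str, rules: dict) -> str:
    # rules maps an enabled trigger phrase to the text it should become.
    for trigger, replacement in rules.items():
        text = text.replace(trigger, replacement)
    return text

print(apply_substitutions("teh end", {"teh": "the"}))  # -> "the end"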

Inference scratchpad

\nThe Inference Scratchpad is a way of prompting the AI in isolation, outside of your story; the AI will not see anything in your story, and nothing the AI responds with will be added to the story. This can be useful in scenarios where you wish to use the AI in a more generic way. For example, you could prompt it with something like \"List of fantasy names:\" to receive such a list.\n

Error notifications

\nErrors are generally less opaque to the user. Client-side errors and many server errors will show a notification detailing the error.\n

Ctrl+Click to jump to World Info entry

\nHolding Ctrl while clicking on a mention of a World Info tag will bring you to the entry.\n

Model picker indicators

\nThe model picker now has indicators showing if a model is downloaded, may achieve poor quality, or may not load on your system.\n

More shortcuts

\nPress Ctrl+? to view them.\n

Image Generation

\nYou can now generate flavor images based on the game text at each action. In the settings menu in the home tab, you can click Generate Image to create an image based on the current text; the generated image will appear below. Hovering on the image will give you the prompt used to generate it. You can click on the text of previous actions to see the image associated with that action, and you can right click on the image and hit Retry to generate a new image based on that action.\nSettings for how/where the image is generated are in the left flyout menu under Interface, Image.\n

Text to speech (Experimental)

\nText to speech is now available. To enable it go to the settings menu, enable experimental ui, then enable generate audio. Audio will be generated for your actions. Play buttons will appear next to the submit button, and right clicking an action will give you a new speak option to start reading from that point.\n

UI Mode

\nIn response to feedback, we've added different UI mode levels from simple to power user. Advanced hides some of the less used options, while Power User shows everything. Simple is very much a work in progress, but it intends to simplify the majority of settings to 3 sliders. Feel free to play with it but don't expect good results yet.\n

Presets

\nPresets are now here. Community presets are pre-loaded in KoboldAI and can be selected from the settings tab in the settings menu, or from the home screen. In addition, you can save your own presets and share them with others (or send them to us for future inclusion). Presets are saved in the presets folder.\n

Alt Text Gen

\nWith this setting on, the system will insert World Info text one sentence before the word that triggers it in the AI text. This should make the AI pay more attention to it and make it more likely to influence the output.\n
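A much-simplified sketch of that insertion rule (real trigger matching and sentence splitting are more involved; the function and parameter names here are illustrative): find the first sentence containing the trigger and splice the World Info text in just before it.

import re

def insert_world_info(story: str, trigger: str, wi_text: str) -> str:
    # Split the story into sentences and insert the entry's text one
    # sentence before the first sentence that mentions the trigger.
    sentences = re.split(r"(?<=[.!?])\s+", story)
    for i, sentence in enumerate(sentences):
        if trigger.lower() in sentence.lower():
            return " ".join(sentences[:i] + [wi_text] + sentences[i:])
    return story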

Alt Multi Gen

\nIf set, multiple generations will be produced sequentially rather than all at once. This reduces the amount of VRAM required and can let you generate multiple story options with more demanding models, at the potential expense of speed.\n
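In effect (illustrative pseudologic only; generate_fn stands in for whatever generation call is actually used), the same prompt is run N times in a row instead of asking for N sequences in one batched call, trading speed for peak VRAM:

def generate_options(generate_fn, prompt, n, alt_multi_gen=False):
    # generate_fn(prompt, num_outputs) is assumed to return a list of continuations.
    if alt_multi_gen:
        # One generation at a time: lower peak VRAM, slower overall.
        return [generate_fn(prompt, 1)[0] for _ in range(n)]
    # Single batched call: faster, but memory use scales with n.
    return generate_fn(prompt, n)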

Beep on Complete

\nThere is now an option in the settings menu, Interface tab, called Beep on Complete. If set, the browser will beep when generation is complete. Useful for slow systems.\n

Privacy Screen (Experimental)

\nBy hitting Ctrl+L the screen will be blurred for all users until the password is entered and Unlock is clicked (the password is set in the settings menu, Interface tab).\n

Change Game Text Size

\nGame text can be adjusted to any size\n

No double spaces

\nWhen set, double spaces will be replaced by single spaces.\n

Themes

\nWe now have a theming engine. Themes can come in 3 flavors. Palette themes use a more basic theming system entirely in the UI. Select the colors from the Palette section and things will change. Advanced themes can have various variables set manually (click the advanced theme button to see). These allow you to go a level deeper than the palette system. Finally, we have custom themes. These are custom CSS code that can do almost anything. All themes can be saved and shared. Saved themes are stored in the themes folder\n

Auto Memory (Experimental)

\nThe start of auto-memory is in place and we are looking for feedback. It currently generates the summary but does not put it in memory (though you can copy-paste it). To see it, turn on experimental ui, go to the story menu, memory tab and click generate under auto-memory. \n

General Notes

\nIf you want a place to write stuff down that saves with the story but doesn't affect it, that's what the notes tab is for. It is found under the story menu, Notes tab\n

W++ (or SBF)

\nIn world info entries you can turn on w++ mode. This will allow you to enter data in the W++ format without having to actually write it.\n
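For reference, W++ entries are attribute/value blocks roughly of the following shape (an illustrative example, not taken from this patch); W++ mode builds this text for you from form fields instead of you typing it by hand:

[character("Alice")
{
Species("Human")
Personality("Curious" + "Brave")
Likes("Tea" + "Long walks")
}]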

World Info Titles

\nWorld info entries now have titles on them to make it easier to find the one you want. Soon the world info entries will be collapsed to just the title to make navigation easier\n

Download/Upload world info folders

\nWorld info folders can now be downloaded and/or uploaded. This makes it easier to share world info.\n

Game Text in AI Context

\nGame text that will be in the AI's context is now bold in the game screen. This will let you easily see where the AI will stop remembering your game (anything not bolded is \"forgotten\")\n

World info context

\nText that triggers a world info entry will now be italicized. Hovering over that text will show a tooltip with the World Info text that will be added to the AI's context.\n

Updated help text

\nHelp text has been expanded throughout the UI.\n

Context Bar

At the bottom of the story menu is a bar that shows how much of the AI's context is in use, and by what. Different colors correspond to different data types (actions, memory, world info, etc.).
\n
\n

Improvements

\n

Author's Note

\nThe author's note is now inserted between sentences a configurable distance from the end of the story. This should improve the coherence of generated text while keeping the author's note relevant." + }, + "3": { + "id": 3, + "title": "Changes since last version", + "message": "

New Features

\n

Modular Model Backends

Model loading is now accomplished via separate model backend files. This will allow KoboldAI to more easily add new model backends (for example 4-bit, GGML, or whatever developers want to add) without significant code rework.
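Based only on the hooks visible in the diffs in this series (backend discovery, is_valid, get_requested_parameters, set_input_parameters, and the optional disable flag), a backend module under modeling/inference_models/<name>/class.py looks roughly like the sketch below; it is illustrative, not a complete or working backend.

from modeling.inference_model import InferenceModel

model_backend_name = "Example backend"   # name shown in the UI and accepted by --model_backend

class model_backend(InferenceModel):
    def __init__(self):
        super().__init__()
        self.disable = False   # set True to hide this backend from the loader

    def is_valid(self, model_name, model_path, menu_path):
        # Return True if this backend can load the selected model or path.
        return True

    def get_requested_parameters(self, model_name, model_path, menu_path, parameters={}):
        # Describe the inputs the UI (or the --model_parameters JSON) must supply.
        return []

    def set_input_parameters(self, parameters):
        # The base class simply copies each supplied value onto the backend instance.
        for parameter in parameters:
            setattr(self, parameter, parameters[parameter])

    # ...plus the load()/generation hooks that actually build and run the model.

From the command line such a backend would then be selected with --model_backend and given its inputs as --model_parameters JSON, for example --model_parameters '{"use_gpu": true}' (the parameter id is whatever the backend requested; values here are placeholders).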

Rework of command line arguments

--breakmodel command line arguments have been deleted and if you use those you will have to pass through --model_backend and --model_parameters." } } \ No newline at end of file From 9df1f03b12ffa2513b15472a96338483178fe760 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 14:28:36 -0400 Subject: [PATCH 18/68] Fix for custom huggingface model menu entry --- aiserver.py | 36 ++++---- modeling/inference_models/hf.py | 154 ++++++++++++++++++-------------- static/application.js | 23 ++++- static/koboldai.js | 23 ++++- 4 files changed, 139 insertions(+), 97 deletions(-) diff --git a/aiserver.py b/aiserver.py index b4aad4e7..fe6d7606 100644 --- a/aiserver.py +++ b/aiserver.py @@ -233,7 +233,7 @@ model_menu = { "mainmenu": [ MenuPath("Load a model from its directory", "NeoCustom"), MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), - MenuFolder("Load custom model from Hugging Face", "customhuggingface"), + MenuModel("Load custom model from Hugging Face", "customhuggingface", ""), MenuFolder("Adventure Models", "adventurelist"), MenuFolder("Novel Models", "novellist"), MenuFolder("Chat Models", "chatlist"), @@ -6135,7 +6135,7 @@ def UI_2_select_model(data): valid_loaders = {} for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) - emit("selected_model_info", {"model_backends": valid_loaders, "preselected": "Huggingface"}) + emit("selected_model_info", {"model_backends": valid_loaders}) else: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) @@ -6149,24 +6149,20 @@ def UI_2_select_model(data): output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return - - - #We've selected a menu - if data['model'] in model_menu: - sendModelSelection(menu=data['model']) - #We've selected a custom line - elif data['menu'] in ("NeoCustom", "GPT2Custom"): - get_model_info(data['menu'], directory=data['display_name']) - #We've selected a custom menu folder - elif data['model'] in ("NeoCustom", "GPT2Custom") and 'path' in data: - sendModelSelection(menu=data['model'], folder=data['path']) - #We've selected a custom menu - elif data['model'] in ("NeoCustom", "GPT2Custom", "customhuggingface"): - sendModelSelection(menu=data['model'], folder="./models") - else: - #We now have some model we want to potentially load. 
- #First we need to send the client the model parameters (layers, etc) - get_model_info(data['model']) + + + + +#==================================================================# +# Event triggered when user changes a model parameter and it's set to resubmit +#==================================================================# +@socketio.on('resubmit_model_info') +@logger.catch +def UI_2_resubmit_model_info(data): + valid_loaders = {} + for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"], parameters=data) + emit("selected_model_info", {"model_backends": valid_loaders}) #==================================================================# # Event triggered when user loads a model diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 6f848fa9..eff3d1ce 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -33,95 +33,111 @@ class HFInferenceModel(InferenceModel): except: return False - def get_requested_parameters(self, model_name, model_path, menu_path): + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): requested_parameters = [] if not self.hf_torch: return [] - if model_path is not None and os.path.exists(model_path): - self.model_config = AutoConfig.from_pretrained(model_path) - elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): - self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") - else: - self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") - layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None - if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: - if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self): - with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f: - temp = json.load(f) - break_values = temp['layers'] if 'layers' in temp else [layer_count] - disk_blocks = temp['disk_layers'] if 'disk_layers' in temp else 0 + if model_name == 'customhuggingface': + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "Huggingface Model Name", + "id": "custom_model_name", + "default": parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Model name from https://huggingface.co/", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }) + + if model_name != 'customhuggingface' or "custom_model_name" in parameters: + model_name = parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else model_name + if model_path is not None and os.path.exists(model_path): + self.model_config = 
AutoConfig.from_pretrained(model_path) + elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): + self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") else: - break_values = [layer_count] - disk_blocks = 0 - - break_values = [int(x) for x in break_values if x != '' and x is not None] - gpu_count = torch.cuda.device_count() - break_values += [0] * (gpu_count - len(break_values)) - if disk_blocks is not None: - break_values += [int(disk_blocks)] - for i in range(gpu_count): + self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None + if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: + if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self): + with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f: + temp = json.load(f) + break_values = temp['layers'] if 'layers' in temp else [layer_count] + disk_blocks = temp['disk_layers'] if 'disk_layers' in temp else 0 + else: + break_values = [layer_count] + disk_blocks = 0 + + break_values = [int(x) for x in break_values if x != '' and x is not None] + gpu_count = torch.cuda.device_count() + break_values += [0] * (gpu_count - len(break_values)) + if disk_blocks is not None: + break_values += [int(disk_blocks)] + for i in range(gpu_count): + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "{} Layers".format(torch.cuda.get_device_name(i)), + "id": "{}_Layers".format(i), + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": break_values[i], + "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) requested_parameters.append({ "uitype": "slider", "unit": "int", - "label": "{} Layers".format(torch.cuda.get_device_name(i)), - "id": "{}_Layers".format(i), + "label": "CPU Layers", + "id": "CPU_Layers", "min": 0, "max": layer_count, "step": 1, "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), - "default": break_values[i], - "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), + "default": layer_count - sum(break_values), + "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. 
Use if you must.", "menu_path": "Layers", "extra_classes": "", "refresh_model_inputs": False }) - requested_parameters.append({ - "uitype": "slider", - "unit": "int", - "label": "CPU Layers", - "id": "CPU_Layers", - "min": 0, - "max": layer_count, - "step": 1, - "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, - "check_message": "The sum of assigned layers must equal {}".format(layer_count), - "default": layer_count - sum(break_values), - "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.", - "menu_path": "Layers", - "extra_classes": "", - "refresh_model_inputs": False - }) - if disk_blocks is not None: + if disk_blocks is not None: + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "Disk Layers", + "id": "Disk_Layers", + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": disk_blocks, + "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. Use as a last resort.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + else: requested_parameters.append({ - "uitype": "slider", - "unit": "int", - "label": "Disk Layers", - "id": "Disk_Layers", - "min": 0, - "max": layer_count, - "step": 1, - "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, - "check_message": "The sum of assigned layers must equal {}".format(layer_count), - "default": disk_blocks, - "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. 
Use as a last resort.", + "uitype": "toggle", + "unit": "bool", + "label": "Use GPU", + "id": "use_gpu", + "default": False, + "tooltip": "Whether or not to use the GPU", "menu_path": "Layers", "extra_classes": "", "refresh_model_inputs": False }) - else: - requested_parameters.append({ - "uitype": "toggle", - "unit": "bool", - "label": "Use GPU", - "id": "use_gpu", - "default": False, - "tooltip": "Whether or not to use the GPU", - "menu_path": "Layers", - "extra_classes": "", - "refresh_model_inputs": False - }) - + return requested_parameters @@ -153,7 +169,7 @@ class HFInferenceModel(InferenceModel): self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel - self.model_name = parameters['id'] + self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id'] self.path = parameters['path'] if 'path' in parameters else None def unload(self): diff --git a/static/application.js b/static/application.js index 99a65ed7..ca445c5f 100644 --- a/static/application.js +++ b/static/application.js @@ -4009,7 +4009,25 @@ function model_settings_checker() { if (valid || missing_element) { //if we are supposed to refresh when this value changes we'll resubmit if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) { - console.log("resubmit"); + //get an object of all the input settings from the user + data = {} + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; + } + data = {...data, ...selected_model_data}; + + data['plugin'] = document.getElementById("modelplugin").value; + + socket.emit("resubmit_model_info", data); } if ('sum' in this.check_data) { for (const temp of this.check_data['sum']) { @@ -4099,9 +4117,6 @@ function selected_model_info(sent_data) { modelpluginoption.innerText = loader; modelpluginoption.value = loader; modelplugin.append(modelpluginoption); - if (loader == sent_data['preselected']) { - modelplugin.value = sent_data['preselected']; - } //create the user input for each requested input for (item of items) { diff --git a/static/koboldai.js b/static/koboldai.js index 99595879..dabbcda9 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1683,7 +1683,25 @@ function model_settings_checker() { if (valid || missing_element) { //if we are supposed to refresh when this value changes we'll resubmit if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) { - console.log("resubmit"); + //get an object of all the input settings from the user + data = {} + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if 
(element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; + } + data = {...data, ...selected_model_data}; + + data['plugin'] = document.getElementById("modelplugin").value; + + socket.emit("resubmit_model_info", data); } if ('sum' in this.check_data) { for (const temp of this.check_data['sum']) { @@ -1773,9 +1791,6 @@ function selected_model_info(sent_data) { modelpluginoption.innerText = loader; modelpluginoption.value = loader; modelplugin.append(modelpluginoption); - if (loader == sent_data['preselected']) { - modelplugin.value = sent_data['preselected']; - } //create the user input for each requested input for (item of items) { From 756a33c63e323372716a1321e649f01873ecb533 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:28:39 -0400 Subject: [PATCH 19/68] Added try loop on model backend so it will continue with other models. --- aiserver.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/aiserver.py b/aiserver.py index fe6d7606..02ea2229 100644 --- a/aiserver.py +++ b/aiserver.py @@ -627,8 +627,11 @@ model_backend_code = {} model_backends = {} for module in os.listdir("./modeling/inference_models"): if not os.path.isfile(os.path.join("./modeling/inference_models",module)) and module != '__pycache__': - model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) - model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() + try: + model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) + model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() + except: + logger.error("Model Backend {} failed to load".format(module)) old_socketio_on = socketio.on @@ -1572,7 +1575,7 @@ def general_startup(override_args=None): elif parameter['id'] not in arg_parameters: arg_parameters[parameter] = parameter['default'] if not ok_to_load: - logger.error("Your selected backend needs additional parameters to run. Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} (required parameters shown below)") + logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) logger.error("Missing: {}".format(", ".join(mising_parameters))) exit() From db30402c3bd01432f8a8a8239faee5c8e55991aa Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:30:36 -0400 Subject: [PATCH 20/68] Move RWKV to use Huggingface model backend --- aiserver.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/aiserver.py b/aiserver.py index 02ea2229..a1d548e9 100644 --- a/aiserver.py +++ b/aiserver.py @@ -371,16 +371,16 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'rwkvlist': [ - MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", "", model_backend="RWKV"), - MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", "", model_backend="RWKV"), - MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", "", model_backend="RWKV"), - MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", "", model_backend="RWKV"), - MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", "", model_backend="RWKV"), - MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", "", model_backend="RWKV"), - MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", "", model_backend="RWKV"), - MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", "", model_backend="RWKV"), - MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", "", model_backend="RWKV"), - MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", ""), + MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", ""), + MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", ""), + MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", ""), + MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", ""), + MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", ""), + MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", ""), + MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", ""), + MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", ""), + MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", ""), MenuFolder("Return to Main Menu", "mainmenu"), ], 'apilist': [ From b21884fc31c556c81a89158123dfce18ba398640 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:34:15 -0400 Subject: [PATCH 21/68] Better error reporting --- aiserver.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index a1d548e9..7e8c09c8 100644 --- a/aiserver.py +++ b/aiserver.py @@ -56,6 +56,7 @@ import html import argparse import sys import gc +import traceback import lupa @@ -630,8 +631,10 @@ for module in os.listdir("./modeling/inference_models"): try: model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() - except: + except Exception: logger.error("Model Backend {} failed to load".format(module)) + logger.error(traceback.format_exc()) + old_socketio_on = socketio.on From 309f1c432ae79acdbeb6b52a6f65ed963ef5d36d Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:43:13 -0400 Subject: [PATCH 22/68] Added the ability to disable model backends in the model backend code. 
--- aiserver.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 7e8c09c8..40335a9f 100644 --- a/aiserver.py +++ b/aiserver.py @@ -631,10 +631,14 @@ for module in os.listdir("./modeling/inference_models"): try: model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() + if 'disable' in vars(model_backends[model_backend_code[module].model_backend_name]): + if model_backends[model_backend_code[module].model_backend_name].disable: + del model_backends[model_backend_code[module].model_backend_name] except Exception: logger.error("Model Backend {} failed to load".format(module)) logger.error(traceback.format_exc()) - + +logger.info("We loaded the following model backends: \n{}".format("\n".join([x for x in model_backends]))) old_socketio_on = socketio.on From 6df5fe4ad07acb7b901b65ade005ec8af40126dc Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 18:24:06 -0400 Subject: [PATCH 23/68] partial load model from custom path in menu --- aiserver.py | 20 ++++++++++++++++---- modeling/inference_models/api/class.py | 1 + modeling/inference_models/basic_api/class.py | 1 + modeling/inference_models/gooseai/class.py | 1 + modeling/inference_models/horde/class.py | 1 + modeling/inference_models/openai/class.py | 1 + modeling/inference_models/openai_gooseai.py | 6 ++++++ 7 files changed, 27 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index 40335a9f..14d268be 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6141,11 +6141,19 @@ def UI_2_select_model(data): emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) else: #Get load methods - if 'path' not in data or data['path'] == "": + if data['ismenu'] == 'false': valid_loaders = {} - for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): - valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) - emit("selected_model_info", {"model_backends": valid_loaders}) + if data['id'] in [item.name for sublist in model_menu for item in model_menu[sublist]]: + #Here if we have a model id that's in our menu, we explicitly use that backend + for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + emit("selected_model_info", {"model_backends": valid_loaders}) + else: + #Here we have a model that's not in our menu structure (either a custom model or a custom path + #so we'll just go through all the possible loaders + for model_backend in model_backends: + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + emit("selected_model_info", {"model_backends": valid_loaders}) else: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) @@ -6154,8 +6162,12 @@ def UI_2_select_model(data): valid=False for model_backend in model_backends: if model_backends[model_backend].is_valid(path[1], path[0], "Custom"): + logger.debug("{} says 
valid".format(model_backend)) valid=True break + else: + logger.debug("{} says invalid".format(model_backend)) + output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return diff --git a/modeling/inference_models/api/class.py b/modeling/inference_models/api/class.py index d9ec1147..3d54edd9 100644 --- a/modeling/inference_models/api/class.py +++ b/modeling/inference_models/api/class.py @@ -6,6 +6,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/basic_api/class.py b/modeling/inference_models/basic_api/class.py index 6f045ef5..2094d34e 100644 --- a/modeling/inference_models/basic_api/class.py +++ b/modeling/inference_models/basic_api/class.py @@ -4,6 +4,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/gooseai/class.py b/modeling/inference_models/gooseai/class.py index 8d58b4b5..1073f45f 100644 --- a/modeling/inference_models/gooseai/class.py +++ b/modeling/inference_models/gooseai/class.py @@ -2,6 +2,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/horde/class.py b/modeling/inference_models/horde/class.py index 387c5833..2c4c4bf5 100644 --- a/modeling/inference_models/horde/class.py +++ b/modeling/inference_models/horde/class.py @@ -5,6 +5,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/openai/class.py b/modeling/inference_models/openai/class.py index 84fe6df9..492a3fdb 100644 --- a/modeling/inference_models/openai/class.py +++ b/modeling/inference_models/openai/class.py @@ -2,6 +2,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/openai_gooseai.py b/modeling/inference_models/openai_gooseai.py index 4d885074..e4a027db 100644 --- a/modeling/inference_models/openai_gooseai.py +++ b/modeling/inference_models/openai_gooseai.py @@ -2,6 +2,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger @@ -30,6 +31,11 @@ class model_backend(InferenceModel): return model_name == "OAI" or model_name == "GooseAI" def get_requested_parameters(self, model_name, model_path, menu_path): + try: + print(self.source) + except: + print(vars(self)) + raise if os.path.exists("settings/{}.model_backend.settings".format(self.source)) and 'colaburl' not in vars(self): with open("settings/{}.model_backend.settings".format(self.source), "r") as f: self.key = json.load(f)['key'] From a1ee6849dc1d98c287561d5bdb6aff225c0322a5 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 18:28:47 -0400 Subject: [PATCH 24/68] Custom Paths from Menu structure fixed --- aiserver.py | 3 ++- modeling/inference_models/gooseai/class.py | 2 +- modeling/inference_models/openai/class.py | 2 +- modeling/inference_models/openai_gooseai.py | 5 ----- 4 files 
changed, 4 insertions(+), 8 deletions(-) diff --git a/aiserver.py b/aiserver.py index 14d268be..d4a127f0 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6152,7 +6152,8 @@ def UI_2_select_model(data): #Here we have a model that's not in our menu structure (either a custom model or a custom path #so we'll just go through all the possible loaders for model_backend in model_backends: - valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + if model_backends[model_backend].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) emit("selected_model_info", {"model_backends": valid_loaders}) else: #Get directories diff --git a/modeling/inference_models/gooseai/class.py b/modeling/inference_models/gooseai/class.py index 1073f45f..934f15dd 100644 --- a/modeling/inference_models/gooseai/class.py +++ b/modeling/inference_models/gooseai/class.py @@ -19,7 +19,6 @@ model_backend_name = "GooseAI" class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") - self.source = "GooseAI" class model_backend(openai_gooseai_model_backend): @@ -28,6 +27,7 @@ class model_backend(openai_gooseai_model_backend): def __init__(self): super().__init__() self.url = "https://api.goose.ai/v1/engines" + self.source = "GooseAI" def is_valid(self, model_name, model_path, menu_path): return model_name == "GooseAI" \ No newline at end of file diff --git a/modeling/inference_models/openai/class.py b/modeling/inference_models/openai/class.py index 492a3fdb..cea644ea 100644 --- a/modeling/inference_models/openai/class.py +++ b/modeling/inference_models/openai/class.py @@ -19,7 +19,6 @@ model_backend_name = "OpenAI" class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") - self.source = "OpenAI" class model_backend(openai_gooseai_model_backend): @@ -28,6 +27,7 @@ class model_backend(openai_gooseai_model_backend): def __init__(self): super().__init__() self.url = "https://api.openai.com/v1/engines" + self.source = "OpenAI" def is_valid(self, model_name, model_path, menu_path): return model_name == "OAI" \ No newline at end of file diff --git a/modeling/inference_models/openai_gooseai.py b/modeling/inference_models/openai_gooseai.py index e4a027db..e4b9dfb8 100644 --- a/modeling/inference_models/openai_gooseai.py +++ b/modeling/inference_models/openai_gooseai.py @@ -31,11 +31,6 @@ class model_backend(InferenceModel): return model_name == "OAI" or model_name == "GooseAI" def get_requested_parameters(self, model_name, model_path, menu_path): - try: - print(self.source) - except: - print(vars(self)) - raise if os.path.exists("settings/{}.model_backend.settings".format(self.source)) and 'colaburl' not in vars(self): with open("settings/{}.model_backend.settings".format(self.source), "r") as f: self.key = json.load(f)['key'] From 128c77e0fde7deae7fa30e65cc4166eb46ba314d Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 19:01:11 -0400 Subject: [PATCH 25/68] Default model backend to huggingface if not present when loading a model through the command line --- aiserver.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/aiserver.py b/aiserver.py index 
d4a127f0..a8591dc3 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1365,7 +1365,7 @@ def general_startup(override_args=None): parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable") parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") - parser.add_argument("--model_backend", help="Specify the model backend you want to use") + parser.add_argument("--model_backend", default="Huggingface", help="Specify the model backend you want to use") parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input to the model loading process (leave blank to get required parameters)") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") @@ -1558,10 +1558,6 @@ def general_startup(override_args=None): if args.model: # At this point we have to try to load the model through the selected backend - if not args.model_backend: - logger.error("Didn't select a model backend. Please enter one through the --model_backend or remove the --model from the run command") - logger.error("Possible model backends are: {}".format(", ".join([x for x in model_backends]))) - exit() if args.model_backend not in model_backends: logger.error("Your selected model backend ({}) isn't in the model backends we know about ({})".format(args.model_backend, ", ".join([x for x in model_backends]))) exit() @@ -1576,11 +1572,11 @@ def general_startup(override_args=None): arg_parameters['use_gpu'] = True for parameter in parameters: - if parameter['default'] == "" or parameter['id'] not in arg_parameters: + if parameter['default'] == "" and parameter['id'] not in arg_parameters: mising_parameters.append(parameter['id']) ok_to_load = False elif parameter['id'] not in arg_parameters: - arg_parameters[parameter] = parameter['default'] + arg_parameters[parameter['id']] = parameter['default'] if not ok_to_load: logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) From 19559d5eef5999c48503852d02d45c1c7fcce7ec Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 19:15:25 -0400 Subject: [PATCH 26/68] Fix for colors in the classic UI --- static/custom.css | 74 ++++++++++++++++++++++++++++++++++++++++ templates/templates.html | 1 - 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/static/custom.css b/static/custom.css index ffa6f44f..412c7f1b 100644 --- a/static/custom.css +++ b/static/custom.css @@ -2330,4 +2330,78 @@ body.connected .popupfooter, .popupfooter.always-available { .popup .model_item .model_menu_selected { color: var(--popup_selected_color); background-color: var(--popup_selected_color_text); +} + +.settings_select { + color: var(--dropdown_text); + background: var(--dropdown_background); + margin-left: auto; + margin-right: 25px; +} + +.setting_value { + text-align: right; + grid-area: value; + font-size: calc(12px + var(--font_size_adjustment)); + padding: 2px; + padding-top: 0px; + background-color: inherit; + color: inherit; + border: none; + outline: none; +} + +.setting_value:focus { + color: var(--text_edit); +} + +.setting_container_model { + display: grid; + grid-template-areas: "label value" + "item item" + "minlabel maxlabel"; + grid-template-rows: 20px auto 20px; + grid-template-columns: auto 30px; + row-gap: 0.2em; + background-color: var(--setting_background); + color: var(--setting_text); + border-radius: var(--radius_settings_background); + padding: 2px; + margin: 2px; + width: calc(100%); +} + +.setting_container_model .setting_item{ + font-size: calc(0.93em + var(--font_size_adjustment)); + margin-left: 10px; +} + + +.setting_minlabel { + padding-top: 6px; + grid-area: minlabel; + overflow: hidden; + padding: 5px; + padding-top: 0px; + text-align: left; + font-size: calc(0.8em + var(--font_size_adjustment)); +} + +.setting_maxlabel { + padding-top: 6px; + grid-area: maxlabel; + overflow: hidden; + padding: 5px; + padding-top: 0px; + text-align: right; + font-size: calc(0.8em + var(--font_size_adjustment)); +} + +.setting_label { + display: flex; + grid-area: label; + overflow: hidden; + padding: 5px; + padding-right: 0px; + padding-top: 0px; } \ No newline at end of file diff --git a/templates/templates.html b/templates/templates.html index 49fa99f6..926bf854 100644 --- a/templates/templates.html +++ b/templates/templates.html @@ -1,5 +1,4 @@ -
From 513b8575e71d164fc82747009f8fd3391f4ceb28 Mon Sep 17 00:00:00 2001 From: ebolam Date: Sat, 20 May 2023 11:01:49 -0400 Subject: [PATCH 27/68] Fix for missing import Fix for model name being a path which caused save issues --- aiserver.py | 2 +- modeling/inference_models/hf.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index a8591dc3..38ffc3f6 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6165,7 +6165,7 @@ def UI_2_select_model(data): else: logger.debug("{} says invalid".format(model_backend)) - output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) + output.append({'label': path[1], 'name': path[1], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index eff3d1ce..318423d5 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -8,6 +8,7 @@ import koboldai_settings from logger import logger from modeling.inference_model import InferenceModel import torch +import gc class HFInferenceModel(InferenceModel): From 925cad2e2fa6c65b8ea37680d19fa69023cce9f5 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 14:50:13 -0400 Subject: [PATCH 28/68] Better compatibility with hf model backend --- modeling/inference_models/hf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 318423d5..b209d49f 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -1,4 +1,4 @@ -import os +import os, sys from typing import Optional from transformers import AutoConfig import warnings @@ -196,9 +196,10 @@ class HFInferenceModel(InferenceModel): except: pass if self.hf_torch: - breakmodel.breakmodel = True - breakmodel.gpu_blocks = [] - breakmodel.disk_blocks = 0 + if 'breakmodel' in sys.modules: + breakmodel.breakmodel = True + breakmodel.gpu_blocks = [] + breakmodel.disk_blocks = 0 def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults From dc20e6dde9152fd609ae06d362b05b9a0ac29bb5 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 15:04:33 -0400 Subject: [PATCH 29/68] Fix for unloading models --- modeling/inference_models/hf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index b209d49f..53c802b1 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -197,6 +197,7 @@ class HFInferenceModel(InferenceModel): pass if self.hf_torch: if 'breakmodel' in sys.modules: + import breakmodel breakmodel.breakmodel = True breakmodel.gpu_blocks = [] breakmodel.disk_blocks = 0 From ca770844b0d6002f07d5b347190be0b25e6faf3d Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 15:07:59 -0400 Subject: [PATCH 30/68] Fix for breakmodel --- modeling/inference_models/hf_torch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 5dd53bf8..47c37436 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -820,7 +820,7 @@ class HFTorchInferenceModel(HFInferenceModel): 
breakmodel.gpu_blocks = [0] * n_layers return - elif breakmodel.gpu_blocks != []: + elif breakmodel.gpu_blocks == []: logger.info("Breakmodel not specified, assuming GPU 0") breakmodel.gpu_blocks = [n_layers] n_layers = 0 From f1a16f260f4f22384ae882042860228134bf6222 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 16:10:41 -0400 Subject: [PATCH 31/68] Potential breakmodel fix --- modeling/inference_models/hf_torch.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 47c37436..5595edc7 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -824,6 +824,20 @@ class HFTorchInferenceModel(HFInferenceModel): logger.info("Breakmodel not specified, assuming GPU 0") breakmodel.gpu_blocks = [n_layers] n_layers = 0 + + else: + s = n_layers + for i in range(len(breakmodel.gpu_blocks)): + if breakmodel.gpu_blocks[i] <= -1: + breakmodel.gpu_blocks[i] = s + break + else: + s -= breakmodel.gpu_blocks[i] + assert sum(breakmodel.gpu_blocks) <= n_layers + n_layers -= sum(breakmodel.gpu_blocks) + if breakmodel.disk_blocks is not None: + assert breakmodel.disk_blocks <= n_layers + n_layers -= breakmodel.disk_blocks logger.init_ok("Final device configuration:", status="Info") self.breakmodel_device_list(n_layers, primary=breakmodel.primary_device) From 9e53bcf67684198bbbaeb3e67281c1641419f448 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 20:24:57 -0400 Subject: [PATCH 32/68] Fix for breakmodel loading to CPU when set to GPU --- modeling/inference_models/generic_hf_torch/class.py | 8 +++++--- modeling/inference_models/hf.py | 6 ++++-- modeling/inference_models/hf_torch.py | 3 +++ static/custom.css | 5 +++++ 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py index 4e2c8a5b..572337e2 100644 --- a/modeling/inference_models/generic_hf_torch/class.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -248,11 +248,12 @@ class model_backend(HFTorchInferenceModel): self.patch_embedding() + if utils.koboldai_vars.hascuda: - if utils.koboldai_vars.usegpu: + if self.usegpu: # Use just VRAM self.model = self.model.half().to(utils.koboldai_vars.gpu_device) - elif utils.koboldai_vars.breakmodel: + elif self.breakmodel: # Use both RAM and VRAM (breakmodel) if not self.lazy_load: self.breakmodel_device_config(self.model.config) @@ -267,7 +268,8 @@ class model_backend(HFTorchInferenceModel): self._move_to_devices() else: self.model = self.model.to("cpu").float() - + + self.model.kai_model = self utils.koboldai_vars.modeldim = self.get_hidden_size() diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 53c802b1..e801eab2 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -158,7 +158,7 @@ class HFInferenceModel(InferenceModel): layers.append(None) else: layers.append(parameters["{}_Layers".format(i)]) - self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + self.cpu_layers = int(parameters['CPU_Layers']) if 'CPU_Layers' in parameters else None if isinstance(self.cpu_layers, str): self.cpu_layers = int(self.cpu_layers) if self.cpu_layers.isnumeric() else 0 self.layers = layers @@ -167,9 +167,11 @@ class HFInferenceModel(InferenceModel): self.disk_layers = int(self.disk_layers) if self.disk_layers.isnumeric() else 0 breakmodel.gpu_blocks = 
layers breakmodel.disk_blocks = self.disk_layers - self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.usegpu = self.cpu_layers == 0 and breakmodel.disk_blocks == 0 and sum(self.layers)-self.layers[0] == 0 self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel + else: + self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id'] self.path = parameters['path'] if 'path' in parameters else None diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 5595edc7..c5560360 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -126,6 +126,7 @@ class HFTorchInferenceModel(HFInferenceModel): return "Unknown" def _post_load(m_self) -> None: + if not utils.koboldai_vars.model_type: utils.koboldai_vars.model_type = m_self.get_model_type() @@ -562,6 +563,7 @@ class HFTorchInferenceModel(HFInferenceModel): ) ) # print(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ", end="", flush=True) + #logger.debug(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ") model_dict[key] = model_dict[key].materialize( f, map_location="cpu" ) @@ -847,6 +849,7 @@ class HFTorchInferenceModel(HFInferenceModel): # If all layers are on the same device, use the old GPU generation mode while len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0: breakmodel.gpu_blocks.pop() + self.breakmodel = True if len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in ( -1, utils.num_layers(config), diff --git a/static/custom.css b/static/custom.css index 412c7f1b..968d73e4 100644 --- a/static/custom.css +++ b/static/custom.css @@ -2404,4 +2404,9 @@ body.connected .popupfooter, .popupfooter.always-available { padding: 5px; padding-right: 0px; padding-top: 0px; +} + +.input_error { + border: 5px solid red !important; + box-sizing: border-box !important; } \ No newline at end of file From 4c25d6fbbbfad67176056a6f5af1826c2c2eb24c Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 20:34:01 -0400 Subject: [PATCH 33/68] Fix for loading model multiple times loosing the gpu/cpu splits --- modeling/inference_models/hf.py | 6 ------ modeling/inference_models/hf_torch.py | 3 +++ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index e801eab2..b50ebf56 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -197,12 +197,6 @@ class HFInferenceModel(InferenceModel): torch.cuda.empty_cache() except: pass - if self.hf_torch: - if 'breakmodel' in sys.modules: - import breakmodel - breakmodel.breakmodel = True - breakmodel.gpu_blocks = [] - breakmodel.disk_blocks = 0 def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index c5560360..681d3ab1 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -788,6 +788,7 @@ class HFTorchInferenceModel(HFInferenceModel): if device_count < 2: primary = None logger.debug("n_layers: {}".format(n_layers)) + logger.debug("gpu blocks: 
{}".format(breakmodel.gpu_blocks)) gpu_blocks = breakmodel.gpu_blocks + ( device_count - len(breakmodel.gpu_blocks) ) * [0] @@ -818,6 +819,8 @@ class HFTorchInferenceModel(HFInferenceModel): n_layers = utils.num_layers(config) + logger.debug("gpu blocks before modification: {}".format(breakmodel.gpu_blocks)) + if utils.args.cpu: breakmodel.gpu_blocks = [0] * n_layers return From 48226191922a48024a75a531668d3638b1f71155 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 20:47:14 -0400 Subject: [PATCH 34/68] Fix for model backends that have no inputs not being able to load in the UI --- static/koboldai.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/static/koboldai.js b/static/koboldai.js index dabbcda9..c4b2e160 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1933,6 +1933,8 @@ function selected_model_info(sent_data) { document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); } + model_settings_checker() + } function update_gpu_layers() { From 5561cc1f220c0cf9d957bcbd3e535ad88502ab82 Mon Sep 17 00:00:00 2001 From: ebolam Date: Tue, 23 May 2023 08:33:19 -0400 Subject: [PATCH 35/68] Fix for GPU generation --- modeling/inference_models/hf_torch.py | 13 ++++++++- static/application.js | 42 +++++++++++++++------------ static/koboldai.js | 40 +++++++++++++------------ 3 files changed, 58 insertions(+), 37 deletions(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 681d3ab1..2f575e73 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -125,6 +125,17 @@ class HFTorchInferenceModel(HFInferenceModel): else: return "Unknown" + def get_auxilary_device(self): + """Get device auxilary tensors like inputs should be stored on.""" + + # NOTE: TPU isn't a torch device, so TPU stuff gets sent to CPU. 
+ if utils.koboldai_vars.hascuda and self.usegpu: + return utils.koboldai_vars.gpu_device + elif utils.koboldai_vars.hascuda and self.breakmodel: + import breakmodel + return breakmodel.primary_device + return "cpu" + def _post_load(m_self) -> None: if not utils.koboldai_vars.model_type: @@ -226,7 +237,7 @@ class HFTorchInferenceModel(HFInferenceModel): else: gen_in = prompt_tokens - device = utils.get_auxilary_device() + device = self.get_auxilary_device() gen_in = gen_in.to(device) additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else [] diff --git a/static/application.js b/static/application.js index ca445c5f..ca81f729 100644 --- a/static/application.js +++ b/static/application.js @@ -4012,16 +4012,18 @@ function model_settings_checker() { //get an object of all the input settings from the user data = {} settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; @@ -4259,6 +4261,8 @@ function selected_model_info(sent_data) { document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); } + model_settings_checker(); + } function getModelParameterCount(modelName) { @@ -4371,16 +4375,18 @@ function load_model() { //get an object of all the input settings from the user data = {} - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; diff --git a/static/koboldai.js b/static/koboldai.js index c4b2e160..f0a1f6f8 
100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1686,16 +1686,18 @@ function model_settings_checker() { //get an object of all the input settings from the user data = {} settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; @@ -1965,16 +1967,18 @@ function load_model() { //get an object of all the input settings from the user data = {} - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; From 7a8e4c39da3c1d30ddf3489945799b2695d9be86 Mon Sep 17 00:00:00 2001 From: ebolam Date: Tue, 23 May 2023 08:35:15 -0400 Subject: [PATCH 36/68] Fix for attention bias --- aiserver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 38ffc3f6..6276e514 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3693,7 +3693,8 @@ def calcsubmit(txt): bias += [1] * (i - top_index) bias[i] = b["multiplier"] - device = utils.get_auxilary_device() + + device = model.get_auxilary_device() attention_bias.attention_bias = torch.Tensor(bias).to(device) logger.info(f"Bias by {koboldai_vars.memory_attn_bias} -- {attention_bias.attention_bias}") logger.debug("Submit: experimental_features time {}s".format(time.time()-start_time)) From 839d56ebf2e7409705a109722bf55edd0fcee77c Mon Sep 17 00:00:00 2001 From: ebolam Date: Tue, 23 May 2023 19:25:01 -0400 Subject: [PATCH 37/68] Potential fix for gpt-neo and gpt-j --- modeling/inference_models/hf.py | 21 
+++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index b50ebf56..2417bffb 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -199,8 +199,9 @@ class HFInferenceModel(InferenceModel): pass def _post_load(self) -> None: + self.model_type = str(self.model_config.model_type) # These are model specific tokenizer overrides if a model has bad defaults - if utils.koboldai_vars.model_type == "llama": + if self.model_type == "llama": # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer self.tokenizer.add_bos_token = False @@ -284,23 +285,23 @@ class HFInferenceModel(InferenceModel): return result object.__setattr__(self.tokenizer, '__call__', call_wrapper.__get__(self.tokenizer)) - elif utils.koboldai_vars.model_type == "opt": + elif self.model_type == "opt": self.tokenizer._koboldai_header = self.tokenizer.encode("") self.tokenizer.add_bos_token = False self.tokenizer.add_prefix_space = False # Change newline behavior to match model quirks - if utils.koboldai_vars.model_type == "xglm": + if self.model_type == "xglm": # Default to newline mode if using XGLM utils.koboldai_vars.newlinemode = "s" - elif utils.koboldai_vars.model_type in ["opt", "bloom"]: + elif self.model_type in ["opt", "bloom"]: # Handle but don't convert newlines if using Fairseq models that have newlines trained in them utils.koboldai_vars.newlinemode = "ns" # Clean up tokens that cause issues if ( utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default - and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj") + and self.model_type not in ("gpt2", "gpt_neo", "gptj") ): utils.koboldai_vars.badwordsids = [ [v] @@ -357,15 +358,15 @@ class HFInferenceModel(InferenceModel): revision=utils.koboldai_vars.revision, cache_dir="cache", ) - utils.koboldai_vars.model_type = self.model_config.model_type + self.model_type = self.model_config.model_type except ValueError: - utils.koboldai_vars.model_type = { + self.model_type = { "NeoCustom": "gpt_neo", "GPT2Custom": "gpt2", - }.get(utils.koboldai_vars.model) + }.get(self.model) - if not utils.koboldai_vars.model_type: + if not self.model_type: logger.warning( "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)" ) - utils.koboldai_vars.model_type = "gpt_neo" \ No newline at end of file + self.model_type = "gpt_neo" \ No newline at end of file From 9bd445c2a8d24a20b04aa905486c367455286ff9 Mon Sep 17 00:00:00 2001 From: ebolam Date: Tue, 23 May 2023 20:33:55 -0400 Subject: [PATCH 38/68] gpt2 fixed --- modeling/inference_models/generic_hf_torch/class.py | 2 +- modeling/inference_models/hf.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py index 572337e2..bbd42096 100644 --- a/modeling/inference_models/generic_hf_torch/class.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -59,7 +59,7 @@ class model_backend(HFTorchInferenceModel): "low_cpu_mem_usage": True, } - if utils.koboldai_vars.model_type == "gpt2": + if self.model_type == "gpt2": # We must disable low_cpu_mem_usage and if using a GPT-2 model # because GPT-2 is not compatible with this feature yet. 
tf_kwargs.pop("low_cpu_mem_usage", None) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 2417bffb..7b8f356c 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -61,6 +61,7 @@ class HFInferenceModel(InferenceModel): else: self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None + layer_count = None if hasattr(self, "get_model_type") and self.get_model_type() == "gpt2" else layer_count #Skip layers if we're a GPT2 model as it doesn't support breakmodel if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self): with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f: @@ -143,15 +144,13 @@ class HFInferenceModel(InferenceModel): return requested_parameters def set_input_parameters(self, parameters): - if self.hf_torch: + if self.hf_torch and hasattr(self, "get_model_type") and self.get_model_type() != "gpt2": import breakmodel layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: gpu_count = torch.cuda.device_count() layers = [] - logger.info(parameters) for i in range(gpu_count): - logger.info(parameters["{}_Layers".format(i)]) if isinstance(parameters["{}_Layers".format(i)], str) and parameters["{}_Layers".format(i)].isnumeric(): layers.append(int(parameters["{}_Layers".format(i)])) elif isinstance(parameters["{}_Layers".format(i)], str): @@ -170,8 +169,13 @@ class HFInferenceModel(InferenceModel): self.usegpu = self.cpu_layers == 0 and breakmodel.disk_blocks == 0 and sum(self.layers)-self.layers[0] == 0 self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel + self.lazy_load = True + logger.debug("Model type: {}".format(self.model_type)) else: + logger.debug("Disabling breakmodel and lazyload") self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.breakmodel = False + self.lazy_load = False self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id'] self.path = parameters['path'] if 'path' in parameters else None @@ -199,6 +203,7 @@ class HFInferenceModel(InferenceModel): pass def _post_load(self) -> None: + utils.koboldai_vars.badwordsids = koboldai_settings.badwordsids_default self.model_type = str(self.model_config.model_type) # These are model specific tokenizer overrides if a model has bad defaults if self.model_type == "llama": From 935480a701c8cb1f672db15143af0cf6f6d006e9 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 08:45:22 -0400 Subject: [PATCH 39/68] Added bad 
words to the transmit list for easier debugging --- koboldai_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/koboldai_settings.py b/koboldai_settings.py index 5467fe29..29a82406 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -647,7 +647,7 @@ class settings(object): raise class model_settings(settings): - local_only_variables = ['badwordsids', 'apikey', 'default_preset'] + local_only_variables = ['apikey', 'default_preset'] no_save_variables = ['modelconfig', 'custmodpth', 'generated_tkns', 'loaded_layers', 'total_layers', 'total_download_chunks', 'downloaded_chunks', 'presets', 'default_preset', 'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition', From 9d708bc4246e77230eeaee43d75dff5c1d4f294b Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 08:56:52 -0400 Subject: [PATCH 40/68] Logging of environmental variables over-riding command line arguments --- aiserver.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aiserver.py b/aiserver.py index 6276e514..97472f81 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1405,6 +1405,7 @@ def general_startup(override_args=None): args = parser.parse_args(shlex.split(override_args)) elif(os.environ.get("KOBOLDAI_ARGS") is not None): import shlex + logger.info("Using environmental variables instead of command arguments: {}".format(os.environ["KOBOLDAI_ARGS"])) args = parser.parse_args(shlex.split(os.environ["KOBOLDAI_ARGS"])) else: args = parser.parse_args() From c61e2b676a5917072d665812849e4407632c1724 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 09:05:21 -0400 Subject: [PATCH 41/68] More environmental variable feedback --- aiserver.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aiserver.py b/aiserver.py index 97472f81..777b36d3 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1428,9 +1428,11 @@ def general_startup(override_args=None): for arg in temp: if arg == "path": if "model_path" in os.environ: + logger.info("Setting model path based on enviornmental variable: {}".format(os.environ["model_path"])) setattr(args, arg, os.environ["model_path"]) else: if arg in os.environ: + logger.info("Setting {} based on enviornmental variable: {}".format(arg, os.environ[arg])) if isinstance(getattr(args, arg), bool): if os.environ[arg].lower() == "true": setattr(args, arg, True) From 068173b24a3f0da9df8144db7a12052d814874cb Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 09:08:34 -0400 Subject: [PATCH 42/68] Potential BadWords fix --- modeling/inference_models/hf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 7b8f356c..032b8ec3 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -203,7 +203,7 @@ class HFInferenceModel(InferenceModel): pass def _post_load(self) -> None: - utils.koboldai_vars.badwordsids = koboldai_settings.badwordsids_default + self.badwordsids = koboldai_settings.badwordsids_default self.model_type = str(self.model_config.model_type) # These are model specific tokenizer overrides if a model has bad defaults if self.model_type == "llama": @@ -305,17 +305,17 @@ class HFInferenceModel(InferenceModel): # Clean up tokens that cause issues if ( - utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default + self.badwordsids == koboldai_settings.badwordsids_default and self.model_type not in ("gpt2", "gpt_neo", "gptj") ): - utils.koboldai_vars.badwordsids = [ + self.badwordsids 
= [ [v] for k, v in self.tokenizer.get_vocab().items() if any(c in str(k) for c in "[]") ] if utils.koboldai_vars.newlinemode == "n": - utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id]) + self.badwordsids.append([self.tokenizer.eos_token_id]) return super()._post_load() From 92f592ea203a8f758a73d982aa5fb96ee3670eed Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 11:48:25 -0400 Subject: [PATCH 43/68] Fix for model name not showing correctly on load in UI1 --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 777b36d3..42715de6 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1643,7 +1643,7 @@ def load_model(model_backend, initial_load=False): koboldai_vars.noai = False set_aibusy(True) if koboldai_vars.model != 'ReadOnly': - emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True) + emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(model_backends[model_backend].model_name if "model_name" in vars(model_backends[model_backend]) else model_backends[model_backend].id)}, broadcast=True) #Have to add a sleep so the server will send the emit for some reason time.sleep(0.1) From 1a1b79a16d2e5d7cc7e8865350de0e8afde24357 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 12:44:26 -0400 Subject: [PATCH 44/68] Change default for HF on non-breakmodel models to use GPU instead of CPU --- modeling/inference_models/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 032b8ec3..ee585321 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -133,7 +133,7 @@ class HFInferenceModel(InferenceModel): "unit": "bool", "label": "Use GPU", "id": "use_gpu", - "default": False, + "default": True, "tooltip": "Whether or not to use the GPU", "menu_path": "Layers", "extra_classes": "", From b116e22bca85f059976711063850bdbfc5430522 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 16:47:19 -0400 Subject: [PATCH 45/68] Fix for colab --- aiserver.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aiserver.py b/aiserver.py index 42715de6..b06aaa83 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1559,6 +1559,9 @@ def general_startup(override_args=None): socketio.start_background_task(socket_io_relay, koboldai_settings.queue, socketio) + if koboldai_vars.use_colab_tpu and args.model_backend == "Huggingface": + args.model_backend = "Huggingface MTJ" + if args.model: # At this point we have to try to load the model through the selected backend if args.model_backend not in model_backends: @@ -1593,6 +1596,7 @@ def general_startup(override_args=None): return args.model_backend else: return "Read Only" + From 5fe8c71b2ed9132ca591d3797d1deca6f8e8762e Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 18:55:31 -0400 Subject: [PATCH 46/68] TPU Fixes --- modeling/inference_models/hf_mtj/class.py | 3 ++- tpu_mtj_backend.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py index 4de3a1b2..876e950e 100644 --- a/modeling/inference_models/hf_mtj/class.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -186,6 +186,7 @@ class model_backend(HFInferenceModel): tpu_mtj_backend.load_model( utils.koboldai_vars.model, + self.model_type, hf_checkpoint=utils.koboldai_vars.model not in ("TPUMeshTransformerGPTJ", 
"TPUMeshTransformerGPTNeoX") and utils.koboldai_vars.use_colab_tpu, @@ -202,7 +203,7 @@ class model_backend(HFInferenceModel): if ( utils.koboldai_vars.badwordsids is koboldai_settings.badwordsids_default - and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj") + and self.model_type not in ("gpt2", "gpt_neo", "gptj") ): utils.koboldai_vars.badwordsids = [ [v] diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index 07261636..d5a4d1db 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -941,7 +941,7 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2): koboldai_vars.status_message = "" -def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: +def load_model(path: str, model_type: str, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: global thread_resources_env, seq, tokenizer, network, params, pad_token_id if kwargs.get("pad_token_id"): @@ -989,9 +989,9 @@ def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=Fa # Try to convert HF config.json to MTJ config if hf_checkpoint: - spec_path = os.path.join("maps", koboldai_vars.model_type + ".json") + spec_path = os.path.join("maps", model_type + ".json") if not os.path.isfile(spec_path): - raise NotImplementedError(f"Unsupported model type {repr(koboldai_vars.model_type)}") + raise NotImplementedError(f"Unsupported model type {repr(model_type)}") with open(spec_path) as f: lazy_load_spec = json.load(f) From 6620df535035f8717f402e06381de062cd81918f Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 19:03:49 -0400 Subject: [PATCH 47/68] debug info --- modeling/inference_models/hf_mtj/class.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py index 876e950e..91ddf03d 100644 --- a/modeling/inference_models/hf_mtj/class.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -184,6 +184,7 @@ class model_backend(HFInferenceModel): self.init_model_config() utils.koboldai_vars.allowsp = True + logger.info(self.model_type) tpu_mtj_backend.load_model( utils.koboldai_vars.model, self.model_type, From 703da112ee8b6b14ead182a157b9cc82e6493707 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 19:17:51 -0400 Subject: [PATCH 48/68] TPU Fix --- modeling/inference_models/hf_mtj/class.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py index 91ddf03d..00fbfec3 100644 --- a/modeling/inference_models/hf_mtj/class.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -186,9 +186,9 @@ class model_backend(HFInferenceModel): logger.info(self.model_type) tpu_mtj_backend.load_model( - utils.koboldai_vars.model, + self.model, self.model_type, - hf_checkpoint=utils.koboldai_vars.model + hf_checkpoint=self.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and utils.koboldai_vars.use_colab_tpu, socketio_queue=koboldai_settings.queue, From 6a627265754ef9b2cb2cfb20a476a8af1d383398 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 19:30:23 -0400 Subject: [PATCH 49/68] TPU Fix? 
--- aiserver.py | 2 +- modeling/inference_models/hf_mtj/class.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index b06aaa83..998441c8 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1548,7 +1548,7 @@ def general_startup(override_args=None): koboldai_vars.custmodpth = modpath elif args.model: logger.message(f"Welcome to KoboldAI!") - logger.message(f"You have selected the following Model: {koboldai_vars.model}") + logger.message(f"You have selected the following Model: {args.model}") if args.path: logger.message(f"You have selected the following path for your Model: {args.path}") koboldai_vars.custmodpth = args.path; diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py index 00fbfec3..bc31b3fa 100644 --- a/modeling/inference_models/hf_mtj/class.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -187,7 +187,6 @@ class model_backend(HFInferenceModel): logger.info(self.model_type) tpu_mtj_backend.load_model( self.model, - self.model_type, hf_checkpoint=self.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and utils.koboldai_vars.use_colab_tpu, From 54221942ef74c20ac209c1ce52576cc65bf961ae Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 19:43:32 -0400 Subject: [PATCH 50/68] TPU Fix --- aiserver.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aiserver.py b/aiserver.py index 998441c8..ae8fecb3 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1561,6 +1561,7 @@ def general_startup(override_args=None): if koboldai_vars.use_colab_tpu and args.model_backend == "Huggingface": args.model_backend = "Huggingface MTJ" + if args.model: # At this point we have to try to load the model through the selected backend @@ -1589,6 +1590,7 @@ def general_startup(override_args=None): logger.error("Missing: {}".format(", ".join(mising_parameters))) exit() arg_parameters['id'] = args.model + arg_parameters['model'] = args.model arg_parameters['model_path'] = args.path arg_parameters['menu_path'] = "" model_backends[args.model_backend].set_input_parameters(arg_parameters) From ea4e3c477c82cc2239ec1da8bac5e4de4410e91f Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 19:45:21 -0400 Subject: [PATCH 51/68] More debuging --- modeling/inference_models/hf_mtj/class.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py index bc31b3fa..13591425 100644 --- a/modeling/inference_models/hf_mtj/class.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -184,7 +184,8 @@ class model_backend(HFInferenceModel): self.init_model_config() utils.koboldai_vars.allowsp = True - logger.info(self.model_type) + logger.info(self.model) + logger.info(self.id) tpu_mtj_backend.load_model( self.model, hf_checkpoint=self.model From b0ed7da9dde714943632de5fd917de557fdf30b6 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 19:47:45 -0400 Subject: [PATCH 52/68] more tpu debugging --- aiserver.py | 1 + modeling/inference_models/hf_mtj/class.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index ae8fecb3..ec8d05a7 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1594,6 +1594,7 @@ def general_startup(override_args=None): arg_parameters['model_path'] = args.path arg_parameters['menu_path'] = "" model_backends[args.model_backend].set_input_parameters(arg_parameters) + logger.info(vars(model_backends[args.model_backend])) koboldai_vars.model = args.model return 
args.model_backend else: diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py index 13591425..5f19897f 100644 --- a/modeling/inference_models/hf_mtj/class.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -185,7 +185,6 @@ class model_backend(HFInferenceModel): utils.koboldai_vars.allowsp = True logger.info(self.model) - logger.info(self.id) tpu_mtj_backend.load_model( self.model, hf_checkpoint=self.model From c9523a340e526c76f669bb269f7ff53116bf25c7 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 19:50:08 -0400 Subject: [PATCH 53/68] TPU Fix --- aiserver.py | 1 - modeling/inference_models/hf_mtj/class.py | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/aiserver.py b/aiserver.py index ec8d05a7..ae8fecb3 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1594,7 +1594,6 @@ def general_startup(override_args=None): arg_parameters['model_path'] = args.path arg_parameters['menu_path'] = "" model_backends[args.model_backend].set_input_parameters(arg_parameters) - logger.info(vars(model_backends[args.model_backend])) koboldai_vars.model = args.model return args.model_backend else: diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py index 5f19897f..e029db9d 100644 --- a/modeling/inference_models/hf_mtj/class.py +++ b/modeling/inference_models/hf_mtj/class.py @@ -150,7 +150,7 @@ class model_backend(HFInferenceModel): tpu_mtj_backend.socketio = utils.socketio - if utils.koboldai_vars.model == "TPUMeshTransformerGPTNeoX": + if self.model_name == "TPUMeshTransformerGPTNeoX": utils.koboldai_vars.badwordsids = utils.koboldai_vars.badwordsids_neox print( @@ -158,7 +158,7 @@ class model_backend(HFInferenceModel): Colors.PURPLE, Colors.END ) ) - if utils.koboldai_vars.model in ( + if self.model_name in ( "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX", ) and ( @@ -168,7 +168,7 @@ class model_backend(HFInferenceModel): raise FileNotFoundError( f"The specified model path {repr(utils.koboldai_vars.custmodpth)} is not the path to a valid folder" ) - if utils.koboldai_vars.model == "TPUMeshTransformerGPTNeoX": + if self.model_name == "TPUMeshTransformerGPTNeoX": tpu_mtj_backend.pad_token_id = 2 tpu_mtj_backend.koboldai_vars = utils.koboldai_vars @@ -184,10 +184,10 @@ class model_backend(HFInferenceModel): self.init_model_config() utils.koboldai_vars.allowsp = True - logger.info(self.model) + logger.info(self.model_name) tpu_mtj_backend.load_model( - self.model, - hf_checkpoint=self.model + self.model_name, + hf_checkpoint=self.model_name not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and utils.koboldai_vars.use_colab_tpu, socketio_queue=koboldai_settings.queue, From 1a7c2ddab0b582758456af292c439f177460df53 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 20:14:22 -0400 Subject: [PATCH 54/68] TPU Fix? 
--- tpu_mtj_backend.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index d5a4d1db..bf08f745 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -460,14 +460,14 @@ def sample_func(data, key, numseqs_aux, badwords, repetition_penalty, generated_ return carry class PenalizingCausalTransformer(CausalTransformer): - def __init__(self, config, **kwargs): + def __init__(self, badwordsids, config, **kwargs): # Initialize super().__init__(config, **kwargs) def generate_static(state, key, ctx, ctx_length, gen_length, numseqs_aux, sampler_options, soft_embeddings=None): compiling_callback() numseqs = numseqs_aux.shape[0] # These are the tokens that we don't want the AI to ever write - badwords = jnp.array(koboldai_vars.badwordsids).squeeze() + badwords = jnp.array(badwordsids).squeeze() @hk.transform def generate_sample(context, ctx_length): # Give the initial context to the transformer @@ -941,7 +941,9 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2): koboldai_vars.status_message = "" -def load_model(path: str, model_type: str, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: +import koboldai_settings + +def load_model(path: str, model_type: str, badwordsids=koboldai_settings.badwordsids_default driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: global thread_resources_env, seq, tokenizer, network, params, pad_token_id if kwargs.get("pad_token_id"): @@ -1119,12 +1121,12 @@ def load_model(path: str, model_type: str, driver_version="tpu_driver_20221109", global badwords # These are the tokens that we don't want the AI to ever write - badwords = jnp.array(koboldai_vars.badwordsids).squeeze() + badwords = jnp.array(badwordsids).squeeze() if not path.endswith("/"): path += "/" - network = PenalizingCausalTransformer(params, dematerialized=True) + network = PenalizingCausalTransformer(badwordsids, params, dematerialized=True) if not hf_checkpoint and koboldai_vars.model != "TPUMeshTransformerGPTNeoX": network.state = read_ckpt_lowmem(network.state, path, devices.shape[1]) From b5272ea607ad38e162b4625893ee491900305342 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 24 May 2023 20:15:11 -0400 Subject: [PATCH 55/68] Whoops --- tpu_mtj_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index bf08f745..df37e0be 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -943,7 +943,7 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2): import koboldai_settings -def load_model(path: str, model_type: str, badwordsids=koboldai_settings.badwordsids_default driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: +def load_model(path: str, model_type: str, badwordsids=koboldai_settings.badwordsids_default, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: global thread_resources_env, seq, tokenizer, network, params, pad_token_id if kwargs.get("pad_token_id"): From adb77b86513f0037c2197c185af7a91553d36e04 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 25 May 2023 18:43:56 -0400 Subject: [PATCH 56/68] Fix for horde and multi-selected models --- aiserver.py | 1 + modeling/inference_models/horde/class.py | 2 +- 
static/application.js | 21 +++++++++++++++------ static/koboldai.js | 21 +++++++++++++++------ 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/aiserver.py b/aiserver.py index ae8fecb3..cfae94cd 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6199,6 +6199,7 @@ def UI_2_resubmit_model_info(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): + logger.debug("Loading model with user input of: {}".format(data)) model_backends[data['plugin']].set_input_parameters(data) load_model(data['plugin']) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) diff --git a/modeling/inference_models/horde/class.py b/modeling/inference_models/horde/class.py index 2c4c4bf5..38b1c5c6 100644 --- a/modeling/inference_models/horde/class.py +++ b/modeling/inference_models/horde/class.py @@ -1,6 +1,6 @@ from __future__ import annotations -import time +import time, json import torch import requests import numpy as np diff --git a/static/application.js b/static/application.js index ca81f729..8bc6c830 100644 --- a/static/application.js +++ b/static/application.js @@ -4378,12 +4378,21 @@ function load_model() { if (settings_area) { for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if ((element.tagName == "SELECT") && (element.multiple)) { + element_data = []; + for (var i=0, iLen=element.options.length; i Date: Thu, 25 May 2023 18:46:35 -0400 Subject: [PATCH 57/68] Added proper model name for horde --- modeling/inference_models/horde/class.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modeling/inference_models/horde/class.py b/modeling/inference_models/horde/class.py index 38b1c5c6..3b102b46 100644 --- a/modeling/inference_models/horde/class.py +++ b/modeling/inference_models/horde/class.py @@ -29,6 +29,7 @@ class model_backend(InferenceModel): self.url = "https://horde.koboldai.net" self.key = "0000000000" self.models = self.get_cluster_models() + self.model_name = "Horde" # Do not allow API to be served over the API From d2c95bc60f6f9926699493b6a3144f427b62e5e9 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 10:33:59 -0400 Subject: [PATCH 58/68] Fix for non-jailed menu path navigation --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index cfae94cd..e492cfcf 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6147,7 +6147,7 @@ def UI_2_select_model(data): emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) else: #Get load methods - if data['ismenu'] == 'false': + if 'ismenu' in data and data['ismenu'] == 'false': valid_loaders = {} if data['id'] in [item.name for sublist in model_menu for item in model_menu[sublist]]: #Here if we have a model id that's in our menu, we explicitly use that backend From 2c82e9c5e0fe0903f16291bcdb3816427a5af7f2 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 11:08:30 -0400 Subject: [PATCH 59/68] GooseAI Fixes --- modeling/inference_models/api/class.py | 2 +- 
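For reference before the diff below: PATCHes 54 and 55 above rework the TPU loader so badwordsids is passed in explicitly instead of being read from koboldai_vars inside the backend. A hypothetical call against that signature (after the comma fix in PATCH 55) looks roughly like this; the model path and keyword values are placeholders and do not correspond to a call that appears in the series.

    import koboldai_settings
    import tpu_mtj_backend
    from logger import logger

    tpu_mtj_backend.load_model(
        "models/gpt-j-6b",   # placeholder model path
        "gptj",              # model_type, used to pick maps/<model_type>.json
        badwordsids=koboldai_settings.badwordsids_default,
        hf_checkpoint=True,
        socketio_queue=koboldai_settings.queue,
        initial_load=False,
        logger=logger,
    )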
modeling/inference_models/basic_api/class.py | 2 +- modeling/inference_models/horde/class.py | 10 +++++++--- modeling/inference_models/openai_gooseai.py | 18 ++++++++++++------ modeling/inference_models/readonly/class.py | 2 +- 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/modeling/inference_models/api/class.py b/modeling/inference_models/api/class.py index 3d54edd9..b3129d5a 100644 --- a/modeling/inference_models/api/class.py +++ b/modeling/inference_models/api/class.py @@ -32,7 +32,7 @@ class model_backend(InferenceModel): def is_valid(self, model_name, model_path, menu_path): return model_name == "API" - def get_requested_parameters(self, model_name, model_path, menu_path): + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): if os.path.exists("settings/api.model_backend.settings") and 'base_url' not in vars(self): with open("settings/api.model_backend.settings", "r") as f: self.base_url = json.load(f)['base_url'] diff --git a/modeling/inference_models/basic_api/class.py b/modeling/inference_models/basic_api/class.py index 2094d34e..b492c039 100644 --- a/modeling/inference_models/basic_api/class.py +++ b/modeling/inference_models/basic_api/class.py @@ -33,7 +33,7 @@ class model_backend(InferenceModel): def is_valid(self, model_name, model_path, menu_path): return model_name == "Colab" - def get_requested_parameters(self, model_name, model_path, menu_path): + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): if os.path.exists("settings/api.model_backend.settings") and 'colaburl' not in vars(self): with open("settings/api.model_backend.settings", "r") as f: self.colaburl = json.load(f)['base_url'] diff --git a/modeling/inference_models/horde/class.py b/modeling/inference_models/horde/class.py index 3b102b46..2cc01708 100644 --- a/modeling/inference_models/horde/class.py +++ b/modeling/inference_models/horde/class.py @@ -39,19 +39,23 @@ class model_backend(InferenceModel): logger.debug("Horde Models: {}".format(self.models)) return model_name == "CLUSTER" or model_name in [x['value'] for x in self.models] - def get_requested_parameters(self, model_name, model_path, menu_path): + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): if os.path.exists("settings/api.model_backend.settings") and 'base_url' not in vars(self): with open("settings/horde.model_backend.settings", "r") as f: temp = json.load(f) self.base_url = temp['url'] self.key = temp['key'] + if 'key' in parameters: + self.key = parameters['key'] + if 'url' in parameters: + self.url = parameters['url'] requested_parameters = [] requested_parameters.extend([{ "uitype": "text", "unit": "text", "label": "URL", "id": "url", - "default": self.url, + "default": self.url if 'url' not in parameters else parameters['url'], "tooltip": "URL to the horde.", "menu_path": "", "check": {"value": "", 'check': "!="}, @@ -63,7 +67,7 @@ class model_backend(InferenceModel): "unit": "text", "label": "Key", "id": "key", - "default": self.key, + "default": self.key if 'key' not in parameters else parameters['key'], "check": {"value": "", 'check': "!="}, "tooltip": "User Key to use when connecting to Horde (0000000000 is anonymous).", "menu_path": "", diff --git a/modeling/inference_models/openai_gooseai.py b/modeling/inference_models/openai_gooseai.py index e4b9dfb8..0195f650 100644 --- a/modeling/inference_models/openai_gooseai.py +++ b/modeling/inference_models/openai_gooseai.py @@ -1,5 +1,5 @@ import torch -import requests 
+import requests,json import numpy as np from typing import List, Optional, Union import os @@ -30,10 +30,15 @@ class model_backend(InferenceModel): def is_valid(self, model_name, model_path, menu_path): return model_name == "OAI" or model_name == "GooseAI" - def get_requested_parameters(self, model_name, model_path, menu_path): + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): if os.path.exists("settings/{}.model_backend.settings".format(self.source)) and 'colaburl' not in vars(self): with open("settings/{}.model_backend.settings".format(self.source), "r") as f: - self.key = json.load(f)['key'] + try: + self.key = json.load(f)['key'] + except: + pass + if 'key' in parameters: + self.key = parameters['key'] self.source = model_name requested_parameters = [] requested_parameters.extend([{ @@ -66,7 +71,7 @@ class model_backend(InferenceModel): def set_input_parameters(self, parameters): self.key = parameters['key'].strip() - self.model = parameters['model'] + self.model_name = parameters['model'] def get_oai_models(self): if self.key == "": @@ -94,6 +99,7 @@ class model_backend(InferenceModel): logger.init_ok("OAI Engines", status="OK") + logger.debug("OAI Engines: {}".format(engines)) return engines else: # Something went wrong, print the message and quit since we can't initialize an engine @@ -134,7 +140,7 @@ class model_backend(InferenceModel): # Build request JSON data # GooseAI is a subntype of OAI. So to check if it's this type, we check the configname as a workaround # as the koboldai_vars.model will always be OAI - if "GooseAI" in utils.koboldai_vars.configname: + if self.source == "GooseAI": reqdata = { "prompt": decoded_prompt, "max_tokens": max_new, @@ -163,7 +169,7 @@ class model_backend(InferenceModel): } req = requests.post( - self.url, + "{}/{}/completions".format(self.url, self.model_name), json=reqdata, headers={ "Authorization": "Bearer " + self.key, diff --git a/modeling/inference_models/readonly/class.py b/modeling/inference_models/readonly/class.py index 92531af4..98573990 100644 --- a/modeling/inference_models/readonly/class.py +++ b/modeling/inference_models/readonly/class.py @@ -33,7 +33,7 @@ class model_backend(InferenceModel): def is_valid(self, model_name, model_path, menu_path): return model_name == "ReadOnly" - def get_requested_parameters(self, model_name, model_path, menu_path): + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): requested_parameters = [] return requested_parameters From 52f5d879061c7ce593fe05a417466d83425f0ad6 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 11:25:28 -0400 Subject: [PATCH 60/68] Fix horde tokenizer --- modeling/inference_models/horde/class.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/modeling/inference_models/horde/class.py b/modeling/inference_models/horde/class.py index 2cc01708..f7da6604 100644 --- a/modeling/inference_models/horde/class.py +++ b/modeling/inference_models/horde/class.py @@ -30,6 +30,7 @@ class model_backend(InferenceModel): self.key = "0000000000" self.models = self.get_cluster_models() self.model_name = "Horde" + self.model = [] # Do not allow API to be served over the API @@ -114,7 +115,7 @@ class model_backend(InferenceModel): engines = req.json() try: - engines = [{"text": "all", "value": "all"}] + [{"text": en["name"], "value": en["name"]} for en in engines] + engines = [{"text": "All", "value": "all"}] + [{"text": en["name"], "value": en["name"]} for en in engines] except: 
logger.error(engines) raise @@ -127,10 +128,14 @@ class model_backend(InferenceModel): return engines def _load(self, save_model: bool, initial_load: bool) -> None: + tokenizer_name = "gpt2" + if len(self.model) > 0: + if self.model[0] == "all" and len(self.model) > 1: + tokenizer_name = self.model[1] + else: + tokenizer_name = self.model[0] self.tokenizer = self._get_tokenizer( - self.model - #if len(self.model) > 0 - #else "gpt2", + tokenizer_name ) def _save_settings(self): From 0376ab5715a8283f05db91b4eede862bf84f216a Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 11:42:29 -0400 Subject: [PATCH 61/68] KoboldAI API model name fix --- modeling/inference_models/api/class.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modeling/inference_models/api/class.py b/modeling/inference_models/api/class.py index b3129d5a..64cfd2ab 100644 --- a/modeling/inference_models/api/class.py +++ b/modeling/inference_models/api/class.py @@ -28,6 +28,7 @@ class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() self.base_url = "" + self.model_name = "KoboldAI API" def is_valid(self, model_name, model_path, menu_path): return model_name == "API" From 51cea7eb9dfafaf45a3b58b56dd5df45d21dca99 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 12:43:17 -0400 Subject: [PATCH 62/68] Added ability to add labels that are based on validation data in model loading settings --- modeling/inference_models/hf.py | 12 ++++++++++++ static/application.js | 31 +++++++++++++++++++++++++++++-- static/custom.css | 1 + static/koboldai.css | 1 + static/koboldai.js | 29 ++++++++++++++++++++++++++++- 5 files changed, 71 insertions(+), 3 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index ee585321..7a21bca6 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -77,6 +77,18 @@ class HFInferenceModel(InferenceModel): break_values += [0] * (gpu_count - len(break_values)) if disk_blocks is not None: break_values += [int(disk_blocks)] + requested_parameters.append({ + "uitype": "Valid Display", + "unit": "text", + "label": "Current Allocated Layers: %1/{}".format(layer_count), #%1 will be the validation value + "id": "valid_layers", + "max": layer_count, + "step": 1, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) for i in range(gpu_count): requested_parameters.append({ "uitype": "slider", diff --git a/static/application.js b/static/application.js index 8bc6c830..11fba578 100644 --- a/static/application.js +++ b/static/application.js @@ -4080,6 +4080,25 @@ function model_settings_checker() { accept.classList.add("disabled"); accept.disabled = true; } + + + //We now have valid display boxes potentially. 
We'll go through them and update the display + for (const item of document.querySelectorAll(".model_settings_valid_display:not(#blank_model_settings_valid_display)")) { + check_value = 0 + missing_element = false; + for (const temp of item.check_data['sum']) { + if (document.getElementById(item.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(item.id.split("|")[0] +"|" + temp + "_value").value); + } else { + missing_element = true; + } + } + if (!missing_element) { + item.innerText = item.original_text.replace("%1", check_value); + } + + + } } function selected_model_info(sent_data) { @@ -4250,18 +4269,26 @@ function selected_model_info(sent_data) { new_setting.querySelector('#blank_model_settings_text').remove(); } + if (item['uitype'] == "Valid Display") { + new_setting = document.createElement("DIV"); + new_setting.classList.add("model_settings_valid_display"); + new_setting.id = loader + "|" + item['id'] + "_value"; + new_setting.innerText = item['label']; + new_setting.check_data = item['check']; + new_setting.original_text = item['label']; + } + model_area.append(new_setting); loadmodelsettings.append(model_area); } } //unhide the first plugin settings - console.log(document.getElementById("modelplugin").value + "_settings_area"); if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) { document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); } - model_settings_checker(); + model_settings_checker() } diff --git a/static/custom.css b/static/custom.css index 968d73e4..b8e3f455 100644 --- a/static/custom.css +++ b/static/custom.css @@ -2395,6 +2395,7 @@ body.connected .popupfooter, .popupfooter.always-available { padding-top: 0px; text-align: right; font-size: calc(0.8em + var(--font_size_adjustment)); + text-align: left; } .setting_label { diff --git a/static/koboldai.css b/static/koboldai.css index 85aea08a..3252c21a 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -386,6 +386,7 @@ border-top-right-radius: var(--tabs_rounding); padding-top: 0px; text-align: right; font-size: calc(0.8em + var(--font_size_adjustment)); + text-align: left; } .setting_label { diff --git a/static/koboldai.js b/static/koboldai.js index fc33a020..99383728 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1754,6 +1754,25 @@ function model_settings_checker() { accept.classList.add("disabled"); accept.disabled = true; } + + + //We now have valid display boxes potentially. 
We'll go through them and update the display + for (const item of document.querySelectorAll(".model_settings_valid_display:not(#blank_model_settings_valid_display)")) { + check_value = 0 + missing_element = false; + for (const temp of item.check_data['sum']) { + if (document.getElementById(item.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(item.id.split("|")[0] +"|" + temp + "_value").value); + } else { + missing_element = true; + } + } + if (!missing_element) { + item.innerText = item.original_text.replace("%1", check_value); + } + + + } } function selected_model_info(sent_data) { @@ -1924,13 +1943,21 @@ function selected_model_info(sent_data) { new_setting.querySelector('#blank_model_settings_text').remove(); } + if (item['uitype'] == "Valid Display") { + new_setting = document.createElement("DIV"); + new_setting.classList.add("model_settings_valid_display"); + new_setting.id = loader + "|" + item['id'] + "_value"; + new_setting.innerText = item['label']; + new_setting.check_data = item['check']; + new_setting.original_text = item['label']; + } + model_area.append(new_setting); loadmodelsettings.append(model_area); } } //unhide the first plugin settings - console.log(document.getElementById("modelplugin").value + "_settings_area"); if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) { document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); } From 64ef8ca7c29a7eedc19f194a2c3a3e6506c80a8c Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 14:04:26 -0400 Subject: [PATCH 63/68] Fix for UI1 not highlighting selected model --- static/custom.css | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/static/custom.css b/static/custom.css index b8e3f455..25aa7818 100644 --- a/static/custom.css +++ b/static/custom.css @@ -2410,4 +2410,9 @@ body.connected .popupfooter, .popupfooter.always-available { .input_error { border: 5px solid red !important; box-sizing: border-box !important; +} + +.popup .model_item.model_menu_selected { + color: var(--popup_selected_color); + background-color: var(--popup_selected_color_text); } \ No newline at end of file From acf5b40cd8907996a5365e2353dd3ca8c09ff134 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 19:38:37 -0400 Subject: [PATCH 64/68] Bug fix --- aiserver.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/aiserver.py b/aiserver.py index e492cfcf..9653fb25 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1579,11 +1579,12 @@ def general_startup(override_args=None): arg_parameters['use_gpu'] = True for parameter in parameters: - if parameter['default'] == "" and parameter['id'] not in arg_parameters: - mising_parameters.append(parameter['id']) - ok_to_load = False - elif parameter['id'] not in arg_parameters: - arg_parameters[parameter['id']] = parameter['default'] + if parameter['uitype'] != "Valid Display": + if parameter['default'] == "" and parameter['id'] not in arg_parameters: + mising_parameters.append(parameter['id']) + ok_to_load = False + elif parameter['id'] not in arg_parameters: + arg_parameters[parameter['id']] = parameter['default'] if not ok_to_load: logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) From 9723154bed0c442a7d0140c077f5c5edc7e2f73e Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 20:10:11 -0400 Subject: [PATCH 65/68] Fix for --path --- aiserver.py | 7 +++---- modeling/inference_models/hf.py | 4 +++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/aiserver.py b/aiserver.py index 9653fb25..4e02ef96 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1545,14 +1545,13 @@ def general_startup(override_args=None): if(modpath): # Save directory to koboldai_vars koboldai_vars.model = "NeoCustom" - koboldai_vars.custmodpth = modpath + args.path = modpath elif args.model: logger.message(f"Welcome to KoboldAI!") logger.message(f"You have selected the following Model: {args.model}") if args.path: logger.message(f"You have selected the following path for your Model: {args.path}") - koboldai_vars.custmodpth = args.path; - koboldai_vars.colaburl = args.path + "/request"; # Lets just use the same parameter to keep it simple + model_backends["KoboldAI Old Colab Method"].colaburl = args.path + "/request"; # Lets just use the same parameter to keep it simple #setup socketio relay queue koboldai_settings.queue = multiprocessing.Queue() @@ -1592,7 +1591,7 @@ def general_startup(override_args=None): exit() arg_parameters['id'] = args.model arg_parameters['model'] = args.model - arg_parameters['model_path'] = args.path + arg_parameters['path'] = args.path arg_parameters['menu_path'] = "" model_backends[args.model_backend].set_input_parameters(arg_parameters) koboldai_vars.model = args.model diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 7a21bca6..c7bfdee4 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -338,9 +338,11 @@ class HFInferenceModel(InferenceModel): Returns a string of the model's path locally, or None if it is not downloaded. If ignore_existance is true, it will always return a path. 
""" + if os.path.exists(self.path): + return self.path if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]: - model_path = utils.koboldai_vars.custmodpth + model_path = self.path assert model_path # Path can be absolute or relative to models directory From 9bc9021843adf78f5b670a6974a4643f18efa099 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 21:16:54 -0400 Subject: [PATCH 66/68] Added better help message for model_parameters in command line arguments --- aiserver.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/aiserver.py b/aiserver.py index 4e02ef96..406eb01d 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1366,7 +1366,7 @@ def general_startup(override_args=None): parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") parser.add_argument("--model_backend", default="Huggingface", help="Specify the model backend you want to use") - parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input to the model loading process (leave blank to get required parameters)") + parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input to the model loading process (set to help to get required parameters)") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") parser.add_argument("--sh_apikey", help="Specify the API key to use for txt2img from the Stable Horde. Get a key from https://horde.koboldai.net/register") @@ -1571,12 +1571,13 @@ def general_startup(override_args=None): parameters = model_backends[args.model_backend].get_requested_parameters(args.model, args.path, "") ok_to_load = True mising_parameters = [] - arg_parameters = json.loads(args.model_parameters.replace("'", "\"")) if args.model_parameters != "" else {} + arg_parameters = json.loads(args.model_parameters.replace("'", "\"")) if args.model_parameters != "" and args.model_parameters.lower() != "help" else {} #If we're on colab we'll set everything to GPU0 if args.colab and args.model_backend == 'Huggingface' and koboldai_vars.on_colab: arg_parameters['use_gpu'] = True + for parameter in parameters: if parameter['uitype'] != "Valid Display": if parameter['default'] == "" and parameter['id'] not in arg_parameters: @@ -1586,9 +1587,13 @@ def general_startup(override_args=None): arg_parameters[parameter['id']] = parameter['default'] if not ok_to_load: logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") - logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) + logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters if x['uitype'] != "Valid Display"]))) logger.error("Missing: {}".format(", ".join(mising_parameters))) exit() + if args.model_parameters.lower() == "help": + logger.error("Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") + logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters if x['uitype'] != "Valid Display"]))) + exit() arg_parameters['id'] = args.model arg_parameters['model'] = args.model arg_parameters['path'] = args.path From cce5c1932cd94d3c710db62d1ced8feac2b5d774 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 26 May 2023 21:40:39 -0400 Subject: [PATCH 67/68] Fix for custom model names --- aiserver.py | 5 ++++- modeling/inference_models/generic_hf_torch/class.py | 2 +- modeling/inference_models/hf.py | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index 406eb01d..d9ed0088 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1034,7 +1034,7 @@ def getmodelname(): if(koboldai_vars.online_model != ''): return(f"{koboldai_vars.model}/{koboldai_vars.online_model}") if(koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")): - modelname = os.path.basename(os.path.normpath(koboldai_vars.custmodpth)) + modelname = os.path.basename(os.path.normpath(model.path)) return modelname else: modelname = koboldai_vars.model if koboldai_vars.model is not None else "Read Only" @@ -1687,6 +1687,9 @@ def load_model(model_backend, initial_load=False): model = model_backends[model_backend] model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) koboldai_vars.model = model.model_name if "model_name" in vars(model) else model.id #Should have model_name, but it could be set to id depending on how it's setup + if koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"): + koboldai_vars.model = os.path.basename(os.path.normpath(model.path)) + logger.info(koboldai_vars.model) logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py index bbd42096..fd4c2a1a 100644 --- a/modeling/inference_models/generic_hf_torch/class.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -41,7 +41,7 @@ class model_backend(HFTorchInferenceModel): if self.model_name == "NeoCustom": self.model_name = os.path.basename( - os.path.normpath(utils.koboldai_vars.custmodpth) + os.path.normpath(self.path) ) utils.koboldai_vars.model = self.model_name diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index c7bfdee4..5987a1ce 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -188,6 +188,7 @@ class HFInferenceModel(InferenceModel): self.usegpu = parameters['use_gpu'] if 'use_gpu' in 
parameters else None self.breakmodel = False self.lazy_load = False + logger.info(parameters) self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id'] self.path = parameters['path'] if 'path' in parameters else None From 47276c3424df73bd13fe7bcbe1c686b94319507c Mon Sep 17 00:00:00 2001 From: ebolam Date: Sat, 27 May 2023 08:49:21 -0400 Subject: [PATCH 68/68] Bug Fix --- modeling/inference_models/hf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 5987a1ce..4226d1b1 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -339,7 +339,8 @@ class HFInferenceModel(InferenceModel): Returns a string of the model's path locally, or None if it is not downloaded. If ignore_existance is true, it will always return a path. """ - if os.path.exists(self.path): + if self.path is not None: + if os.path.exists(self.path): return self.path if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]:
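The last hunk above only adds a None guard around the existing os.path.exists() call (the `return self.path` line stays as unchanged context). A minimal standalone sketch of the pattern it introduces — the function name and print examples are mine, not the repo's; in the patch the logic lives inside HFInferenceModel.get_local_model_path():

import os
from typing import Optional

def existing_path_or_none(path: Optional[str]) -> Optional[str]:
    # The None check has to come first: os.path.exists(None) raises TypeError,
    # which is presumably the crash this "Bug Fix" commit avoids when no --path
    # or custom model directory has been supplied.
    if path is not None:
        if os.path.exists(path):
            return path
    return None

print(existing_path_or_none(None))        # None, no exception raised
print(existing_path_or_none("./models"))  # "./models" if that directory exists, otherwise None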
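The `parameters = {}` argument threaded through every backend's get_requested_parameters() at the start of this series lets values that were already supplied (by a saved *.model_backend.settings file, a command-line JSON, or an earlier round-trip of the load-model UI) be fed back in, so the generated form fields default to them — this is what the horde backend does for `key` and `url`. A toy sketch of that pattern, shaped like the hunks above; DemoBackend and the placeholder URL are hypothetical, not the repo's model_backend class:

class DemoBackend:
    def __init__(self):
        self.key = "0000000000"             # anonymous default, as in the horde backend
        self.url = "https://horde.example"  # placeholder URL, for illustration only

    def get_requested_parameters(self, model_name, model_path, menu_path, parameters={}):
        # Values handed back in `parameters` win over whatever the instance already holds.
        if "key" in parameters:
            self.key = parameters["key"]
        if "url" in parameters:
            self.url = parameters["url"]
        return [
            {"uitype": "text", "id": "url",
             "default": self.url if "url" not in parameters else parameters["url"]},
            {"uitype": "text", "id": "key",
             "default": self.key if "key" not in parameters else parameters["key"]},
        ]

fields = DemoBackend().get_requested_parameters("CLUSTER", None, "", parameters={"key": "my-horde-key"})
print(fields[1]["default"])  # -> "my-horde-key"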
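PATCH 62/68 works in two halves: hf.py emits a "Valid Display" row whose check dict names the inputs to add up ({"sum": [...], "value": layer_count, "check": "="}), and model_settings_checker() in the JS sums the current values of those inputs and splices the total into the %1 placeholder of the label. A short Python restatement of that client-side step, for illustration only — function and variable names are mine, and the 32-layer, two-GPU split is an invented example:

from typing import Dict

def render_valid_display(label: str, check: Dict, current_values: Dict[str, int]) -> str:
    # Mirrors the JS loop over .model_settings_valid_display elements; only the
    # "sum" list is needed to build the label text shown to the user.
    total = sum(int(current_values.get(field, 0)) for field in check["sum"])
    return label.replace("%1", str(total))

check = {"sum": ["0_Layers", "1_Layers", "CPU_Layers", "Disk_Layers"], "value": 32, "check": "="}
values = {"0_Layers": 20, "1_Layers": 8, "CPU_Layers": 4, "Disk_Layers": 0}
print(render_valid_display("Current Allocated Layers: %1/32", check, values))
# -> Current Allocated Layers: 32/32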