diff --git a/aiserver.py b/aiserver.py
index e744d18e..e7227c81 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -168,6 +168,7 @@ class MenuFolder(MenuItem):
"size": "",
"isMenu": True,
"isDownloaded": False,
+ "isDirectory": False
}
class MenuModel(MenuItem):
@@ -200,8 +201,28 @@ class MenuModel(MenuItem):
"size": self.vram_requirements,
"isMenu": False,
"isDownloaded": self.is_downloaded,
+ "isDirectory": False,
}
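+# MenuPath entries are menu items backed by a directory on disk (e.g. "Load a model
+# from its directory"); selecting one opens a folder browser rooted at the path below.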
+class MenuPath(MenuItem):
+ def to_ui1(self) -> list:
+ return [
+ self.label,
+ self.name,
+ "",
+ True,
+ ]
+
+ def to_json(self) -> dict:
+ return {
+ "label": self.label,
+ "name": self.name,
+ "size": "",
+ "isMenu": True,
+ "isDownloaded": False,
+ "isDirectory": True,
+ "path": "./models"
+ }
# AI models Menu
# This is a dict of lists where they key is the menu name, and the list is the menu items.
@@ -209,8 +230,8 @@ class MenuModel(MenuItem):
# 3: the memory requirement for the model, 4: if the item is a menu or not (True/False)
model_menu = {
"mainmenu": [
- MenuModel("Load a model from its directory", "NeoCustom"),
- MenuModel("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"),
+ MenuPath("Load a model from its directory", "NeoCustom"),
+ MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"),
MenuFolder("Load custom model from Hugging Face", "customhuggingface"),
MenuFolder("Adventure Models", "adventurelist"),
MenuFolder("Novel Models", "novellist"),
@@ -600,6 +621,15 @@ utils.socketio = socketio
# Weird import position to steal koboldai_vars from utils
from modeling.patches import patch_transformers
+#Load all of the model importers
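+# Every .py file in modeling/inference_models is expected to expose a model_loader
+# class. One instance of each is kept in model_loaders so the UI can ask every backend
+# whether it can handle a selection (is_valid) and which inputs it still needs from the
+# user (get_requested_parameters).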
+import importlib
+model_loader_code = {}
+model_loaders = {}
+for module in os.listdir("./modeling/inference_models"):
+ if os.path.isfile(os.path.join("./modeling/inference_models",module)) and module[-3:] == '.py':
+ model_loader_code[module[:-3]] = importlib.import_module('modeling.inference_models.{}'.format(module[:-3]))
+ model_loaders[module[:-3]] = model_loader_code[module[:-3]].model_loader()
+
old_socketio_on = socketio.on
def new_socketio_on(*a, **k):
@@ -906,6 +936,8 @@ def sendModelSelection(menu="mainmenu", folder="./models"):
)
def get_folder_path_info(base):
+ if base is None:
+ return [], []
if base == 'This PC':
breadcrumbs = [['This PC', 'This PC']]
paths = [["{}:\\".format(chr(i)), "{}:\\".format(chr(i))] for i in range(65, 91) if os.path.exists("{}:".format(chr(i)))]
@@ -1932,25 +1964,25 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
koboldai_vars.breakmodel = False
if koboldai_vars.model == "Colab":
- from modeling.inference_models.basic_api import BasicAPIInferenceModel
- model = BasicAPIInferenceModel()
+ from modeling.inference_models.basic_api import model_loader
+ model = model_loader()
elif koboldai_vars.model == "API":
- from modeling.inference_models.api import APIInferenceModel
- model = APIInferenceModel(koboldai_vars.colaburl.replace("/request", ""))
+ from modeling.inference_models.api import model_loader
+ model = model_loader(koboldai_vars.colaburl.replace("/request", ""))
elif koboldai_vars.model == "CLUSTER":
- from modeling.inference_models.horde import HordeInferenceModel
- model = HordeInferenceModel()
+ from modeling.inference_models.horde import model_loader
+ model = model_loader()
elif koboldai_vars.model == "OAI":
- from modeling.inference_models.openai import OpenAIAPIInferenceModel
- model = OpenAIAPIInferenceModel()
+ from modeling.inference_models.openai import model_loader
+ model = model_loader()
model.load(initial_load=initial_load)
# TODO: This check sucks, make a model object or somethign
elif "rwkv" in koboldai_vars.model:
if koboldai_vars.use_colab_tpu:
raise RuntimeError("RWKV is not supported on the TPU.")
- from modeling.inference_models.rwkv import RWKVInferenceModel
- model = RWKVInferenceModel(koboldai_vars.model)
+ from modeling.inference_models.rwkv import model_loader
+ model = model_loader(koboldai_vars.model)
model.load()
elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai:
# HF Torch
@@ -1961,8 +1993,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
except:
pass
- from modeling.inference_models.generic_hf_torch import GenericHFTorchInferenceModel
- model = GenericHFTorchInferenceModel(
+ from modeling.inference_models.generic_hf_torch import model_loader
+ model = model_loader(
koboldai_vars.model,
lazy_load=koboldai_vars.lazy_load,
low_mem=args.lowmem
@@ -1975,8 +2007,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
logger.info(f"Pipeline created: {koboldai_vars.model}")
else:
# TPU
- from modeling.inference_models.hf_mtj import HFMTJInferenceModel
- model = HFMTJInferenceModel(
+ from modeling.inference_models.hf_mtj import model_loader
+ model = model_loader(
koboldai_vars.model
)
model.load(
@@ -6430,7 +6462,9 @@ def UI_2_retry(data):
@socketio.on('load_model_button')
@logger.catch
def UI_2_load_model_button(data):
- sendModelSelection()
+ emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":"mainmenu"}} for item in model_menu['mainmenu'] if item.should_show()]})
+
+
#==================================================================#
# Event triggered when user clicks the a model
@@ -6438,6 +6472,38 @@ def UI_2_load_model_button(data):
@socketio.on('select_model')
@logger.catch
def UI_2_select_model(data):
+ logger.debug("Clicked on model entry: {}".format(data))
+ if data["name"] in model_menu and data['ismenu'] == "true":
+ emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]})
+ else:
+ #Get load methods
+ logger.debug("Asking for model info on potential model: {}".format(data))
+ valid = False
+ if 'path' not in data or data['path'] == "":
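+            # Menu entries arriving without a path: probe every registered backend and
+            # collect the input parameters from each one that says it can load this model.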
+ valid_loaders = {}
+ for model_loader in model_loaders:
+ logger.debug("Testing Loader {} for model {}: {}".format(model_loader, data["name"], model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"])))
+ if model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]):
+ valid_loaders[model_loader] = model_loaders[model_loader].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"])
+ valid = True
+ if valid:
+ logger.debug("Valid Loaders: {}".format(valid_loaders))
+ emit("selected_model_info", valid_loaders)
+ if not valid:
+ #Get directories
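+            # Nothing claimed this entry directly, so treat it as a folder to browse:
+            # list its children and mark entries a loader recognises as selectable models.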
+            paths, breadcrumbs = get_folder_path_info(data.get('path'))
+ output = []
+ for path in paths:
+ valid=False
+ for model_loader in model_loaders:
+ if model_loaders[model_loader].is_valid(path[1], path[0], "Custom"):
+ valid=True
+ break
+ output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid})
+ emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs})
+
+ return
+
#We've selected a menu
if data['model'] in model_menu:
@@ -6462,26 +6528,9 @@ def UI_2_select_model(data):
@socketio.on('load_model')
@logger.catch
def UI_2_load_model(data):
- if not os.path.exists("settings/"):
- os.mkdir("settings")
- changed = True
- if os.path.exists("settings/" + data['model'].replace('/', '_') + ".breakmodel"):
- with open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "r") as file:
- file_data = file.read().split('\n')[:2]
- if len(file_data) < 2:
- file_data.append("0")
- gpu_layers, disk_layers = file_data
- if gpu_layers == data['gpu_layers'] and disk_layers == data['disk_layers']:
- changed = False
- if changed:
- f = open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "w")
- f.write("{}\n{}".format(data['gpu_layers'], data['disk_layers']))
- f.close()
- koboldai_vars.colaburl = data['url'] + "/request"
- koboldai_vars.model = data['model']
- koboldai_vars.custmodpth = data['path']
- print("loading Model")
- load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit'])
+    logger.info("Loading model")
+ logger.info(data)
+ #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit'])
#==================================================================#
# Event triggered when load story is clicked
diff --git a/modeling/inference_model.py b/modeling/inference_model.py
index b253c5bf..27ad46db 100644
--- a/modeling/inference_model.py
+++ b/modeling/inference_model.py
@@ -169,6 +169,15 @@ class InferenceModel:
]
self.tokenizer = None
self.capabilties = ModelCapabilities()
+
+    def is_valid(self, model_name, model_path, menu_path):
+        """Return True if this backend is able to load the given model/menu entry."""
+        return True
+
+    def get_requested_parameters(self, model_name, model_path, menu_path):
+        """Return a list of input descriptors the UI should collect before loading."""
+        return []
+
+    def set_input_parameters(self, **kwargs):
+        """Store the values the user supplied for the requested input descriptors."""
+        return
def load(self, save_model: bool = False, initial_load: bool = False) -> None:
"""User-facing load function. Do not override this; try `_load()` instead."""
diff --git a/modeling/inference_models/api.py b/modeling/inference_models/api.py
index d25505b0..41088bc7 100644
--- a/modeling/inference_models/api.py
+++ b/modeling/inference_models/api.py
@@ -22,9 +22,31 @@ class APIException(Exception):
"""To be used for errors when using the Kobold API as an interface."""
-class APIInferenceModel(InferenceModel):
- def __init__(self, base_url: str) -> None:
+class model_loader(InferenceModel):
+ def __init__(self) -> None:
super().__init__()
+ #self.base_url = ""
+
+ def is_valid(self, model_name, model_path, menu_path):
+ return model_name == "API"
+
+ def get_requested_parameters(self, model_name, model_path, menu_path):
+ requested_parameters = []
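+        # Each descriptor below becomes an input control in the UI; its "id" is the keyword
+        # that is passed back to set_input_parameters() once the user fills it in.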
+ requested_parameters.append({
+ "uitype": "text",
+ "unit": "text",
+ "label": "URL",
+ "id": "base_url",
+            "default": "",
+ "check": {"value": "", 'check': "!="},
+ "tooltip": "The URL of the KoboldAI API to connect to.",
+ "menu_path": "",
+ "extra_classes": "",
+ "refresh_model_inputs": False
+ })
+ return requested_parameters
+
+ def set_input_parameters(self, base_url=""):
self.base_url = base_url.rstrip("/")
def _load(self, save_model: bool, initial_load: bool) -> None:
diff --git a/modeling/inference_models/basic_api.py b/modeling/inference_models/basic_api.py
index c96eb42c..d7fc0863 100644
--- a/modeling/inference_models/basic_api.py
+++ b/modeling/inference_models/basic_api.py
@@ -19,12 +19,37 @@ class BasicAPIException(Exception):
"""To be used for errors when using the Basic API as an interface."""
-class BasicAPIInferenceModel(InferenceModel):
+class model_loader(InferenceModel):
def __init__(self) -> None:
super().__init__()
# Do not allow API to be served over the API
self.capabilties = ModelCapabilities(api_host=False)
+
+ def is_valid(self, model_name, model_path, menu_path):
+ return model_name == "Colab"
+
+ def get_requested_parameters(self, model_name, model_path, menu_path):
+ requested_parameters = []
+ requested_parameters.append({
+ "uitype": "text",
+ "unit": "text",
+ "label": "URL",
+ "id": "colaburl",
+            "default": "",
+ "check": {"value": "", 'check': "!="},
+ "tooltip": "The URL of the Colab KoboldAI API to connect to.",
+ "menu_path": "",
+ "extra_classes": "",
+ "refresh_model_inputs": False
+ })
+ return requested_parameters
+
+ def set_input_parameters(self, colaburl=""):
+ self.colaburl = colaburl
+
+ def _initialize_model(self):
+ return
def _load(self, save_model: bool, initial_load: bool) -> None:
self.tokenizer = self._get_tokenizer("EleutherAI/gpt-neo-2.7B")
@@ -68,7 +93,7 @@ class BasicAPIInferenceModel(InferenceModel):
}
# Create request
- req = requests.post(utils.koboldai_vars.colaburl, json=reqdata)
+ req = requests.post(self.colaburl, json=reqdata)
if req.status_code != 200:
raise BasicAPIException(f"Bad status code {req.status_code}")
diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py
index aa602b1a..366fbbb7 100644
--- a/modeling/inference_models/generic_hf_torch.py
+++ b/modeling/inference_models/generic_hf_torch.py
@@ -20,10 +20,14 @@ except ModuleNotFoundError as e:
if not utils.koboldai_vars.use_colab_tpu:
raise e
-from modeling.inference_models.hf_torch import HFTorchInferenceModel
+from modeling.inference_models.parents.hf_torch import HFTorchInferenceModel
-class GenericHFTorchInferenceModel(HFTorchInferenceModel):
+class model_loader(HFTorchInferenceModel):
+
+ def _initialize_model(self):
+ return
+
def _load(self, save_model: bool, initial_load: bool) -> None:
utils.koboldai_vars.allowsp = True
diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py
deleted file mode 100644
index cd609fed..00000000
--- a/modeling/inference_models/hf.py
+++ /dev/null
@@ -1,190 +0,0 @@
-import os
-from typing import Optional
-from transformers import AutoConfig
-
-import utils
-import koboldai_settings
-from logger import logger
-from modeling.inference_model import InferenceModel
-
-
-class HFInferenceModel(InferenceModel):
- def __init__(self, model_name: str) -> None:
- super().__init__()
- self.model_config = None
- self.model_name = model_name
-
- self.model = None
- self.tokenizer = None
-
- def _post_load(self) -> None:
- # These are model specific tokenizer overrides if a model has bad defaults
- if utils.koboldai_vars.model_type == "llama":
- # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer
- self.tokenizer.add_bos_token = False
-
- # HF transformers no longer supports decode_with_prefix_space
- # We work around this by wrapping decode, encode, and __call__
- # with versions that work around the 'prefix space' misfeature
- # of sentencepiece.
- vocab = self.tokenizer.convert_ids_to_tokens(range(self.tokenizer.vocab_size))
- has_prefix_space = {i for i, tok in enumerate(vocab) if tok.startswith("▁")}
-
- # Wrap 'decode' with a method that always returns text starting with a space
- # when the head token starts with a space. This is what 'decode_with_prefix_space'
- # used to do, and we implement it using the same technique (building a cache of
- # tokens that should have a prefix space, and then prepending a space if the first
- # token is in this set.) We also work around a bizarre behavior in which decoding
- # a single token 13 behaves differently than decoding a squence containing only [13].
- original_decode = type(self.tokenizer.tokenizer).decode
- def decode_wrapper(self, token_ids, *args, **kwargs):
- first = None
- # Note, the code below that wraps single-value token_ids in a list
- # is to work around this wonky behavior:
- # >>> t.decode(13)
- # '<0x0A>'
- # >>> t.decode([13])
- # '\n'
- # Not doing this causes token streaming to receive <0x0A> characters
- # instead of newlines.
- if isinstance(token_ids, int):
- first = token_ids
- token_ids = [first]
- elif hasattr(token_ids, 'dim'): # Check for e.g. torch.Tensor
- # Tensors don't support the Python standard of 'empty is False'
- # and the special case of dimension 0 tensors also needs to be
- # handled separately.
- if token_ids.dim() == 0:
- first = int(token_ids.item())
- token_ids = [first]
- elif len(token_ids) > 0:
- first = int(token_ids[0])
- elif token_ids:
- first = token_ids[0]
- result = original_decode(self, token_ids, *args, **kwargs)
- if first is not None and first in has_prefix_space:
- result = " " + result
- return result
- # GenericTokenizer overrides __setattr__ so we need to use object.__setattr__ to bypass it
- object.__setattr__(self.tokenizer, 'decode', decode_wrapper.__get__(self.tokenizer))
-
- # Wrap encode and __call__ to work around the 'prefix space' misfeature also.
- # The problem is that "Bob" at the start of text is encoded as if it is
- # " Bob". This creates a problem because it means you can't split text, encode
- # the pieces, concatenate the tokens, decode them, and get the original text back.
- # The workaround is to prepend a known token that (1) starts with a space; and
- # (2) is not the prefix of any other token. After searching through the vocab
- # " ," (space comma) is the only token containing only printable ascii characters
- # that fits this bill. By prepending ',' to the text, the original encode
- # method always returns [1919, ...], where the tail of the sequence is the
- # actual encoded result we want without the prefix space behavior.
- original_encode = type(self.tokenizer.tokenizer).encode
- def encode_wrapper(self, text, *args, **kwargs):
- if type(text) is str:
- text = ',' + text
- result = original_encode(self, text, *args, **kwargs)
- result = result[1:]
- else:
- result = original_encode(self, text, *args, **kwargs)
- return result
- object.__setattr__(self.tokenizer, 'encode', encode_wrapper.__get__(self.tokenizer))
-
- # Since 'encode' is documented as being deprecated, also override __call__.
- # This doesn't appear to currently be used by KoboldAI, but doing so
- # in case someone uses it in the future.
- original_call = type(self.tokenizer.tokenizer).__call__
- def call_wrapper(self, text, *args, **kwargs):
- if type(text) is str:
- text = ',' + text
- result = original_call(self, text, *args, **kwargs)
- result = result[1:]
- else:
- result = original_call(self, text, *args, **kwargs)
- return result
- object.__setattr__(self.tokenizer, '__call__', call_wrapper.__get__(self.tokenizer))
-
- elif utils.koboldai_vars.model_type == "opt":
- self.tokenizer._koboldai_header = self.tokenizer.encode("")
- self.tokenizer.add_bos_token = False
- self.tokenizer.add_prefix_space = False
-
- # Change newline behavior to match model quirks
- if utils.koboldai_vars.model_type == "xglm":
- # Default to newline mode if using XGLM
- utils.koboldai_vars.newlinemode = "s"
- elif utils.koboldai_vars.model_type in ["opt", "bloom"]:
- # Handle but don't convert newlines if using Fairseq models that have newlines trained in them
- utils.koboldai_vars.newlinemode = "ns"
-
- # Clean up tokens that cause issues
- if (
- utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default
- and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj")
- ):
- utils.koboldai_vars.badwordsids = [
- [v]
- for k, v in self.tokenizer.get_vocab().items()
- if any(c in str(k) for c in "[]")
- ]
-
- if utils.koboldai_vars.newlinemode == "n":
- utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id])
-
- return super()._post_load()
-
- def get_local_model_path(
- self, legacy: bool = False, ignore_existance: bool = False
- ) -> Optional[str]:
- """
- Returns a string of the model's path locally, or None if it is not downloaded.
- If ignore_existance is true, it will always return a path.
- """
-
- if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]:
- model_path = utils.koboldai_vars.custmodpth
- assert model_path
-
- # Path can be absolute or relative to models directory
- if os.path.exists(model_path):
- return model_path
-
- model_path = os.path.join("models", model_path)
-
- try:
- assert os.path.exists(model_path)
- except AssertionError:
- logger.error(f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'.")
- raise
-
- return model_path
-
- basename = utils.koboldai_vars.model.replace("/", "_")
- if legacy:
- ret = basename
- else:
- ret = os.path.join("models", basename)
-
- if os.path.isdir(ret) or ignore_existance:
- return ret
- return None
-
- def init_model_config(self) -> None:
- # Get the model_type from the config or assume a model type if it isn't present
- try:
- self.model_config = AutoConfig.from_pretrained(
- self.get_local_model_path() or self.model_name,
- revision=utils.koboldai_vars.revision,
- cache_dir="cache",
- )
- utils.koboldai_vars.model_type = self.model_config.model_type
- except ValueError:
- utils.koboldai_vars.model_type = {
- "NeoCustom": "gpt_neo",
- "GPT2Custom": "gpt2",
- }.get(utils.koboldai_vars.model)
-
- if not utils.koboldai_vars.model_type:
- logger.warning(
- "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)"
- )
- utils.koboldai_vars.model_type = "gpt_neo"
diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py
index 7661a67f..c99e9a05 100644
--- a/modeling/inference_models/hf_mtj.py
+++ b/modeling/inference_models/hf_mtj.py
@@ -16,19 +16,17 @@ from modeling.inference_model import (
GenerationSettings,
ModelCapabilities,
)
-from modeling.inference_models.hf import HFInferenceModel
-
-# This file shouldn't be imported unless using the TPU
-assert utils.koboldai_vars.use_colab_tpu
-import tpu_mtj_backend
+from modeling.inference_models.parents.hf import HFInferenceModel
-class HFMTJInferenceModel(HFInferenceModel):
+
+
+class model_loader(HFInferenceModel):
def __init__(
self,
- model_name: str,
+ #model_name: str,
) -> None:
- super().__init__(model_name)
+ super().__init__()
self.model_config = None
self.capabilties = ModelCapabilities(
@@ -38,8 +36,13 @@ class HFMTJInferenceModel(HFInferenceModel):
post_token_probs=False,
uses_tpu=True,
)
+
+ def is_valid(self, model_name, model_path, menu_path):
+ # This file shouldn't be imported unless using the TPU
+ return utils.koboldai_vars.use_colab_tpu and super().is_valid(model_name, model_path, menu_path)
def setup_mtj(self) -> None:
+ import tpu_mtj_backend
def mtj_warper_callback(scores) -> "np.array":
scores_shape = scores.shape
scores_list = scores.tolist()
@@ -175,6 +178,7 @@ class HFMTJInferenceModel(HFInferenceModel):
tpu_mtj_backend.settings_callback = mtj_settings_callback
def _load(self, save_model: bool, initial_load: bool) -> None:
+ import tpu_mtj_backend
self.setup_mtj()
self.init_model_config()
utils.koboldai_vars.allowsp = True
@@ -207,6 +211,7 @@ class HFMTJInferenceModel(HFInferenceModel):
]
def get_soft_tokens(self) -> np.array:
+ import tpu_mtj_backend
soft_tokens = None
if utils.koboldai_vars.sp is None:
@@ -258,6 +263,7 @@ class HFMTJInferenceModel(HFInferenceModel):
seed: Optional[int] = None,
**kwargs,
) -> GenerationResult:
+ import tpu_mtj_backend
warpers.update_settings()
soft_tokens = self.get_soft_tokens()
diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py
index c6294374..56e88205 100644
--- a/modeling/inference_models/horde.py
+++ b/modeling/inference_models/horde.py
@@ -21,13 +21,99 @@ class HordeException(Exception):
"""To be used for errors on server side of the Horde."""
-class HordeInferenceModel(InferenceModel):
+class model_loader(InferenceModel):
def __init__(self) -> None:
super().__init__()
+ self.url = "https://horde.koboldai.net"
+ self.key = "0000000000"
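+        # Fetch the Horde model list once at startup so is_valid() can match individual
+        # Horde model names in addition to the generic "CLUSTER" entry.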
+ self.models = self.get_cluster_models()
+
# Do not allow API to be served over the API
self.capabilties = ModelCapabilities(api_host=False)
+ def is_valid(self, model_name, model_path, menu_path):
+ logger.debug("Horde Models: {}".format(self.models))
+ return model_name == "CLUSTER" or model_name in [x['value'] for x in self.models]
+
+ def get_requested_parameters(self, model_name, model_path, menu_path):
+ requested_parameters = []
+ requested_parameters.extend([{
+ "uitype": "text",
+ "unit": "text",
+ "label": "URL",
+ "id": "url",
+ "default": self.url,
+ "tooltip": "URL to the horde.",
+ "menu_path": "",
+ "check": {"value": "", 'check': "!="},
+ "refresh_model_inputs": True,
+ "extra_classes": ""
+ },
+ {
+ "uitype": "text",
+ "unit": "text",
+ "label": "Key",
+ "id": "key",
+ "default": self.key,
+ "check": {"value": "", 'check': "!="},
+ "tooltip": "User Key to use when connecting to Horde (0000000000 is anonymous).",
+ "menu_path": "",
+ "refresh_model_inputs": True,
+ "extra_classes": ""
+ },
+ {
+ "uitype": "dropdown",
+ "unit": "text",
+ "label": "Model",
+ "id": "model",
+ "default": "",
+ "check": {"value": "", 'check': "!="},
+            "tooltip": "Which model to use when connecting to the Horde.",
+ "menu_path": "",
+ "refresh_model_inputs": False,
+ "extra_classes": "",
+ 'children': self.models,
+
+ }])
+ return requested_parameters
+
+ def set_input_parameters(self, url="", key="", model=""):
+ self.key = key.strip()
+ self.model = model
+ self.url = url
+
+ def get_cluster_models(self):
+ # Get list of models from public cluster
+ logger.info("Retrieving engine list...")
+ try:
+ req = requests.get(f"{self.url}/api/v2/status/models?type=text")
+ except:
+ logger.init_err("KAI Horde Models", status="Failed")
+ logger.error("Provided KoboldAI Horde URL unreachable")
+ emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"})
+            return []
+ if not req.ok:
+ # Something went wrong, print the message and quit since we can't initialize an engine
+ logger.init_err("KAI Horde Models", status="Failed")
+ logger.error(req.json())
+ emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1")
+            return []
+
+ engines = req.json()
+ try:
+ engines = [{"text": en["name"], "value": en["name"]} for en in engines]
+ except:
+ logger.error(engines)
+ raise
+ logger.debug(engines)
+
+ online_model = ""
+
+ logger.init_ok("KAI Horde Models", status="OK")
+
+ return engines
+
def _load(self, save_model: bool, initial_load: bool) -> None:
self.tokenizer = self._get_tokenizer(
utils.koboldai_vars.cluster_requested_models[0]
diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py
index 1441ae2f..01c0c037 100644
--- a/modeling/inference_models/openai.py
+++ b/modeling/inference_models/openai.py
@@ -12,13 +12,96 @@ from modeling.inference_model import (
)
+
class OpenAIAPIError(Exception):
def __init__(self, error_type: str, error_message) -> None:
super().__init__(f"{error_type}: {error_message}")
-class OpenAIAPIInferenceModel(InferenceModel):
+class model_loader(InferenceModel):
"""InferenceModel for interfacing with OpenAI's generation API."""
+
+ def __init__(self):
+ super().__init__()
+ self.key = ""
+
+ def is_valid(self, model_name, model_path, menu_path):
+ return model_name == "OAI" or model_name == "GooseAI"
+
+ def get_requested_parameters(self, model_name, model_path, menu_path):
+ self.source = model_name
+ requested_parameters = []
+ requested_parameters.extend([{
+ "uitype": "text",
+ "unit": "text",
+ "label": "Key",
+ "id": "key",
+ "default": "",
+ "check": {"value": "", 'check': "!="},
+ "tooltip": "User Key to use when connecting to OpenAI/GooseAI.",
+ "menu_path": "",
+ "refresh_model_inputs": True,
+ "extra_classes": ""
+ },
+ {
+ "uitype": "dropdown",
+ "unit": "text",
+ "label": "Model",
+ "id": "model",
+ "default": "",
+ "check": {"value": "", 'check': "!="},
+ "tooltip": "Which model to use when running OpenAI/GooseAI.",
+ "menu_path": "",
+ "refresh_model_inputs": False,
+ "extra_classes": "",
+ 'children': self.get_oai_models(),
+
+ }])
+ return requested_parameters
+
+ def set_input_parameters(self, key="", model=""):
+ self.key = key.strip()
+ self.model = model
+
+ def get_oai_models(self):
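+        # Query OpenAI/GooseAI for the available engines; this needs a key, and the
+        # endpoint depends on which source was selected in get_requested_parameters().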
+ if self.key == "":
+ return []
+ if self.source == 'OAI':
+ url = "https://api.openai.com/v1/engines"
+ elif self.source == 'GooseAI':
+ url = "https://api.goose.ai/v1/engines"
+ else:
+            return []
+
+ # Get list of models from OAI
+ logger.init("OAI Engines", status="Retrieving")
+ req = requests.get(
+ url,
+ headers = {
+ 'Authorization': 'Bearer '+self.key
+ }
+ )
+ if(req.status_code == 200):
+ r = req.json()
+ engines = r["data"]
+ try:
+ engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines]
+ except:
+ logger.error(engines)
+ raise
+
+ online_model = ""
+
+
+ logger.init_ok("OAI Engines", status="OK")
+ return engines
+ else:
+ # Something went wrong, print the message and quit since we can't initialize an engine
+ logger.init_err("OAI Engines", status="Failed")
+ logger.error(req.json())
+ emit('from_server', {'cmd': 'errmsg', 'data': req.json()})
+ return []
+
def _load(self, save_model: bool, initial_load: bool) -> None:
self.tokenizer = self._get_tokenizer("gpt2")
diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py
new file mode 100644
index 00000000..54781296
--- /dev/null
+++ b/modeling/inference_models/parents/hf.py
@@ -0,0 +1,219 @@
+import os
+from typing import Optional
+from transformers import AutoConfig
+
+import utils
+import koboldai_settings
+from logger import logger
+from modeling.inference_model import InferenceModel
+import torch
+
+
+class HFInferenceModel(InferenceModel):
+ def __init__(self) -> None:
+ super().__init__()
+ self.model_config = None
+ #self.model_name = model_name
+
+ self.model = None
+ self.tokenizer = None
+
+ def is_valid(self, model_name, model_path, menu_path):
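+        # This backend is valid for anything transformers can resolve a config for, whether
+        # from an explicit path, the local models/ folder, or the Hugging Face hub.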
+ try:
+ if model_path is not None and os.path.exists(model_path):
+ model_config = AutoConfig.from_pretrained(model_path)
+ elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))):
+ model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache")
+ else:
+ model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache")
+ return True
+ except:
+ return False
+
+ def get_requested_parameters(self, model_name, model_path, menu_path):
+ requested_parameters = []
+
+ if model_path is not None and os.path.exists(model_path):
+ model_config = AutoConfig.from_pretrained(model_path)
+ elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))):
+ model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache")
+ else:
+ model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache")
+        if isinstance(model_config, dict):
+            layer_count = model_config["n_layer"]
+        elif hasattr(model_config, "num_layers"):
+            layer_count = model_config.num_layers
+        elif hasattr(model_config, "n_layer"):
+            layer_count = model_config.n_layer
+        elif hasattr(model_config, "num_hidden_layers"):
+            layer_count = model_config.num_hidden_layers
+        else:
+            layer_count = None
+ if layer_count is not None and layer_count >= 0:
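+            # Reuse a previously saved layer split from settings/<model>.breakmodel:
+            # line 1 is a comma-separated per-GPU layer count, line 2 the disk layer count.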
+ if os.path.exists("settings/{}.breakmodel".format(model_name.replace("/", "_"))):
+ with open("settings/{}.breakmodel".format(model_name.replace("/", "_")), "r") as file:
+ data = [x for x in file.read().split("\n")[:2] if x != '']
+ if len(data) < 2:
+ data.append("0")
+ break_values, disk_blocks = data
+ break_values = break_values.split(",")
+ else:
+ break_values = [layer_count]
+ disk_blocks = None
+ break_values = [int(x) for x in break_values if x != '' and x is not None]
+ gpu_count = torch.cuda.device_count()
+ break_values += [0] * (gpu_count - len(break_values))
+ if disk_blocks is not None:
+            break_values += [int(disk_blocks)]
+ for i in range(gpu_count):
+ requested_parameters.append({
+ "uitype": "slider",
+ "unit": "int",
+ "label": "{} Layers".format(torch.cuda.get_device_name(i)),
+ "id": "{} Layers".format(i),
+ "min": 0,
+ "max": layer_count,
+ "step": 1,
+ "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
+ "check_message": "The sum of assigned layers must equal {}".format(layer_count),
+ "default": break_values[i],
+ "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)),
+ "menu_path": "Layers",
+ "extra_classes": "",
+ "refresh_model_inputs": False
+ })
+ requested_parameters.append({
+ "uitype": "slider",
+ "unit": "int",
+ "label": "CPU Layers",
+ "id": "CPU Layers",
+ "min": 0,
+ "max": layer_count,
+ "step": 1,
+ "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
+ "check_message": "The sum of assigned layers must equal {}".format(layer_count),
+ "default": layer_count - sum(break_values),
+ "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.",
+ "menu_path": "Layers",
+ "extra_classes": "",
+ "refresh_model_inputs": False
+ })
+ if disk_blocks is not None:
+ requested_parameters.append({
+ "uitype": "slider",
+ "unit": "int",
+ "label": "Disk Layers",
+ "id": "Disk_Layers",
+ "min": 0,
+ "max": layer_count,
+ "step": 1,
+ "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
+ "check_message": "The sum of assigned layers must equal {}".format(layer_count),
+ "default": disk_blocks,
+                "tooltip": "The number of layers to put on the disk. This will use your hard drive. This is VERY slow in comparison to GPU or CPU. Use as a last resort.",
+ "menu_path": "Layers",
+ "extra_classes": "",
+ "refresh_model_inputs": False
+ })
+ else:
+ requested_parameters.append({
+ "uitype": "toggle",
+ "unit": "bool",
+ "label": "Use GPU",
+ "id": "use_gpu",
+ "default": False,
+                "tooltip": "Whether to load the model on the GPU or the CPU.",
+ "menu_path": "Layers",
+ "extra_classes": "",
+ "refresh_model_inputs": False
+ })
+
+
+ return requested_parameters
+
+ def set_input_parameters(self, layers=[], disk_layers=0, use_gpu=False):
+ self.layers = layers
+ self.disk_layers = disk_layers
+ self.use_gpu = use_gpu
+
+ def _post_load(self) -> None:
+ # These are model specific tokenizer overrides if a model has bad defaults
+ if utils.koboldai_vars.model_type == "llama":
+ self.tokenizer.decode_with_prefix_space = True
+ self.tokenizer.add_bos_token = False
+ elif utils.koboldai_vars.model_type == "opt":
+ self.tokenizer._koboldai_header = self.tokenizer.encode("")
+ self.tokenizer.add_bos_token = False
+ self.tokenizer.add_prefix_space = False
+
+ # Change newline behavior to match model quirks
+ if utils.koboldai_vars.model_type == "xglm":
+ # Default to newline mode if using XGLM
+ utils.koboldai_vars.newlinemode = "s"
+ elif utils.koboldai_vars.model_type in ["opt", "bloom"]:
+ # Handle but don't convert newlines if using Fairseq models that have newlines trained in them
+ utils.koboldai_vars.newlinemode = "ns"
+
+ # Clean up tokens that cause issues
+ if (
+ utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default
+ and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj")
+ ):
+ utils.koboldai_vars.badwordsids = [
+ [v]
+ for k, v in self.tokenizer.get_vocab().items()
+ if any(c in str(k) for c in "[]")
+ ]
+
+ if utils.koboldai_vars.newlinemode == "n":
+ utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id])
+
+ return super()._post_load()
+
+ def get_local_model_path(
+ self, legacy: bool = False, ignore_existance: bool = False
+ ) -> Optional[str]:
+ """
+ Returns a string of the model's path locally, or None if it is not downloaded.
+ If ignore_existance is true, it will always return a path.
+ """
+
+ if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]:
+ model_path = utils.koboldai_vars.custmodpth
+ assert model_path
+
+ # Path can be absolute or relative to models directory
+ if os.path.exists(model_path):
+ return model_path
+
+ model_path = os.path.join("models", model_path)
+
+ try:
+ assert os.path.exists(model_path)
+ except AssertionError:
+ logger.error(f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'.")
+ raise
+
+ return model_path
+
+ basename = utils.koboldai_vars.model.replace("/", "_")
+ if legacy:
+ ret = basename
+ else:
+ ret = os.path.join("models", basename)
+
+ if os.path.isdir(ret) or ignore_existance:
+ return ret
+ return None
+
+ def init_model_config(self) -> None:
+ # Get the model_type from the config or assume a model type if it isn't present
+ try:
+ self.model_config = AutoConfig.from_pretrained(
+ self.get_local_model_path() or self.model_name,
+ revision=utils.koboldai_vars.revision,
+ cache_dir="cache",
+ )
+ utils.koboldai_vars.model_type = self.model_config.model_type
+ except ValueError:
+ utils.koboldai_vars.model_type = {
+ "NeoCustom": "gpt_neo",
+ "GPT2Custom": "gpt2",
+ }.get(utils.koboldai_vars.model)
+
+ if not utils.koboldai_vars.model_type:
+ logger.warning(
+ "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)"
+ )
+ utils.koboldai_vars.model_type = "gpt_neo"
diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/parents/hf_torch.py
similarity index 94%
rename from modeling/inference_models/hf_torch.py
rename to modeling/inference_models/parents/hf_torch.py
index 990fabfc..d8afafb1 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/parents/hf_torch.py
@@ -31,7 +31,7 @@ from modeling import warpers
from modeling.warpers import Warper
from modeling.stoppers import Stoppers
from modeling.post_token_hooks import PostTokenHooks
-from modeling.inference_models.hf import HFInferenceModel
+from modeling.inference_models.parents.hf import HFInferenceModel
from modeling.inference_model import (
GenerationResult,
GenerationSettings,
@@ -55,13 +55,13 @@ LOG_SAMPLER_NO_EFFECT = False
class HFTorchInferenceModel(HFInferenceModel):
def __init__(
self,
- model_name: str,
- lazy_load: bool,
- low_mem: bool,
+ #model_name: str,
+ #lazy_load: bool,
+ #low_mem: bool,
) -> None:
- super().__init__(model_name)
- self.lazy_load = lazy_load
- self.low_mem = low_mem
+ super().__init__()
+ #self.lazy_load = lazy_load
+ #self.low_mem = low_mem
self.post_token_hooks = [
PostTokenHooks.stream_tokens,
@@ -211,40 +211,6 @@ class HFTorchInferenceModel(HFInferenceModel):
new_sample.old_sample = transformers.GenerationMixin.sample
use_core_manipulations.sample = new_sample
- # PEFT Loading. This MUST be done after all save_pretrained calls are
- # finished on the main model.
- if utils.args.peft:
- from peft import PeftModel, PeftConfig
- local_peft_dir = os.path.join(m_self.get_local_model_path(), "peft")
-
- # Make PEFT dir if it doesn't exist
- try:
- os.makedirs(local_peft_dir)
- except FileExistsError:
- pass
-
- peft_local_path = os.path.join(local_peft_dir, utils.args.peft.replace("/", "_"))
- logger.debug(f"Loading PEFT '{utils.args.peft}', possible local path is '{peft_local_path}'.")
-
- peft_installed_locally = True
- possible_peft_locations = [peft_local_path, utils.args.peft]
-
- for i, location in enumerate(possible_peft_locations):
- try:
- m_self.model = PeftModel.from_pretrained(m_self.model, location)
- logger.debug(f"Loaded PEFT at '{location}'")
- break
- except ValueError:
- peft_installed_locally = False
- if i == len(possible_peft_locations) - 1:
- raise RuntimeError(f"Unable to load PeftModel for given name '{utils.args.peft}'. Does it exist?")
- except RuntimeError:
- raise RuntimeError("Error while loading PeftModel. Are you using the correct model?")
-
- if not peft_installed_locally:
- logger.debug(f"PEFT not saved to models folder; saving to '{peft_local_path}'")
- m_self.model.save_pretrained(peft_local_path)
-
return super()._post_load()
def _raw_generate(
@@ -272,13 +238,8 @@ class HFTorchInferenceModel(HFInferenceModel):
with torch.no_grad():
start_time = time.time()
-
- # HEED & BEWARE: All arguments passed to self.model.generate MUST be
- # kwargs; see https://github.com/huggingface/peft/issues/232. If they
- # aren't, PeftModel will EXPLODE!!!! But nothing will happen without
- # a PEFT loaded so it's sneaky.
genout = self.model.generate(
- input_ids=gen_in,
+ gen_in,
do_sample=True,
max_length=min(
len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
@@ -304,7 +265,6 @@ class HFTorchInferenceModel(HFInferenceModel):
def _get_model(self, location: str, tf_kwargs: Dict):
tf_kwargs["revision"] = utils.koboldai_vars.revision
tf_kwargs["cache_dir"] = "cache"
- tf_kwargs["trust_remote_code"] = utils.koboldai_vars.trust_remote_code
# If we have model hints for legacy model, use them rather than fall back.
try:
diff --git a/modeling/inference_models/rwkv.py b/modeling/inference_models/rwkv.py
index 006bb8fd..d14d8c81 100644
--- a/modeling/inference_models/rwkv.py
+++ b/modeling/inference_models/rwkv.py
@@ -17,7 +17,7 @@ from torch.nn import functional as F
os.environ["RWKV_JIT_ON"] = "1"
# TODO: Include compiled kernel
os.environ["RWKV_CUDA_ON"] = "1"
-from rwkv.model import RWKV
+
import utils
from logger import logger
@@ -55,13 +55,13 @@ MODEL_FILES = {
}
-class RWKVInferenceModel(InferenceModel):
+class model_loader(InferenceModel):
def __init__(
self,
- model_name: str,
+ #model_name: str,
) -> None:
super().__init__()
- self.model_name = model_name
+ #self.model_name = model_name
self.post_token_hooks = [
PostTokenHooks.stream_tokens,
@@ -83,6 +83,23 @@ class RWKVInferenceModel(InferenceModel):
)
self._old_stopping_criteria = None
+ def is_valid(self, model_name, model_path, menu_path):
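+        # Only offer the RWKV backend when the optional rwkv package can be imported.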
+ try:
+ from rwkv.model import RWKV
+ valid = True
+ except:
+ valid = False
+ return valid and "rwkv" in model_name.lower()
+
+ def get_requested_parameters(self, model_name, model_path, menu_path):
+ self.source = model_name
+ requested_parameters = []
+ return requested_parameters
+
+ def set_input_parameters(self):
+ return
+
+
def _ensure_directory_structure(self) -> None:
for path in ["models/rwkv", "models/rwkv/models"]:
try:
@@ -145,6 +162,7 @@ class RWKVInferenceModel(InferenceModel):
# Now we load!
# TODO: Breakmodel to strat
+ from rwkv.model import RWKV
self.model = RWKV(model=model_path, strategy="cuda:0 fp16")
def _apply_warpers(
diff --git a/static/koboldai.css b/static/koboldai.css
index 230f1cbf..f3dde4b7 100644
--- a/static/koboldai.css
+++ b/static/koboldai.css
@@ -347,6 +347,28 @@ border-top-right-radius: var(--tabs_rounding);
}
+.setting_container_model {
+ display: grid;
+ grid-template-areas: "label value"
+ "item item"
+ "minlabel maxlabel";
+ grid-template-rows: 20px 23px 20px;
+ grid-template-columns: auto 30px;
+ row-gap: 0.2em;
+ background-color: var(--setting_background);
+ color: var(--setting_text);
+ border-radius: var(--radius_settings_background);
+ padding: 2px;
+ margin: 2px;
+ width: calc(100%);
+}
+
+.setting_container_model .setting_item{
+ font-size: calc(0.93em + var(--font_size_adjustment));
+ margin-left: 10px;
+}
+
+
.setting_minlabel {
padding-top: 6px;
grid-area: minlabel;
@@ -3370,6 +3392,23 @@ textarea {
}
}
+@keyframes pulse-red {
+ 0% {
+ transform: scale(0.95);
+ box-shadow: 0 0 0 0 rgba(255, 0, 0, 0.7);
+ }
+
+ 70% {
+ transform: scale(1);
+ box-shadow: 0 0 0 10px rgba(255, 0, 0, 0);
+ }
+
+ 100% {
+ transform: scale(0.95);
+ box-shadow: 0 0 0 0 rgba(255, 0, 0, 0);
+ }
+}
+
@keyframes pulse-text {
0% {
filter: blur(3px);
@@ -3391,6 +3430,11 @@ textarea {
}
}
+.input_error {
+ border: 5px solid red !important;
+ box-sizing: border-box !important;
+}
+
.single_pulse {
animation: pulse-text 0.5s 1;
}
diff --git a/static/koboldai.js b/static/koboldai.js
index cfc32d21..0656253f 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -15,6 +15,7 @@ socket.on('popup_items', function(data){popup_items(data);});
socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);});
socket.on('popup_edit_file', function(data){popup_edit_file(data);});
socket.on('show_model_menu', function(data){show_model_menu(data);});
+socket.on('open_model_load_menu', function(data){new_show_model_menu(data);});
socket.on('selected_model_info', function(data){selected_model_info(data);});
socket.on('oai_engines', function(data){oai_engines(data);});
socket.on('buildload', function(data){buildload(data);});
@@ -81,6 +82,7 @@ const on_colab = $el("#on_colab").textContent == "true";
let story_id = -1;
var dirty_chunks = [];
var initial_socketio_connection_occured = false;
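+// Attributes of the model entry the user picked in the load-model popup; sent to the
+// server again when the user accepts the load.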
+var selected_model_data;
// Each entry into this array should be an object that looks like:
// {class: "class", key: "key", func: callback}
@@ -1500,49 +1502,46 @@ function getModelParameterCount(modelName) {
return base * multiplier;
}
-function show_model_menu(data) {
- //clear old options
- document.getElementById("modelkey").classList.add("hidden");
- document.getElementById("modelkey").value = "";
- document.getElementById("modelurl").classList.add("hidden");
- document.getElementById("use_gpu_div").classList.add("hidden");
- document.getElementById("use_8_bit_div").classList.add("hidden");
- document.getElementById("modellayers").classList.add("hidden");
- document.getElementById("oaimodel").classList.add("hidden");
- var model_layer_bars = document.getElementById('model_layer_bars');
- while (model_layer_bars.firstChild) {
- model_layer_bars.removeChild(model_layer_bars.firstChild);
+function new_show_model_menu(data) {
+ //clear out the loadmodelsettings
+ var loadmodelsettings = document.getElementById('loadmodelsettings')
+ while (loadmodelsettings.firstChild) {
+ loadmodelsettings.removeChild(loadmodelsettings.firstChild);
}
+ document.getElementById("modelplugin").classList.add("hidden");
+ var accept = document.getElementById("btn_loadmodelaccept");
+ accept.disabled = false;
//clear out the breadcrumbs
var breadcrumbs = document.getElementById('loadmodellistbreadcrumbs')
while (breadcrumbs.firstChild) {
breadcrumbs.removeChild(breadcrumbs.firstChild);
}
- //add breadcrumbs
- //console.log(data.breadcrumbs);
- for (item of data.breadcrumbs) {
- var button = document.createElement("button");
- button.classList.add("breadcrumbitem");
- button.setAttribute("model", data.menu);
- button.setAttribute("folder", item[0]);
- button.textContent = item[1];
- button.onclick = function () {
- socket.emit('select_model', {'menu': "", 'model': this.getAttribute("model"), 'path': this.getAttribute("folder")});
- };
- breadcrumbs.append(button);
- var span = document.createElement("span");
- span.textContent = "\\";
- breadcrumbs.append(span);
- }
+ //add breadcrumbs
+ if ('breadcrumbs' in data) {
+ for (item of data.breadcrumbs) {
+ var button = document.createElement("button");
+ button.classList.add("breadcrumbitem");
+ button.setAttribute("model", data.menu);
+ button.setAttribute("folder", item[0]);
+ button.textContent = item[1];
+ button.onclick = function () {
+ socket.emit('select_model', {'menu': "", 'name': this.getAttribute("model"), 'path': this.getAttribute("folder")});
+ };
+ breadcrumbs.append(button);
+ var span = document.createElement("span");
+ span.textContent = "\\";
+ breadcrumbs.append(span);
+ }
+ }
//clear out the items
var model_list = document.getElementById('loadmodellistcontent')
while (model_list.firstChild) {
model_list.removeChild(model_list.firstChild);
}
//add items
- for (item of data.data) {
+ for (item of data.items) {
var list_item = document.createElement("span");
list_item.classList.add("model_item");
@@ -1564,10 +1563,27 @@ function show_model_menu(data) {
//create the actual item
var popup_item = document.createElement("span");
popup_item.classList.add("model");
- popup_item.setAttribute("display_name", item.label);
- popup_item.id = item.name;
+ for (const key in item) {
+ if (key == "name") {
+ popup_item.id = item[key];
+ }
+ popup_item.setAttribute(key, item[key]);
+ }
+
+ popup_item.onclick = function() {
+ var attributes = this.attributes;
+ var obj = {};
+
+ for (var i = 0, len = attributes.length; i < len; i++) {
+ obj[attributes[i].name] = attributes[i].value;
+ }
+            //stash the selected model data so we can send it to the server when the user accepts
+            selected_model_data = obj;
+ //send the data to the server so it can figure out what data we need from the user for the model
+ socket.emit('select_model', obj);
+ }
- popup_item.setAttribute("Menu", data.menu)
//name text
var text = document.createElement("span");
text.style="grid-area: item;";
@@ -1615,241 +1631,223 @@ function show_model_menu(data) {
});
})();
- popup_item.onclick = function () {
- var accept = document.getElementById("btn_loadmodelaccept");
- accept.classList.add("disabled");
- socket.emit("select_model", {"model": this.id, "menu": this.getAttribute("Menu"), "display_name": this.getAttribute("display_name")});
- var model_list = document.getElementById('loadmodellistcontent').getElementsByClassName("selected");
- for (model of model_list) {
- model.classList.remove("selected");
- }
- this.classList.add("selected");
- accept.setAttribute("selected_model", this.id);
- accept.setAttribute("menu", this.getAttribute("Menu"));
- accept.setAttribute("display_name", this.getAttribute("display_name"));
- };
list_item.append(popup_item);
-
-
model_list.append(list_item);
}
- var accept = document.getElementById("btn_loadmodelaccept");
- accept.disabled = true;
- //finally, if they selected the custom hugging face menu we show the input box
- if (data['menu'] == "customhuggingface") {
- document.getElementById("custommodelname").classList.remove("hidden");
- } else {
- document.getElementById("custommodelname").classList.add("hidden");
- }
-
-
- // detect if we are in a model selection screen and show the reference
- var refelement = document.getElementById("modelspecifier");
- var check = document.getElementById("mainmenu");
- if (check) {
- refelement.classList.remove("hidden");
- } else {
- refelement.classList.add("hidden");
- }
openPopup("load-model");
+
}
+
function selected_model_info(data) {
+ //clear out the loadmodelsettings
+ var loadmodelsettings = document.getElementById('loadmodelsettings')
+ while (loadmodelsettings.firstChild) {
+ loadmodelsettings.removeChild(loadmodelsettings.firstChild);
+ }
var accept = document.getElementById("btn_loadmodelaccept");
- //hide or unhide key
- if (data.key) {
- document.getElementById("modelkey").classList.remove("hidden");
- document.getElementById("modelkey").value = data.key_value;
- } else {
- document.getElementById("modelkey").classList.add("hidden");
- document.getElementById("modelkey").value = "";
- }
- //hide or unhide URL
- if (data.url) {
- document.getElementById("modelurl").classList.remove("hidden");
- } else {
- document.getElementById("modelurl").classList.add("hidden");
- }
-
- //hide or unhide 8 bit mode
- if (data.bit_8_available) {
- document.getElementById("use_8_bit_div").classList.remove("hidden");
- } else {
- document.getElementById("use_8_bit_div").classList.add("hidden");
- document.getElementById("use_8_bit").checked = false;
- }
-
- //default URL loading
- if (data.default_url != null) {
- document.getElementById("modelurl").value = data.default_url;
- }
-
- //change model loading on url if needed
- if (data.models_on_url) {
- document.getElementById("modelurl").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': document.getElementById("modelkey").value, 'url': this.value});};
- document.getElementById("modelkey").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value, 'url': document.getElementById("modelurl").value});};
- } else {
- document.getElementById("modelkey").ochange = function () {socket.emit('OAI_Key_Update', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value});};
- document.getElementById("modelurl").ochange = null;
- }
-
- //show model select for APIs
- if (data.show_online_model_select) {
- document.getElementById("oaimodel").classList.remove("hidden");
- } else {
- document.getElementById("oaimodel").classList.add("hidden");
- }
-
- //Multiple Model Select?
- if (data.multi_online_models) {
- document.getElementById("oaimodel").setAttribute("multiple", "");
- document.getElementById("oaimodel").options[0].textContent = "All"
- } else {
- document.getElementById("oaimodel").removeAttribute("multiple");
- document.getElementById("oaimodel").options[0].textContent = "Select Model(s)"
- }
-
- //hide or unhide the use gpu checkbox
- if (data.gpu) {
- document.getElementById("use_gpu_div").classList.remove("hidden");
- } else {
- document.getElementById("use_gpu_div").classList.add("hidden");
- }
- //setup breakmodel
- if (data.breakmodel) {
- document.getElementById("modellayers").classList.remove("hidden");
- //setup model layer count
- document.getElementById("gpu_layers_current").textContent = data.break_values.reduce((a, b) => a + b, 0);
- document.getElementById("gpu_layers_max").textContent = data.layer_count;
- document.getElementById("gpu_count").value = data.gpu_count;
-
- //create the gpu load bars
- var model_layer_bars = document.getElementById('model_layer_bars');
- while (model_layer_bars.firstChild) {
- model_layer_bars.removeChild(model_layer_bars.firstChild);
- }
-
- //Add the bars
- for (let i = 0; i < data.gpu_names.length; i++) {
- var div = document.createElement("div");
- div.classList.add("model_setting_container");
- //build GPU text
- var span = document.createElement("span");
- span.classList.add("model_setting_label");
- span.textContent = "GPU " + i + " " + data.gpu_names[i] + ": "
- //build layer count box
- var input = document.createElement("input");
- input.classList.add("model_setting_value");
- input.classList.add("setting_value");
- input.inputmode = "numeric";
- input.id = "gpu_layers_box_"+i;
- input.value = data.break_values[i];
- input.onblur = function () {
- document.getElementById(this.id.replace("_box", "")).value = this.value;
- update_gpu_layers();
- }
- span.append(input);
- div.append(span);
- //build layer count slider
- var input = document.createElement("input");
- input.classList.add("model_setting_item");
- input.type = "range";
- input.min = 0;
- input.max = data.layer_count;
- input.step = 1;
- input.value = data.break_values[i];
- input.id = "gpu_layers_" + i;
- input.onchange = function () {
- document.getElementById(this.id.replace("gpu_layers", "gpu_layers_box")).value = this.value;
- update_gpu_layers();
- }
- div.append(input);
- //build slider bar #s
- //min
- var span = document.createElement("span");
- span.classList.add("model_setting_minlabel");
- var span2 = document.createElement("span");
- span2.style="top: -4px; position: relative;";
- span2.textContent = 0;
- span.append(span2);
- div.append(span);
- //max
- var span = document.createElement("span");
- span.classList.add("model_setting_maxlabel");
- var span2 = document.createElement("span");
- span2.style="top: -4px; position: relative;";
- span2.textContent = data.layer_count;
- span.append(span2);
- div.append(span);
-
- model_layer_bars.append(div);
- }
-
- //add the disk layers
- if (data.disk_break) {
- var div = document.createElement("div");
- div.classList.add("model_setting_container");
- //build GPU text
- var span = document.createElement("span");
- span.classList.add("model_setting_label");
- span.textContent = "Disk cache: "
- //build layer count box
- var input = document.createElement("input");
- input.classList.add("model_setting_value");
- input.classList.add("setting_value");
- input.inputmode = "numeric";
- input.id = "disk_layers_box";
- input.value = data.disk_break_value;
- input.onblur = function () {
- document.getElementById(this.id.replace("_box", "")).value = this.value;
- update_gpu_layers();
- }
- span.append(input);
- div.append(span);
- //build layer count slider
- var input = document.createElement("input");
- input.classList.add("model_setting_item");
- input.type = "range";
- input.min = 0;
- input.max = data.layer_count;
- input.step = 1;
- input.value = data.disk_break_value;
- input.id = "disk_layers";
- input.onchange = function () {
- document.getElementById(this.id+"_box").value = this.value;
- update_gpu_layers();
- }
- div.append(input);
- //build slider bar #s
- //min
- var span = document.createElement("span");
- span.classList.add("model_setting_minlabel");
- var span2 = document.createElement("span");
- span2.style="top: -4px; position: relative;";
- span2.textContent = 0;
- span.append(span2);
- div.append(span);
- //max
- var span = document.createElement("span");
- span.classList.add("model_setting_maxlabel");
- var span2 = document.createElement("span");
- span2.style="top: -4px; position: relative;";
- span2.textContent = data.layer_count;
- span.append(span2);
- div.append(span);
- }
-
- model_layer_bars.append(div);
-
- update_gpu_layers();
- } else {
- document.getElementById("modellayers").classList.add("hidden");
- accept.classList.remove("disabled");
- }
accept.disabled = false;
+ modelplugin = document.getElementById("modelplugin");
+ modelplugin.classList.remove("hidden");
+ modelplugin.onchange = function () {
+ for (const area of document.getElementsByClassName("model_plugin_settings_area")) {
+ area.classList.add("hidden");
+ }
+ document.getElementById(this.value + "_settings_area").classList.remove("hidden");
+ }
+ //create the content
+ for (const [loader, items] of Object.entries(data)) {
+ model_area = document.createElement("DIV");
+ model_area.id = loader + "_settings_area";
+ model_area.classList.add("model_plugin_settings_area");
+ model_area.classList.add("hidden");
+ modelpluginoption = document.createElement("option");
+ modelpluginoption.innerText = loader;
+ modelpluginoption.value = loader;
+ modelplugin.append(modelpluginoption);
+
+ for (item of items) {
+ let new_setting = document.getElementById('blank_model_settings').cloneNode(true);
+ new_setting.id = loader;
+ new_setting.classList.remove("hidden");
+ new_setting.querySelector('#blank_model_settings_label').innerText = item['label'];
+ new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']);
+
+ onchange_event = function () {
+ //items without a 'check' rule have check_data set to null; nothing to validate
+ if (!this.check_data) {
+ return;
+ }
+ //get check value:
+ if ('sum' in this.check_data) {
+ check_value = 0
+ for (const temp of this.check_data['sum']) {
+ if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
+ check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value);
+ }
+ }
+ } else {
+ check_value = this.value
+ }
+ if (this.check_data['check'] == "=") {
+ valid = (check_value == this.check_data['value']);
+ } else if (this.check_data['check'] == "!=") {
+ valid = (check_value != this.check_data['value']);
+ } else if (this.check_data['check'] == ">=") {
+ valid = (check_value >= this.check_data['value']);
+ } else if (this.check_data['check'] == "<=") {
+ valid = (check_value <= this.check_data['value']);
+ } else if (this.check_data['check'] == ">") {
+ valid = (check_value > this.check_data['value']);
+ } else if (this.check_data['check'] == "<") {
+ valid = (check_value < this.check_data['value']);
+ }
+ if (valid) {
+ //if we are supposed to refresh when this value changes we'll resubmit
+ if (this.getAttribute("refresh_model_inputs") == "true") {
+ console.log("resubmit");
+ }
+ if ('sum' in this.check_data) {
+ for (const temp of this.check_data['sum']) {
+ if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
+ document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error');
+ document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip");
+ }
+ }
+ } else {
+ this.closest(".setting_container_model").classList.remove('input_error');
+ this.closest(".setting_container_model").removeAttribute("tooltip");
+ }
+ } else {
+ if ('sum' in this.check_data) {
+ for (const temp of this.check_data['sum']) {
+ if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
+ document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error');
+ document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']);
+ }
+ }
+ } else {
+ this.closest(".setting_container_model").classList.add('input_error');
+ this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']);
+ }
+ }
+ var accept = document.getElementById("btn_loadmodelaccept");
+ if (document.getElementsByClassName("input_error").length > 0) {
+ accept.classList.add("disabled");
+ accept.disabled = true;
+ } else {
+ accept.classList.remove("disabled");
+ accept.disabled = false;
+ }
+
+ }
+ if (item['uitype'] == "slider") {
+ var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number');
+ slider_number.value = item['default'];
+ slider_number.id = loader + "|" + item['id'] + "_value_text";
+ slider_number.onchange = function() { document.getElementById(this.id.replace("_text", "")).value = this.value;};
+
+ var slider = new_setting.querySelector('#blank_model_settings_slider');
+ slider.value = item['default'];
+ slider.min = item['min'];
+ slider.max = item['max'];
+ slider.id = loader + "|" + item['id'] + "_value";
+ if ('check' in item) {
+ slider.check_data = item['check'];
+ slider_number.check_data = item['check'];
+ } else {
+ slider.check_data = null;
+ slider_number.check_data = null;
+ }
+ slider.oninput = function() { document.getElementById(this.id+"_text").value = this.value;};
+ slider.onchange = onchange_event;
+ slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
+ new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min'];
+ new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max'];
+ slider.onchange();
+ } else {
+ new_setting.querySelector('#blank_model_settings_slider').classList.add("hidden");
+ }
+ if (item['uitype'] == "toggle") {
+ var toggle = new_setting.querySelector('#blank_model_settings_toggle');
+ toggle.id = loader + "|" + item['id'] + "_value";
+ toggle.checked = item['default'];
+ toggle.onchange = onchange_event;
+ toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
+ if ('check' in item) {
+ toggle.check_data = item['check'];
+ } else {
+ toggle.check_data = null;
+ }
+ toggle.onchange();
+ } else {
+ new_setting.querySelector('#blank_model_settings_checkbox_container').classList.add("hidden");
+ new_setting.querySelector('#blank_model_settings_toggle').classList.add("hidden");
+ }
+ if (item['uitype'] == "dropdown") {
+ var select_element = new_setting.querySelector('#blank_model_settings_dropdown');
+ select_element.id = loader + "|" + item['id'] + "_value";
+ for (const dropdown_value of item['children']) {
+ new_option = document.createElement("option");
+ new_option.value = dropdown_value['value'];
+ new_option.innerText = dropdown_value['text'];
+ select_element.append(new_option);
+ }
+ select_element.value = item['default'];
+ select_element.onchange = onchange_event;
+ select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
+ if ('check' in item) {
+ select_element.check_data = item['check'];
+ } else {
+ select_element.check_data = null;
+ }
+ select_element.onchange();
+ } else {
+ new_setting.querySelector('#blank_model_settings_dropdown').classList.add("hidden");
+ }
+ if (item['uitype'] == "password") {
+ var password_item = new_setting.querySelector('#blank_model_settings_password');
+ password_item.id = loader + "|" + item['id'] + "_value";
+ password_item.value = item['default'];
+ password_item.onchange = onchange_event;
+ password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
+ if ('check' in item) {
+ password_item.check_data = item['check'];
+ } else {
+ password_item.check_data = null;
+ }
+ password_item.onchange();
+ } else {
+ new_setting.querySelector('#blank_model_settings_password').classList.add("hidden");
+ }
+ if (item['uitype'] == "text") {
+ var text_item = new_setting.querySelector('#blank_model_settings_text');
+ text_item.id = loader + "|" + item['id'] + "_value";
+ text_item.value = item['default'];
+ text_item.onchange = onchange_event;
+ text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
+ if ('check' in item) {
+ text_item.check_data = item['check'];
+ } else {
+ text_item.check_data = null;
+ }
+ text_item.onchange();
+ } else {
+ new_setting.querySelector('#blank_model_settings_text').classList.add("hidden");
+ }
+
+ model_area.append(new_setting);
+ loadmodelsettings.append(model_area);
+ }
+ }
+
+ //unhide the first plugin settings
+ if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) {
+ document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden");
+ }
}
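
For context, the loop above assumes the backend sends each loader a list of setting descriptors. A minimal sketch of one such descriptor is shown below; only the field names are taken from the code above, and the example values are hypothetical:

    // Hypothetical settings descriptor as consumed by the loop above.
    // Field names mirror what the JS reads; the concrete values are illustrative only.
    const exampleItem = {
        uitype: "slider",             // one of: slider, toggle, dropdown, password, text
        id: "gpu_layers_0",           // the input element id becomes "<loader>|<id>_value"
        label: "GPU 0 Layers",
        tooltip: "Layers to place on this GPU",
        default: 0,
        min: 0,
        max: 32,
        refresh_model_inputs: false,  // when "true" the handler currently only logs a resubmit (stubbed above)
        // dropdowns additionally carry children: [{value: "...", text: "..."}, ...]
        check: {                      // optional validation rule
            sum: ["gpu_layers_0", "disk_layers"],  // ids whose values are summed before comparing
            check: "=",               // comparison operator: =, !=, >=, <=, >, <
            value: 32,
            check_message: "The sum of all layers must match the model's layer count"
        }
    };

When a check fails, the check_message is attached as a tooltip on the offending setting_container_model, the input_error class is applied, and the accept button is disabled.
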
@@ -1877,42 +1875,16 @@ function update_gpu_layers() {
function load_model() {
var accept = document.getElementById('btn_loadmodelaccept');
- gpu_layers = []
- disk_layers = 0;
- if (!(document.getElementById("modellayers").classList.contains("hidden"))) {
- for (let i=0; i < document.getElementById("gpu_count").value; i++) {
- gpu_layers.push(document.getElementById("gpu_layers_"+i).value);
- }
- if (document.getElementById("disk_layers")) {
- disk_layers = document.getElementById("disk_layers").value;
- }
- }
- //Need to do different stuff with custom models
- if ((accept.getAttribute('menu') == 'GPT2Custom') || (accept.getAttribute('menu') == 'NeoCustom')) {
- var model = document.getElementById("btn_loadmodelaccept").getAttribute("menu");
- var path = document.getElementById("btn_loadmodelaccept").getAttribute("display_name");
- } else {
- var model = document.getElementById("btn_loadmodelaccept").getAttribute("selected_model");
- var path = "";
- }
+ settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area");
- let selected_models = [];
- for (item of document.getElementById("oaimodel").selectedOptions) {
- selected_models.push(item.value);
- }
- if (selected_models == ['']) {
-
- selected_models = [];
- } else if (selected_models.length == 1) {
- selected_models = selected_models[0];
+ //get an object of all the input settings from the user
+ data = {}
+ for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) {
+ data[element.id.split("|")[1].replace("_value", "")] = element.value;
}
+ data = {...data, ...selected_model_data}
- message = {'model': model, 'path': path, 'use_gpu': document.getElementById("use_gpu").checked,
- 'key': document.getElementById('modelkey').value, 'gpu_layers': gpu_layers.join(),
- 'disk_layers': disk_layers, 'url': document.getElementById("modelurl").value,
- 'online_model': selected_models,
- 'use_8_bit': document.getElementById('use_8_bit').checked};
- socket.emit("load_model", message);
+ socket.emit("load_model", data);
closePopups();
}
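
The reworked load_model() no longer hard-codes the message fields; it collects whatever visible inputs the selected plugin rendered and merges in selected_model_data (populated elsewhere in this file when a model is picked from the menu). A rough sketch of the resulting emit, assuming a hypothetical loader that declared key and url settings:

    // Illustrative only: the actual keys depend on the selected loader's declared
    // settings. Each ".model_settings_input" element with id "<loader>|<name>_value"
    // contributes data[<name>] = element.value, then selected_model_data is merged in.
    socket.emit("load_model", {
        key: "",                              // from e.g. "<loader>|key_value"
        url: "https://example.invalid/api",   // from e.g. "<loader>|url_value"
        ...selected_model_data                // assumed to carry the menu selection (model name/path)
    });
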
diff --git a/templates/popups.html b/templates/popups.html
index 12c4c27a..59f07e70 100644
--- a/templates/popups.html
+++ b/templates/popups.html
@@ -46,35 +46,11 @@
 Usage (VRAM)
 [markup stripped in extraction: this hunk presumably swaps the old GPU/disk layer-slider markup in the load-model popup for the containers the new loader UI fills in (the "modelplugin" select and per-plugin settings areas referenced by the JavaScript above)]
diff --git a/templates/templates.html b/templates/templates.html
index 4f16ff66..49cd3e5b 100644
--- a/templates/templates.html
+++ b/templates/templates.html
@@ -154,3 +154,22 @@
 [markup stripped in extraction: this hunk appends the new "blank_model_settings" template — a label with a help_icon tooltip, a slider with its number box and min/max labels, a toggle/checkbox container, a dropdown, a password field, and a text field — which the loader JavaScript above clones for each setting]
\ No newline at end of file
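
Because the template markup was stripped from the hunk above, here is a small console check (not part of the patch) listing the element ids the loader JavaScript clones and configures; it can be pasted into the browser devtools to confirm a restored templates.html exposes everything the script expects:

    // Sanity check only, not part of the patch: verify the #blank_model_settings
    // template contains every element the loader UI code queries for.
    const requiredIds = [
        "blank_model_settings_label",
        "blank_model_settings_tooltip",
        "blank_model_settings_value_slider_number",
        "blank_model_settings_slider",
        "blank_model_settings_min_label",
        "blank_model_settings_max_label",
        "blank_model_settings_toggle",
        "blank_model_settings_checkbox_container",
        "blank_model_settings_dropdown",
        "blank_model_settings_password",
        "blank_model_settings_text",
    ];
    const tpl = document.getElementById("blank_model_settings");
    for (const id of requiredIds) {
        if (!tpl || !tpl.querySelector("#" + id)) {
            console.warn("blank_model_settings is missing #" + id);
        }
    }
    // load_model() additionally expects each value input to carry the
    // "model_settings_input" class so querySelectorAll can pick it up.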