Merge pull request #362 from ebolam/Model_Plugins

Implement modular model backends Phase 1
This commit is contained in:
henk717
2023-05-27 15:33:20 +02:00
committed by GitHub
24 changed files with 2991 additions and 1242 deletions

View File

@@ -56,6 +56,7 @@ import html
import argparse
import sys
import gc
import traceback
import lupa
@@ -167,6 +168,7 @@ class MenuFolder(MenuItem):
"size": "",
"isMenu": True,
"isDownloaded": False,
"isDirectory": False
}
class MenuModel(MenuItem):
@@ -177,11 +179,13 @@ class MenuModel(MenuItem):
vram_requirements: str = "",
model_type: MenuModelType = MenuModelType.HUGGINGFACE,
experimental: bool = False,
model_backend: str = "Huggingface",
) -> None:
super().__init__(label, name, experimental)
self.model_type = model_type
self.vram_requirements = vram_requirements
self.is_downloaded = is_model_downloaded(self.name)
self.model_backend = model_backend
def to_ui1(self) -> list:
return [
@@ -199,8 +203,28 @@ class MenuModel(MenuItem):
"size": self.vram_requirements,
"isMenu": False,
"isDownloaded": self.is_downloaded,
"isDirectory": False,
}
class MenuPath(MenuItem):
def to_ui1(self) -> list:
return [
self.label,
self.name,
"",
True,
]
def to_json(self) -> dict:
return {
"label": self.label,
"name": self.name,
"size": "",
"isMenu": True,
"isDownloaded": False,
"isDirectory": True,
"path": "./models"
}
# AI models Menu
# This is a dict of lists where the key is the menu name, and the list is the menu items.
@@ -208,9 +232,9 @@ class MenuModel(MenuItem):
# 3: the memory requirement for the model, 4: if the item is a menu or not (True/False)
model_menu = {
"mainmenu": [
MenuModel("Load a model from its directory", "NeoCustom"),
MenuModel("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"),
MenuFolder("Load custom model from Hugging Face", "customhuggingface"),
MenuPath("Load a model from its directory", "NeoCustom"),
MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"),
MenuModel("Load custom model from Hugging Face", "customhuggingface", ""),
MenuFolder("Adventure Models", "adventurelist"),
MenuFolder("Novel Models", "novellist"),
MenuFolder("Chat Models", "chatlist"),
@@ -224,7 +248,7 @@ model_menu = {
MenuFolder("Official RWKV-4", "rwkvlist"),
MenuFolder("Untuned GPT2", "gpt2list"),
MenuFolder("Online Services", "apilist"),
MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER),
MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER, model_backend="Read Only"),
],
'adventurelist': [
MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "64GB"),
@@ -361,12 +385,11 @@ model_menu = {
MenuFolder("Return to Main Menu", "mainmenu"),
],
'apilist': [
MenuModel("GooseAI API (requires API key)", "GooseAI", model_type=MenuModelType.ONLINE_API),
MenuModel("OpenAI API (requires API key)", "OAI", model_type=MenuModelType.ONLINE_API),
MenuModel("InferKit API (requires API key)", "InferKit", model_type=MenuModelType.ONLINE_API),
MenuModel("KoboldAI API", "API", model_type=MenuModelType.ONLINE_API),
MenuModel("Basic Model API", "Colab", model_type=MenuModelType.ONLINE_API),
MenuModel("KoboldAI Horde", "CLUSTER", model_type=MenuModelType.ONLINE_API),
MenuModel("GooseAI API (requires API key)", "GooseAI", model_type=MenuModelType.ONLINE_API, model_backend="GooseAI"),
MenuModel("OpenAI API (requires API key)", "OAI", model_type=MenuModelType.ONLINE_API, model_backend="OpenAI"),
MenuModel("KoboldAI API", "API", model_type=MenuModelType.ONLINE_API, model_backend="KoboldAI API"),
MenuModel("Basic Model API", "Colab", model_type=MenuModelType.ONLINE_API, model_backend="KoboldAI Old Colab Method"),
MenuModel("KoboldAI Horde", "CLUSTER", model_type=MenuModelType.ONLINE_API, model_backend="Horde"),
MenuFolder("Return to Main Menu", "mainmenu"),
]
}
@@ -599,6 +622,24 @@ utils.socketio = socketio
# Weird import position to steal koboldai_vars from utils
from modeling.patches import patch_transformers
#Load all of the model importers
import importlib
model_backend_code = {}
model_backends = {}
for module in os.listdir("./modeling/inference_models"):
if not os.path.isfile(os.path.join("./modeling/inference_models",module)) and module != '__pycache__':
try:
model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module))
model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend()
if 'disable' in vars(model_backends[model_backend_code[module].model_backend_name]):
if model_backends[model_backend_code[module].model_backend_name].disable:
del model_backends[model_backend_code[module].model_backend_name]
except Exception:
logger.error("Model Backend {} failed to load".format(module))
logger.error(traceback.format_exc())
logger.info("We loaded the following model backends: \n{}".format("\n".join([x for x in model_backends])))
old_socketio_on = socketio.on
def new_socketio_on(*a, **k):
@@ -614,10 +655,14 @@ def new_socketio_on(*a, **k):
socketio.on = new_socketio_on
def emit(*args, **kwargs):
try:
return _emit(*args, **kwargs)
except AttributeError:
return socketio.emit(*args, **kwargs)
if has_request_context():
try:
return _emit(*args, **kwargs)
except AttributeError:
return socketio.emit(*args, **kwargs)
else: #We're trying to send data outside of the http context. This won't work. Try the relay
if koboldai_settings.queue is not None:
koboldai_settings.queue.put([args[0], args[1], kwargs])
utils.emit = emit
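
When emit() runs outside an HTTP request context it cannot reach the client directly, so the [event, data, kwargs] triple is queued and replayed by the socket_io_relay background task started further below. A rough sketch of what that consumer has to do (not the actual implementation):

def socket_io_relay_sketch(queue, socketio):
    # Drain the multiprocessing queue and re-emit each queued event from a
    # context where socketio.emit is allowed to run.
    while True:
        event, data, kwargs = queue.get()
        socketio.emit(event, data, **kwargs)
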
#replacement for tpool.execute to maintain request contexts
@@ -905,6 +950,8 @@ def sendModelSelection(menu="mainmenu", folder="./models"):
)
def get_folder_path_info(base):
if base is None:
return [], []
if base == 'This PC':
breadcrumbs = [['This PC', 'This PC']]
paths = [["{}:\\".format(chr(i)), "{}:\\".format(chr(i))] for i in range(65, 91) if os.path.exists("{}:".format(chr(i)))]
@@ -987,7 +1034,7 @@ def getmodelname():
if(koboldai_vars.online_model != ''):
return(f"{koboldai_vars.model}/{koboldai_vars.online_model}")
if(koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
modelname = os.path.basename(os.path.normpath(koboldai_vars.custmodpth))
modelname = os.path.basename(os.path.normpath(model.path))
return modelname
else:
modelname = koboldai_vars.model if koboldai_vars.model is not None else "Read Only"
@@ -1318,16 +1365,14 @@ def general_startup(override_args=None):
parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable")
parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)")
parser.add_argument("--model", help="Specify the Model Type to skip the Menu")
parser.add_argument("--model_backend", default="Huggingface", help="Specify the model backend you want to use")
parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input to the model loading process (set to help to get required parameters)")
parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)")
parser.add_argument("--apikey", help="Specify the API key to use for online services")
parser.add_argument("--sh_apikey", help="Specify the API key to use for txt2img from the Stable Horde. Get a key from https://horde.koboldai.net/register")
parser.add_argument("--req_model", type=str, action='append', required=False, help="Which models which we allow to generate for us during cluster mode. Can be specified multiple times.")
parser.add_argument("--revision", help="Specify the model revision for huggingface models (can be a git branch/tag name or a git commit hash)")
parser.add_argument("--cpu", action='store_true', help="By default unattended launches are on the GPU use this option to force CPU usage.")
parser.add_argument("--breakmodel", action='store_true', help=argparse.SUPPRESS)
parser.add_argument("--breakmodel_layers", type=int, help=argparse.SUPPRESS)
parser.add_argument("--breakmodel_gpulayers", type=str, help="If using a model that supports hybrid generation, this is a comma-separated list that specifies how many layers to put on each GPU device. For example to put 8 layers on device 0, 9 layers on device 1 and 11 layers on device 2, use --breakmodel_gpulayers 8,9,11")
parser.add_argument("--breakmodel_disklayers", type=int, help="If using a model that supports hybrid generation, this is the number of layers to put in disk cache.")
parser.add_argument("--override_delete", action='store_true', help="Deleting stories from inside the browser is disabled if you are using --remote and enabled otherwise. Using this option will instead allow deleting stories if using --remote and prevent deleting stories otherwise.")
parser.add_argument("--override_rename", action='store_true', help="Renaming stories from inside the browser is disabled if you are using --remote and enabled otherwise. Using this option will instead allow renaming stories if using --remote and prevent renaming stories otherwise.")
parser.add_argument("--configname", help="Force a fixed configuration name to aid with config management.")
@@ -1360,6 +1405,7 @@ def general_startup(override_args=None):
args = parser.parse_args(shlex.split(override_args))
elif(os.environ.get("KOBOLDAI_ARGS") is not None):
import shlex
logger.info("Using environmental variables instead of command arguments: {}".format(os.environ["KOBOLDAI_ARGS"]))
args = parser.parse_args(shlex.split(os.environ["KOBOLDAI_ARGS"]))
else:
args = parser.parse_args()
@@ -1382,9 +1428,11 @@ def general_startup(override_args=None):
for arg in temp:
if arg == "path":
if "model_path" in os.environ:
logger.info("Setting model path based on enviornmental variable: {}".format(os.environ["model_path"]))
setattr(args, arg, os.environ["model_path"])
else:
if arg in os.environ:
logger.info("Setting {} based on enviornmental variable: {}".format(arg, os.environ[arg]))
if isinstance(getattr(args, arg), bool):
if os.environ[arg].lower() == "true":
setattr(args, arg, True)
@@ -1410,8 +1458,6 @@ def general_startup(override_args=None):
args.max_summary_length = int(args.max_summary_length)
if args.model:
koboldai_vars.model = args.model;
koboldai_vars.revision = args.revision
koboldai_settings.multi_story = args.multi_story
@@ -1436,7 +1482,7 @@ def general_startup(override_args=None):
koboldai_vars.quiet = True
if args.nobreakmodel:
koboldai_vars.nobreakmodel = True
model_backends['Huggingface'].nobreakmodel = True
if args.remote:
koboldai_vars.host = True;
@@ -1447,6 +1493,9 @@ def general_startup(override_args=None):
if args.localtunnel:
koboldai_vars.host = True;
if args.lowmem:
model_backends['Huggingface'].low_mem = True
if args.host != "Disabled":
# This means --host option was submitted without an argument
# Enable all LAN IPs (0.0.0.0/0)
@@ -1479,6 +1528,9 @@ def general_startup(override_args=None):
koboldai_vars.trust_remote_code = True
if args.cpu:
koboldai_vars.use_colab_tpu = False
koboldai_vars.hascuda = False
koboldai_vars.usegpu = False
model_backends['Huggingface'].nobreakmodel = True
koboldai_vars.smandelete = koboldai_vars.host == args.override_delete
koboldai_vars.smanrename = koboldai_vars.host == args.override_rename
@@ -1493,262 +1545,67 @@ def general_startup(override_args=None):
if(modpath):
# Save directory to koboldai_vars
koboldai_vars.model = "NeoCustom"
koboldai_vars.custmodpth = modpath
args.path = modpath
elif args.model:
logger.message(f"Welcome to KoboldAI!")
logger.message(f"You have selected the following Model: {koboldai_vars.model}")
logger.message(f"You have selected the following Model: {args.model}")
if args.path:
logger.message(f"You have selected the following path for your Model: {args.path}")
koboldai_vars.custmodpth = args.path;
koboldai_vars.colaburl = args.path + "/request"; # Let's just use the same parameter to keep it simple
model_backends["KoboldAI Old Colab Method"].colaburl = args.path + "/request"; # Let's just use the same parameter to keep it simple
#setup socketio relay queue
koboldai_settings.queue = multiprocessing.Queue()
socketio.start_background_task(socket_io_relay, koboldai_settings.queue, socketio)
#==================================================================#
# Load Model
#==================================================================#
@socketio.on("get_model_info")
def get_model_info(model, directory=""):
logger.info("Selected: {}, {}".format(model, directory))
# if the model is in the api list
disk_blocks = 0
key = False
breakmodel = False
gpu = False
layer_count = None
key_value = ""
break_values = []
url = False
default_url = None
models_on_url = False
multi_online_models = False
show_online_model_select=False
gpu_count = torch.cuda.device_count()
gpu_names = []
send_horde_models = False
show_custom_model_box = False
for i in range(gpu_count):
gpu_names.append(torch.cuda.get_device_name(i))
if model in ['Colab', 'API']:
url = True
elif model == 'CLUSTER':
models_on_url = True
show_online_model_select=True
url = True
key = True
default_url = koboldai_vars.horde_url
multi_online_models = True
key_value = koboldai_vars.horde_api_key
url = koboldai_vars.horde_url
if key_value:
send_horde_models = True
elif model in [x.name for x in model_menu['apilist']]:
show_online_model_select=True
if path.exists("settings/{}.v2_settings".format(model)):
with open("settings/{}.v2_settings".format(model), "r") as file:
# Check if API key exists
try:
js = json.load(file)
if("apikey" in js and js["apikey"] != ""):
# API key exists, grab it and close the file
key_value = js["apikey"]
elif 'oaiapikey' in js and js['oaiapikey'] != "":
key_value = js["oaiapikey"]
if model in ('GooseAI', 'OAI'):
get_oai_models({'model': model, 'key': key_value})
except json.decoder.JSONDecodeError:
print(":(")
pass
key = True
elif model == 'ReadOnly':
pass
#elif model == 'customhuggingface':
# show_custom_model_box = True
elif args.cpu:
pass
else:
layer_count = get_layer_count(model, directory=directory)
if layer_count is None:
breakmodel = False
gpu = True
else:
breakmodel = True
if model in ["NeoCustom", "GPT2Custom", "customhuggingface"]:
filename = "settings/{}.breakmodel".format(os.path.basename(os.path.normpath(directory)))
else:
filename = "settings/{}.breakmodel".format(model.replace("/", "_"))
if path.exists(filename):
with open(filename, "r") as file:
data = [x for x in file.read().split("\n")[:2] if x != '']
if len(data) < 2:
data.append("0")
break_values, disk_blocks = data
break_values = break_values.split(",")
else:
break_values = [layer_count]
break_values = [int(x) for x in break_values if x != '']
break_values += [0] * (gpu_count - len(break_values))
emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key, 'multi_online_models': multi_online_models, 'default_url': default_url,
'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel,
'disk_break_value': disk_blocks, 'accelerate': True,
'break_values': break_values, 'gpu_count': gpu_count,
'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url,
'show_custom_model_box': show_custom_model_box}, broadcast=True, room="UI_1")
emit('selected_model_info', {'key_value': key_value, 'key':key,
'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, 'multi_online_models': multi_online_models, 'default_url': default_url,
'disk_break_value': disk_blocks, 'disk_break': True,
'break_values': break_values, 'gpu_count': gpu_count,
'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, 'show_online_model_select': show_online_model_select,
'bit_8_available': koboldai_vars.bit_8_available if koboldai_vars.experimental_features else False,
'show_custom_model_box': show_custom_model_box})
if send_horde_models:
get_cluster_models({'key': key_value, 'url': default_url})
elif key_value != "" and model in [x.name for x in model_menu['apilist']] and model != 'CLUSTER':
get_oai_models(key_value)
if koboldai_vars.use_colab_tpu and args.model_backend == "Huggingface":
args.model_backend = "Huggingface MTJ"
def get_layer_count(model, directory=""):
if(model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]):
if(model == "GPT2Custom"):
with open(os.path.join(directory, "config.json"), "r") as f:
model_config = json.load(f)
# Get the model_type from the config or assume a model type if it isn't present
else:
if(directory):
model = directory
from transformers import AutoConfig
if(os.path.isdir(model.replace('/', '_'))):
model_config = AutoConfig.from_pretrained(model.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache")
elif(is_model_downloaded(model)):
model_config = AutoConfig.from_pretrained("models/{}".format(model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
elif(os.path.isdir(directory)):
model_config = AutoConfig.from_pretrained(directory, revision=koboldai_vars.revision, cache_dir="cache")
elif(os.path.isdir(koboldai_vars.custmodpth.replace('/', '_'))):
model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache")
else:
model_config = AutoConfig.from_pretrained(model, revision=koboldai_vars.revision, cache_dir="cache")
try:
if (model_config.model_type != 'gpt2' or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel:
return utils.num_layers(model_config)
else:
return None
except:
return None
else:
return None
@socketio.on('OAI_Key_Update')
def get_oai_models(data):
key = data['key']
model = data['model']
koboldai_vars.oaiapikey = key
if model == 'OAI':
url = "https://api.openai.com/v1/engines"
elif model == 'GooseAI':
url = "https://api.goose.ai/v1/engines"
else:
return
if args.model:
# At this point we have to try to load the model through the selected backend
if args.model_backend not in model_backends:
logger.error("Your selected model backend ({}) isn't in the model backends we know about ({})".format(args.model_backend, ", ".join([x for x in model_backends])))
exit()
#OK, we've been given a model to load and a backend to load it through. Now we need to get a list of parameters and make sure we get what we need to actually load it
parameters = model_backends[args.model_backend].get_requested_parameters(args.model, args.path, "")
ok_to_load = True
mising_parameters = []
arg_parameters = json.loads(args.model_parameters.replace("'", "\"")) if args.model_parameters != "" and args.model_parameters.lower() != "help" else {}
# Get list of models from OAI
logger.init("OAI Engines", status="Retrieving")
req = requests.get(
url,
headers = {
'Authorization': 'Bearer '+key
}
)
if(req.status_code == 200):
r = req.json()
engines = r["data"]
try:
engines = [[en["id"], "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")] for en in engines]
except:
logger.error(engines)
raise
#If we're on colab we'll set everything to GPU0
if args.colab and args.model_backend == 'Huggingface' and koboldai_vars.on_colab:
arg_parameters['use_gpu'] = True
online_model = ""
changed=False
#Save the key
if not path.exists("settings"):
# If the client settings file doesn't exist, create it
# Write API key to file
os.makedirs('settings', exist_ok=True)
if path.exists("settings/{}.v2_settings".format(model)):
with open("settings/{}.v2_settings".format(model), "r") as file:
js = json.load(file)
if 'online_model' in js:
online_model = js['online_model']
if "apikey" in js:
if js['apikey'] != key:
changed=True
else:
js = {}
changed=True
if changed:
with open("settings/{}.v2_settings".format(model), "w") as file:
js["apikey"] = key
file.write(json.dumps(js, indent=3))
logger.init_ok("OAI Engines", status="OK")
emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1")
emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2")
for parameter in parameters:
if parameter['uitype'] != "Valid Display":
if parameter['default'] == "" and parameter['id'] not in arg_parameters:
mising_parameters.append(parameter['id'])
ok_to_load = False
elif parameter['id'] not in arg_parameters:
arg_parameters[parameter['id']] = parameter['default']
if not ok_to_load:
logger.error("Your selected backend needs additional parameters to run. Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)")
logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters if x['uitype'] != "Valid Display"])))
logger.error("Missing: {}".format(", ".join(mising_parameters)))
exit()
if args.model_parameters.lower() == "help":
logger.error("Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)")
logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters if x['uitype'] != "Valid Display"])))
exit()
arg_parameters['id'] = args.model
arg_parameters['model'] = args.model
arg_parameters['path'] = args.path
arg_parameters['menu_path'] = ""
model_backends[args.model_backend].set_input_parameters(arg_parameters)
koboldai_vars.model = args.model
return args.model_backend
else:
# Something went wrong, print the message and quit since we can't initialize an engine
logger.init_err("OAI Engines", status="Failed")
logger.error(req.json())
emit('from_server', {'cmd': 'errmsg', 'data': req.json()})
@socketio.on("get_cluster_models")
def get_cluster_models(msg):
koboldai_vars.horde_api_key = msg['key'] or koboldai_vars.horde_api_key
url = msg['url'] or koboldai_vars.horde_url
koboldai_vars.horde_url = url
# Get list of models from public cluster
print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="")
try:
req = requests.get(f"{url}/api/v2/status/models?type=text")
except:
logger.init_err("KAI Horde Models", status="Failed")
logger.error("Provided KoboldAI Horde URL unreachable")
emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"})
return
if not req.ok:
# Something went wrong, print the message and quit since we can't initialize an engine
logger.init_err("KAI Horde Models", status="Failed")
logger.error(req.json())
emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1")
return
engines = req.json()
logger.debug(engines)
try:
engines = [[en["name"], en["name"]] for en in engines]
except:
logger.error(engines)
raise
logger.debug(engines)
online_model = ""
savesettings()
logger.init_ok("KAI Horde Models", status="OK")
emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1")
emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2")
def reset_model_settings():
koboldai_vars.reset_for_model_load()
return "Read Only"
def unload_model():
global model
@@ -1781,7 +1638,7 @@ def unload_model():
koboldai_vars.badwordsids = koboldai_settings.badwordsids_default
def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=False, online_model="", use_breakmodel_args=False, breakmodel_args_default_to_cpu=False, url=None, use_8_bit=False):
def load_model(model_backend, initial_load=False):
global model
global tokenizer
global model_config
@@ -1792,188 +1649,48 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
if initial_load:
use_breakmodel_args = True
reset_model_settings()
koboldai_vars.reset_model()
koboldai_vars.cluster_requested_models = [online_model] if isinstance(online_model, str) else online_model
if koboldai_vars.cluster_requested_models == [""]:
koboldai_vars.cluster_requested_models = []
koboldai_vars.noai = False
if not use_breakmodel_args:
set_aibusy(True)
if koboldai_vars.model != 'ReadOnly':
emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True)
#Have to add a sleep so the server will send the emit for some reason
time.sleep(0.1)
set_aibusy(True)
if koboldai_vars.model != 'ReadOnly':
emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(model_backends[model_backend].model_name if "model_name" in vars(model_backends[model_backend]) else model_backends[model_backend].id)}, broadcast=True)
#Have to add a sleep so the server will send the emit for some reason
time.sleep(0.1)
if gpu_layers is not None:
args.breakmodel_gpulayers = gpu_layers
elif use_breakmodel_args:
gpu_layers = args.breakmodel_gpulayers
if breakmodel_args_default_to_cpu and gpu_layers is None:
gpu_layers = args.breakmodel_gpulayers = []
if disk_layers is not None:
args.breakmodel_disklayers = int(disk_layers)
elif use_breakmodel_args:
disk_layers = args.breakmodel_disklayers
if breakmodel_args_default_to_cpu and disk_layers is None:
disk_layers = args.breakmodel_disklayers = 0
if 'model' in globals():
model.unload()
unload_model()
if online_model == "":
koboldai_vars.configname = getmodelname()
#Let's set the GooseAI or OpenAI server URLs if that's applicable
else:
koboldai_vars.online_model = online_model
# Swap OAI Server if GooseAI was selected
if koboldai_vars.model == "GooseAI":
koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
koboldai_vars.model = "OAI"
koboldai_vars.configname = f"GooseAI_{online_model.replace('/', '_')}"
elif koboldai_vars.model == "CLUSTER" and isinstance(online_model, list):
if len(online_model) != 1:
koboldai_vars.configname = koboldai_vars.model
else:
koboldai_vars.configname = f"{koboldai_vars.model}_{online_model[0].replace('/', '_')}"
else:
koboldai_vars.configname = f"{koboldai_vars.model}_{online_model.replace('/', '_')}"
if path.exists(get_config_filename()):
changed=False
with open(get_config_filename(), "r") as file:
# Check if API key exists
js = json.load(file)
if 'online_model' in js:
if js['online_model'] != online_model:
changed=True
js['online_model'] = online_model
else:
changed=True
js['online_model'] = online_model
if changed:
with open("settings/{}.v2_settings".format(koboldai_vars.model), "w") as file:
file.write(json.dumps(js, indent=3))
# Swap OAI Server if GooseAI was selected
if koboldai_vars.model == "GooseAI":
koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
koboldai_vars.model = "OAI"
args.configname = "GooseAI" + "/" + online_model
elif koboldai_vars.model != "CLUSTER":
args.configname = koboldai_vars.model + "/" + online_model
koboldai_vars.oaiurl = koboldai_vars.oaiengines + "/{0}/completions".format(online_model)
# If transformers model was selected & GPU available, ask to use CPU or GPU
if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
# loadmodelsettings()
# loadsettings()
logger.init("GPU support", status="Searching")
koboldai_vars.hascuda = torch.cuda.is_available() and not args.cpu
koboldai_vars.bmsupported = ((koboldai_vars.model_type != 'gpt2') or koboldai_vars.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel
if(args.breakmodel is not None and args.breakmodel):
logger.warning("--breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).")
if(args.breakmodel_layers is not None):
logger.warning("--breakmodel_layers is deprecated. Use --breakmodel_gpulayers instead (see --help for details).")
if(args.model and koboldai_vars.bmsupported and not args.breakmodel_gpulayers and not args.breakmodel_layers and (not args.breakmodel_disklayers)):
logger.warning("Model launched without the --breakmodel_gpulayers argument, defaulting to GPU only mode.")
koboldai_vars.bmsupported = False
if(not koboldai_vars.bmsupported and (args.breakmodel_gpulayers is not None or args.breakmodel_layers is not None or args.breakmodel_disklayers is not None)):
logger.warning("This model does not support hybrid generation. --breakmodel_gpulayers will be ignored.")
if(koboldai_vars.hascuda):
logger.init_ok("GPU support", status="Found")
else:
logger.init_warn("GPU support", status="Not Found")
if args.cpu:
koboldai_vars.usegpu = False
gpu_layers = None
disk_layers = None
koboldai_vars.breakmodel = False
elif koboldai_vars.hascuda:
if(koboldai_vars.bmsupported):
koboldai_vars.usegpu = False
koboldai_vars.breakmodel = True
else:
koboldai_vars.breakmodel = False
koboldai_vars.usegpu = use_gpu
#if koboldai_vars.hascuda:
# if(koboldai_vars.bmsupported):
# koboldai_vars.usegpu = False
# koboldai_vars.breakmodel = True
# else:
# koboldai_vars.breakmodel = False
# koboldai_vars.usegpu = use_gpu
else:
koboldai_vars.default_preset = koboldai_settings.default_preset
# Ask for API key if InferKit was selected
if koboldai_vars.model == "InferKit":
koboldai_vars.apikey = koboldai_vars.oaiapikey
# Swap OAI Server if GooseAI was selected
if koboldai_vars.model == "GooseAI":
koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
koboldai_vars.model = "OAI"
koboldai_vars.configname = "GooseAI"
# Ask for API key if OpenAI was selected
if koboldai_vars.model == "OAI" and not koboldai_vars.configname:
koboldai_vars.configname = "OAI"
if koboldai_vars.model == "ReadOnly":
koboldai_vars.noai = True
# TODO: InferKit
if koboldai_vars.model == "ReadOnly" or koboldai_vars.noai:
pass
elif koboldai_vars.model in ["Colab", "API", "CLUSTER", "OAI"]:
koboldai_vars.colaburl = url or koboldai_vars.colaburl
koboldai_vars.usegpu = False
koboldai_vars.breakmodel = False
if koboldai_vars.model == "Colab":
from modeling.inference_models.basic_api import BasicAPIInferenceModel
model = BasicAPIInferenceModel()
elif koboldai_vars.model == "API":
from modeling.inference_models.api import APIInferenceModel
model = APIInferenceModel(koboldai_vars.colaburl.replace("/request", ""))
elif koboldai_vars.model == "CLUSTER":
from modeling.inference_models.horde import HordeInferenceModel
model = HordeInferenceModel()
elif koboldai_vars.model == "OAI":
from modeling.inference_models.openai import OpenAIAPIInferenceModel
model = OpenAIAPIInferenceModel()
model.load(initial_load=initial_load)
# TODO: This check sucks, make a model object or something
elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai:
# HF Torch
logger.init("Transformers", status='Starting')
for m in ("GPTJModel", "XGLMModel"):
try:
globals()[m] = getattr(__import__("transformers"), m)
except:
pass
from modeling.inference_models.generic_hf_torch import GenericHFTorchInferenceModel
model = GenericHFTorchInferenceModel(
koboldai_vars.model,
lazy_load=koboldai_vars.lazy_load,
low_mem=args.lowmem
)
model.load(
save_model=not (args.colab or args.cacheonly) or args.savemodel,
initial_load=initial_load,
)
logger.info(f"Pipeline created: {koboldai_vars.model}")
else:
# TPU
from modeling.inference_models.hf_mtj import HFMTJInferenceModel
model = HFMTJInferenceModel(
koboldai_vars.model
)
model.load(
save_model=not (args.colab or args.cacheonly) or args.savemodel,
initial_load=initial_load,
)
model = model_backends[model_backend]
model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel)
koboldai_vars.model = model.model_name if "model_name" in vars(model) else model.id #Should have model_name, but it could be set to id depending on how it's setup
if koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"):
koboldai_vars.model = os.path.basename(os.path.normpath(model.path))
logger.info(koboldai_vars.model)
logger.debug("Model Type: {}".format(koboldai_vars.model_type))
# TODO: Convert everywhere to use model.tokenizer
if model:
@@ -3993,7 +3710,8 @@ def calcsubmit(txt):
bias += [1] * (i - top_index)
bias[i] = b["multiplier"]
device = utils.get_auxilary_device()
device = model.get_auxilary_device()
attention_bias.attention_bias = torch.Tensor(bias).to(device)
logger.info(f"Bias by {koboldai_vars.memory_attn_bias} -- {attention_bias.attention_bias}")
logger.debug("Submit: experimental_features time {}s".format(time.time()-start_time))
@@ -6422,7 +6140,9 @@ def UI_2_retry(data):
@socketio.on('load_model_button')
@logger.catch
def UI_2_load_model_button(data):
sendModelSelection()
emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":"mainmenu"}} for item in model_menu['mainmenu'] if item.should_show()]})
#==================================================================#
# Event triggered when user clicks on a model
@@ -6430,23 +6150,56 @@ def UI_2_load_model_button(data):
@socketio.on('select_model')
@logger.catch
def UI_2_select_model(data):
#We've selected a menu
if data['model'] in model_menu:
sendModelSelection(menu=data['model'])
#We've selected a custom line
elif data['menu'] in ("NeoCustom", "GPT2Custom"):
get_model_info(data['menu'], directory=data['display_name'])
#We've selected a custom menu folder
elif data['model'] in ("NeoCustom", "GPT2Custom") and 'path' in data:
sendModelSelection(menu=data['model'], folder=data['path'])
#We've selected a custom menu
elif data['model'] in ("NeoCustom", "GPT2Custom", "customhuggingface"):
sendModelSelection(menu=data['model'], folder="./models")
logger.debug("Clicked on model entry: {}".format(data))
if data["name"] in model_menu and data['ismenu'] == "true":
emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]})
else:
#We now have some model we want to potentially load.
#First we need to send the client the model parameters (layers, etc)
get_model_info(data['model'])
#Get load methods
if 'ismenu' in data and data['ismenu'] == 'false':
valid_loaders = {}
if data['id'] in [item.name for sublist in model_menu for item in model_menu[sublist]]:
#Here if we have a model id that's in our menu, we explicitly use that backend
for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]):
valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"])
emit("selected_model_info", {"model_backends": valid_loaders})
else:
#Here we have a model that's not in our menu structure (either a custom model or a custom path)
#so we'll just go through all the possible loaders
for model_backend in model_backends:
if model_backends[model_backend].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]):
valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"])
emit("selected_model_info", {"model_backends": valid_loaders})
else:
#Get directories
paths, breadcrumbs = get_folder_path_info(data['path'])
output = []
for path in paths:
valid=False
for model_backend in model_backends:
if model_backends[model_backend].is_valid(path[1], path[0], "Custom"):
logger.debug("{} says valid".format(model_backend))
valid=True
break
else:
logger.debug("{} says invalid".format(model_backend))
output.append({'label': path[1], 'name': path[1], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid})
emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs})
return
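
To make the new handler easier to follow, here is an illustrative sketch of the payloads it exchanges (all values invented): the client reports the clicked entry, and the server answers with one requested-parameter list per backend that claims the model.

clicked_entry = {                 # sent by the client on 'select_model'
    "name": "KoboldAI/GPT-NeoX-20B-Skein",
    "id": "KoboldAI/GPT-NeoX-20B-Skein",
    "ismenu": "false",
    "menu": "adventurelist",
}
server_answer = {                 # emitted back as 'selected_model_info'
    "model_backends": {
        "Huggingface": [
            # descriptor dicts from get_requested_parameters(), e.g. the layer
            # sliders defined in hf.py below
        ],
    },
}
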
#==================================================================#
# Event triggered when user changes a model parameter and it's set to resubmit
#==================================================================#
@socketio.on('resubmit_model_info')
@logger.catch
def UI_2_resubmit_model_info(data):
valid_loaders = {}
for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]):
valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"], parameters=data)
emit("selected_model_info", {"model_backends": valid_loaders})
#==================================================================#
# Event triggered when user loads a model
@@ -6454,26 +6207,10 @@ def UI_2_select_model(data):
@socketio.on('load_model')
@logger.catch
def UI_2_load_model(data):
if not os.path.exists("settings/"):
os.mkdir("settings")
changed = True
if os.path.exists("settings/" + data['model'].replace('/', '_') + ".breakmodel"):
with open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "r") as file:
file_data = file.read().split('\n')[:2]
if len(file_data) < 2:
file_data.append("0")
gpu_layers, disk_layers = file_data
if gpu_layers == data['gpu_layers'] and disk_layers == data['disk_layers']:
changed = False
if changed:
f = open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "w")
f.write("{}\n{}".format(data['gpu_layers'], data['disk_layers']))
f.close()
koboldai_vars.colaburl = data['url'] + "/request"
koboldai_vars.model = data['model']
koboldai_vars.custmodpth = data['path']
print("loading Model")
load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit'])
logger.debug("Loading model with user input of: {}".format(data))
model_backends[data['plugin']].set_input_parameters(data)
load_model(data['plugin'])
#load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit'])
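
For completeness, a sketch of what the 'load_model' payload might contain when the Huggingface backend was configured in the UI (values invented; the ids mirror the parameters that backend requested):

example_load_request = {
    "plugin": "Huggingface",               # chooses which backend to configure
    "id": "KoboldAI/GPT-NeoX-20B-Skein",   # model id echoed back by the UI
    "path": None,                          # set when loading from a local folder
    "0_Layers": 28,
    "CPU_Layers": 0,
    "Disk_Layers": 0,
}
# UI_2_load_model() passes this dict to set_input_parameters() of the chosen
# backend and then calls load_model("Huggingface").
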
#==================================================================#
# Event triggered when load story is clicked
@@ -8095,7 +7832,8 @@ def send_one_time_messages(data, wait_time=0):
# Test
#==================================================================#
def model_info():
if model_config is not None:
global model_config
if 'model_config' in globals() and model_config is not None:
if isinstance(model_config, dict):
if 'model_type' in model_config:
model_type = str(model_config['model_type'])
@@ -10982,10 +10720,8 @@ for schema in config_endpoint_schemas:
#==================================================================#
# Final startup commands to launch Flask app
#==================================================================#
def startup():
if koboldai_vars.model == "" or koboldai_vars.model is None:
koboldai_vars.model = "ReadOnly"
socketio.start_background_task(load_model, **{'initial_load':True})
def startup(command_line_backend):
socketio.start_background_task(load_model, *(command_line_backend,), **{'initial_load':True})
print("", end="", flush=True)
@@ -10994,7 +10730,7 @@ def run():
global app
global tpu_mtj_backend
general_startup()
command_line_backend = general_startup()
# Start flask & SocketIO
logger.init("Flask", status="Starting")
if koboldai_vars.host:
@@ -11044,7 +10780,7 @@ def run():
cloudflare = _run_cloudflared(port)
koboldai_vars.cloudflare_link = cloudflare
startup()
startup(command_line_backend)
if(args.localtunnel or args.ngrok or args.remote):
with open('cloudflare.log', 'w') as cloudflarelog:
@@ -11064,7 +10800,7 @@ def run():
else:
socketio.run(app, port=port)
else:
startup()
startup(command_line_backend)
if args.unblock:
if not args.no_ui:
try:
@@ -11092,13 +10828,13 @@ def run():
if __name__ == "__main__":
run()
else:
general_startup()
command_line_backend = general_startup()
# Start flask & SocketIO
logger.init("Flask", status="Starting")
Session(app)
logger.init_ok("Flask", status="OK")
patch_transformers()
startup()
startup(command_line_backend)
koboldai_settings.port = args.port if "port" in args and args.port is not None else 5000
print("{0}\nServer started in WSGI mode!{1}".format(colors.GREEN, colors.END), flush=True)

File diff suppressed because one or more lines are too long

View File

@@ -647,7 +647,7 @@ class settings(object):
raise
class model_settings(settings):
local_only_variables = ['badwordsids', 'apikey', 'default_preset']
local_only_variables = ['apikey', 'default_preset']
no_save_variables = ['modelconfig', 'custmodpth', 'generated_tkns',
'loaded_layers', 'total_layers', 'total_download_chunks', 'downloaded_chunks', 'presets', 'default_preset',
'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition',
@@ -710,7 +710,6 @@ class model_settings(settings):
self.modeldim = -1 # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B)
self.sampler_order = [6, 0, 1, 2, 3, 4, 5]
self.newlinemode = "n"
self.lazy_load = True # Whether or not to use torch_lazy_loader.py for transformers models in order to reduce CPU memory usage
self.presets = [] # Holder for presets
self.selected_preset = ""
self.uid_presets = []
@@ -1203,7 +1202,6 @@ class undefined_settings(settings):
super().__setattr__(name, value)
logger.error("{} just set {} to {} in koboldai_vars. That variable isn't defined!".format(inspect.stack()[1].function, name, value))
class system_settings(settings):
local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'comregex_ui',
@@ -1211,7 +1209,7 @@ class system_settings(settings):
'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui', 'trust_remote_code']
no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy',
'serverstarted', 'inference_config', 'image_pipeline', 'summarizer',
'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', 'on_colab',
'summary_tokenizer', 'use_colab_tpu', 'noai', 'disable_set_aibusy', 'cloudflare_link', 'tts_model',
'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states', 'comregex_ai', 'comregex_ui', 'git_repository', 'git_branch', 'trust_remote_code']
settings_name = "system"
@@ -1237,7 +1235,7 @@ class system_settings(settings):
self.corescript = "default.lua" # Filename of corescript to load
self.gpu_device = 0 # Which PyTorch device to use when using pure GPU generation
self.savedir = os.getcwd()+"\\stories"
self.hascuda = False # Whether torch has detected CUDA on the system
self.hascuda = torch.cuda.is_available() # Whether torch has detected CUDA on the system
self.usegpu = False # Whether to launch pipeline with GPU support
self.splist = []
self.spselect = "" # Temporary storage for soft prompt filename to load

View File

@@ -169,6 +169,18 @@ class InferenceModel:
]
self.tokenizer = None
self.capabilties = ModelCapabilities()
self.model_name = "Not Defined"
def is_valid(self, model_name, model_path, menu_path, vram):
return True
def requested_parameters(self, model_name, model_path, menu_path, vram):
return {}
def set_input_parameters(self, parameters):
for parameter in parameters:
setattr(self, parameter, parameters[parameter])
return
def load(self, save_model: bool = False, initial_load: bool = False) -> None:
"""User-facing load function. Do not override this; try `_load()` instead."""
@@ -176,12 +188,19 @@ class InferenceModel:
self._pre_load()
self._load(save_model=save_model, initial_load=initial_load)
self._post_load()
self._save_settings()
def unload(self):
return
def _pre_load(self) -> None:
"""Pre load hook. Called before `_load()`."""
def _post_load(self) -> None:
"""Post load hook. Called after `_load()`."""
def _save_settings(self) -> None:
"""Save settings hook. Called after `_post_load()`."""
def _load(self, save_model: bool, initial_load: bool) -> None:
"""Main load method. All logic related to loading the model onto the

View File

@@ -6,6 +6,7 @@ import torch
import requests
import numpy as np
from typing import List, Optional, Union
import os
import utils
from logger import logger
@@ -17,15 +18,42 @@ from modeling.inference_model import (
ModelCapabilities,
)
model_backend_name = "KoboldAI API"
class APIException(Exception):
"""To be used for errors when using the Kobold API as an interface."""
class APIInferenceModel(InferenceModel):
def __init__(self, base_url: str) -> None:
class model_backend(InferenceModel):
def __init__(self) -> None:
super().__init__()
self.base_url = base_url.rstrip("/")
self.base_url = ""
self.model_name = "KoboldAI API"
def is_valid(self, model_name, model_path, menu_path):
return model_name == "API"
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
if os.path.exists("settings/api.model_backend.settings") and 'base_url' not in vars(self):
with open("settings/api.model_backend.settings", "r") as f:
self.base_url = json.load(f)['base_url']
requested_parameters = []
requested_parameters.append({
"uitype": "text",
"unit": "text",
"label": "URL",
"id": "base_url",
"default": self.base_url,
"check": {"value": "", 'check': "!="},
"tooltip": "The URL of the KoboldAI API to connect to.",
"menu_path": "",
"extra_classes": "",
"refresh_model_inputs": False
})
return requested_parameters
def set_input_parameters(self, parameters):
self.base_url = parameters['base_url'].rstrip("/")
def _load(self, save_model: bool, initial_load: bool) -> None:
tokenizer_id = requests.get(f"{self.base_url}/api/v1/model").json()["result"]
@@ -35,6 +63,10 @@ class APIInferenceModel(InferenceModel):
# Do not allow API to be served over the API
self.capabilties = ModelCapabilities(api_host=False)
def _save_settings(self):
with open("settings/api.model_backend.settings", "w") as f:
json.dump({"base_url": self.base_url}, f, indent="")
def _raw_generate(
self,
prompt_tokens: Union[List[int], torch.Tensor],

View File

@@ -4,6 +4,7 @@ import torch
import requests
import numpy as np
from typing import List, Optional, Union
import os
import utils
from logger import logger
@@ -15,19 +16,54 @@ from modeling.inference_model import (
)
model_backend_name = "KoboldAI Old Colab Method"
class BasicAPIException(Exception):
"""To be used for errors when using the Basic API as an interface."""
class BasicAPIInferenceModel(InferenceModel):
class model_backend(InferenceModel):
def __init__(self) -> None:
super().__init__()
self.colaburl = ""
# Do not allow API to be served over the API
self.capabilties = ModelCapabilities(api_host=False)
def is_valid(self, model_name, model_path, menu_path):
return model_name == "Colab"
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
if os.path.exists("settings/api.model_backend.settings") and 'colaburl' not in vars(self):
with open("settings/api.model_backend.settings", "r") as f:
self.colaburl = json.load(f)['base_url']
requested_parameters = []
requested_parameters.append({
"uitype": "text",
"unit": "text",
"label": "URL",
"id": "colaburl",
"default": self.colaburl,
"check": {"value": "", 'check': "!="},
"tooltip": "The URL of the Colab KoboldAI API to connect to.",
"menu_path": "",
"extra_classes": "",
"refresh_model_inputs": False
})
return requested_parameters
def set_input_parameters(self, parameters):
self.colaburl = parameters['colaburl']
def _initialize_model(self):
return
def _load(self, save_model: bool, initial_load: bool) -> None:
self.tokenizer = self._get_tokenizer("EleutherAI/gpt-neo-2.7B")
def _save_settings(self):
with open("settings/basic_api.model_backend.settings", "w") as f:
json.dump({"colaburl": self.colaburl}, f, indent="")
def _raw_generate(
self,
@@ -68,7 +104,7 @@ class BasicAPIInferenceModel(InferenceModel):
}
# Create request
req = requests.post(utils.koboldai_vars.colaburl, json=reqdata)
req = requests.post(self.colaburl, json=reqdata)
if req.status_code != 200:
raise BasicAPIException(f"Bad status code {req.status_code}")

View File

@@ -22,8 +22,13 @@ except ModuleNotFoundError as e:
from modeling.inference_models.hf_torch import HFTorchInferenceModel
model_backend_name = "Huggingface"
class GenericHFTorchInferenceModel(HFTorchInferenceModel):
class model_backend(HFTorchInferenceModel):
def _initialize_model(self):
return
def _load(self, save_model: bool, initial_load: bool) -> None:
utils.koboldai_vars.allowsp = True
@@ -36,9 +41,9 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel):
if self.model_name == "NeoCustom":
self.model_name = os.path.basename(
os.path.normpath(utils.koboldai_vars.custmodpth)
os.path.normpath(self.path)
)
utils.koboldai_vars.model = self.model_name
utils.koboldai_vars.model = self.model_name
# If we specify a model and it's in the root directory, we need to move
# it to the models directory (legacy folder structure to new)
@@ -54,7 +59,7 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel):
"low_cpu_mem_usage": True,
}
if utils.koboldai_vars.model_type == "gpt2":
if self.model_type == "gpt2":
# We must disable low_cpu_mem_usage if using a GPT-2 model
# because GPT-2 is not compatible with this feature yet.
tf_kwargs.pop("low_cpu_mem_usage", None)
@@ -64,12 +69,14 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel):
# If we're using torch_lazy_loader, we need to get breakmodel config
# early so that it knows where to load the individual model tensors
logger.debug("lazy_load: {} hascuda: {} breakmodel: {} nobreakmode: {}".format(self.lazy_load, utils.koboldai_vars.hascuda, self.breakmodel, self.nobreakmodel))
if (
self.lazy_load
and utils.koboldai_vars.hascuda
and utils.koboldai_vars.breakmodel
and not utils.koboldai_vars.nobreakmodel
and self.breakmodel
and not self.nobreakmodel
):
logger.debug("loading breakmodel")
self.breakmodel_device_config(self.model_config)
if self.lazy_load:
@@ -241,11 +248,12 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel):
self.patch_embedding()
if utils.koboldai_vars.hascuda:
if utils.koboldai_vars.usegpu:
if self.usegpu:
# Use just VRAM
self.model = self.model.half().to(utils.koboldai_vars.gpu_device)
elif utils.koboldai_vars.breakmodel:
elif self.breakmodel:
# Use both RAM and VRAM (breakmodel)
if not self.lazy_load:
self.breakmodel_device_config(self.model.config)
@@ -260,6 +268,11 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel):
self._move_to_devices()
else:
self.model = self.model.to("cpu").float()
self.model.kai_model = self
utils.koboldai_vars.modeldim = self.get_hidden_size()
def _save_settings(self):
with open("settings/{}.generic_hf_torch.model_backend.settings".format(self.model_name.replace("/", "_")), "w") as f:
json.dump({"layers": self.layers if 'layers' in vars(self) else [], "disk_layers": self.disk_layers if 'disk_layers' in vars(self) else 0}, f, indent="")

View File

@@ -0,0 +1,33 @@
import torch
import requests
import numpy as np
from typing import List, Optional, Union
import os
import utils
from logger import logger
from modeling.inference_model import (
GenerationResult,
GenerationSettings,
InferenceModel,
)
from modeling.inference_models.openai_gooseai import model_backend as openai_gooseai_model_backend
model_backend_name = "GooseAI"
class OpenAIAPIError(Exception):
def __init__(self, error_type: str, error_message) -> None:
super().__init__(f"{error_type}: {error_message}")
class model_backend(openai_gooseai_model_backend):
"""InferenceModel for interfacing with OpenAI's generation API."""
def __init__(self):
super().__init__()
self.url = "https://api.goose.ai/v1/engines"
self.source = "GooseAI"
def is_valid(self, model_name, model_path, menu_path):
return model_name == "GooseAI"

View File

@@ -1,25 +1,225 @@
import os
import os, sys
from typing import Optional
from transformers import AutoConfig
import warnings
import utils
import json
import koboldai_settings
from logger import logger
from modeling.inference_model import InferenceModel
import torch
import gc
class HFInferenceModel(InferenceModel):
def __init__(self, model_name: str) -> None:
def __init__(self) -> None:
super().__init__()
self.model_config = None
self.model_name = model_name
#self.model_name = model_name
self.model = None
self.tokenizer = None
self.badwordsids = koboldai_settings.badwordsids_default
self.usegpu = False
def is_valid(self, model_name, model_path, menu_path):
try:
if model_path is not None and os.path.exists(model_path):
self.model_config = AutoConfig.from_pretrained(model_path)
elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))):
self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache")
else:
self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache")
return True
except:
return False
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
requested_parameters = []
if not self.hf_torch:
return []
if model_name == 'customhuggingface':
requested_parameters.append({
"uitype": "text",
"unit": "text",
"label": "Huggingface Model Name",
"id": "custom_model_name",
"default": parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else "",
"check": {"value": "", 'check': "!="},
"tooltip": "Model name from https://huggingface.co/",
"menu_path": "",
"refresh_model_inputs": True,
"extra_classes": ""
})
if model_name != 'customhuggingface' or "custom_model_name" in parameters:
model_name = parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else model_name
if model_path is not None and os.path.exists(model_path):
self.model_config = AutoConfig.from_pretrained(model_path)
elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))):
self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache")
else:
self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache")
layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None
layer_count = None if hasattr(self, "get_model_type") and self.get_model_type() == "gpt2" else layer_count #Skip layers if we're a GPT2 model as it doesn't support breakmodel
if layer_count is not None and layer_count >= 0 and not self.nobreakmodel:
if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
temp = json.load(f)
break_values = temp['layers'] if 'layers' in temp else [layer_count]
disk_blocks = temp['disk_layers'] if 'disk_layers' in temp else 0
else:
break_values = [layer_count]
disk_blocks = 0
break_values = [int(x) for x in break_values if x != '' and x is not None]
gpu_count = torch.cuda.device_count()
break_values += [0] * (gpu_count - len(break_values))
if disk_blocks is not None:
break_values += [int(disk_blocks)]
requested_parameters.append({
"uitype": "Valid Display",
"unit": "text",
"label": "Current Allocated Layers: %1/{}".format(layer_count), #%1 will be the validation value
"id": "valid_layers",
"max": layer_count,
"step": 1,
"check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
"menu_path": "Layers",
"extra_classes": "",
"refresh_model_inputs": False
})
for i in range(gpu_count):
requested_parameters.append({
"uitype": "slider",
"unit": "int",
"label": "{} Layers".format(torch.cuda.get_device_name(i)),
"id": "{}_Layers".format(i),
"min": 0,
"max": layer_count,
"step": 1,
"check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
"check_message": "The sum of assigned layers must equal {}".format(layer_count),
"default": break_values[i],
"tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)),
"menu_path": "Layers",
"extra_classes": "",
"refresh_model_inputs": False
})
requested_parameters.append({
"uitype": "slider",
"unit": "int",
"label": "CPU Layers",
"id": "CPU_Layers",
"min": 0,
"max": layer_count,
"step": 1,
"check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
"check_message": "The sum of assigned layers must equal {}".format(layer_count),
"default": layer_count - sum(break_values),
"tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.",
"menu_path": "Layers",
"extra_classes": "",
"refresh_model_inputs": False
})
if disk_blocks is not None:
requested_parameters.append({
"uitype": "slider",
"unit": "int",
"label": "Disk Layers",
"id": "Disk_Layers",
"min": 0,
"max": layer_count,
"step": 1,
"check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
"check_message": "The sum of assigned layers must equal {}".format(layer_count),
"default": disk_blocks,
"tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. Use as a last resort.",
"menu_path": "Layers",
"extra_classes": "",
"refresh_model_inputs": False
})
else:
requested_parameters.append({
"uitype": "toggle",
"unit": "bool",
"label": "Use GPU",
"id": "use_gpu",
"default": True,
"tooltip": "Whether or not to use the GPU",
"menu_path": "Layers",
"extra_classes": "",
"refresh_model_inputs": False
})
return requested_parameters
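# --- Hedged sketch (not part of the diff): how the "check" sum constraint built above is
# meant to behave. The id strings ("0_Layers", "CPU_Layers", "Disk_Layers") mirror the ones
# used in requested_parameters; validate_layer_split itself is a hypothetical helper written
# only for illustration, not a function that exists in this codebase.
def validate_layer_split(parameters, layer_count, gpu_count, has_disk_cache):
    ids = ["{}_Layers".format(i) for i in range(gpu_count)] + ["CPU_Layers"]
    if has_disk_cache:
        ids.append("Disk_Layers")
    # The UI only enables the load button when the assigned layers sum to the model's layer count.
    total = sum(int(parameters.get(key, 0) or 0) for key in ids)
    return total == layer_count

# Example: a 32-layer model split across one GPU, the CPU and the disk cache.
# validate_layer_split({"0_Layers": 24, "CPU_Layers": 6, "Disk_Layers": 2}, 32, 1, True) -> True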
def set_input_parameters(self, parameters):
if self.hf_torch and hasattr(self, "get_model_type") and self.get_model_type() != "gpt2":
import breakmodel
layer_count = (
    self.model_config["n_layer"] if isinstance(self.model_config, dict)
    else self.model_config.num_layers if hasattr(self.model_config, "num_layers")
    else self.model_config.n_layer if hasattr(self.model_config, "n_layer")
    else self.model_config.num_hidden_layers if hasattr(self.model_config, "num_hidden_layers")
    else None
)
if layer_count is not None and layer_count >= 0 and not self.nobreakmodel:
gpu_count = torch.cuda.device_count()
layers = []
for i in range(gpu_count):
if isinstance(parameters["{}_Layers".format(i)], str) and parameters["{}_Layers".format(i)].isnumeric():
layers.append(int(parameters["{}_Layers".format(i)]))
elif isinstance(parameters["{}_Layers".format(i)], str):
layers.append(None)
else:
layers.append(parameters["{}_Layers".format(i)])
self.cpu_layers = int(parameters['CPU_Layers']) if 'CPU_Layers' in parameters else None
if isinstance(self.cpu_layers, str):
self.cpu_layers = int(self.cpu_layers) if self.cpu_layers.isnumeric() else 0
self.layers = layers
self.disk_layers = parameters['Disk_Layers'] if 'Disk_Layers' in parameters else 0
if isinstance(self.disk_layers, str):
self.disk_layers = int(self.disk_layers) if self.disk_layers.isnumeric() else 0
breakmodel.gpu_blocks = layers
breakmodel.disk_blocks = self.disk_layers
self.usegpu = self.cpu_layers == 0 and breakmodel.disk_blocks == 0 and sum(self.layers)-self.layers[0] == 0
self.model_type = self.get_model_type()
self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel
self.lazy_load = True
logger.debug("Model type: {}".format(self.model_type))
else:
logger.debug("Disabling breakmodel and lazyload")
self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
self.breakmodel = False
self.lazy_load = False
logger.info(parameters)
self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id']
self.path = parameters['path'] if 'path' in parameters else None
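# --- Hedged sketch (not part of the diff): the fallback rule encoded in set_input_parameters
# above. all_on_primary_gpu is a hypothetical helper that restates the condition
# "cpu_layers == 0 and disk_blocks == 0 and sum(layers) - layers[0] == 0", i.e. breakmodel is
# skipped only when every layer sits on GPU 0.
def all_on_primary_gpu(layers, cpu_layers, disk_blocks):
    return cpu_layers == 0 and disk_blocks == 0 and sum(layers) - layers[0] == 0

# all_on_primary_gpu([32, 0], 0, 0) -> True   (plain single-GPU generation)
# all_on_primary_gpu([24, 8], 0, 0) -> False  (layers split across two GPUs, breakmodel needed)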
def unload(self):
if hasattr(self, 'model'):
self.model = None
if hasattr(self, 'tokenizer'):
self.tokenizer = None
if hasattr(self, 'model_config'):
self.model_config = None
with torch.no_grad():
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="torch.distributed.reduce_op is deprecated")
for tensor in gc.get_objects():
try:
if torch.is_tensor(tensor):
tensor.set_(torch.tensor((), device=tensor.device, dtype=tensor.dtype))
except:
pass
gc.collect()
try:
with torch.no_grad():
torch.cuda.empty_cache()
except:
pass
def _post_load(self) -> None:
self.badwordsids = koboldai_settings.badwordsids_default
self.model_type = str(self.model_config.model_type)
# These are model specific tokenizer overrides if a model has bad defaults
if utils.koboldai_vars.model_type == "llama":
if self.model_type == "llama":
# Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer
self.tokenizer.add_bos_token = False
@@ -103,32 +303,32 @@ class HFInferenceModel(InferenceModel):
return result
object.__setattr__(self.tokenizer, '__call__', call_wrapper.__get__(self.tokenizer))
elif utils.koboldai_vars.model_type == "opt":
elif self.model_type == "opt":
self.tokenizer._koboldai_header = self.tokenizer.encode("")
self.tokenizer.add_bos_token = False
self.tokenizer.add_prefix_space = False
# Change newline behavior to match model quirks
if utils.koboldai_vars.model_type == "xglm":
if self.model_type == "xglm":
# Default to </s> newline mode if using XGLM
utils.koboldai_vars.newlinemode = "s"
elif utils.koboldai_vars.model_type in ["opt", "bloom"]:
elif self.model_type in ["opt", "bloom"]:
# Handle </s> but don't convert newlines if using Fairseq models that have newlines trained in them
utils.koboldai_vars.newlinemode = "ns"
# Clean up tokens that cause issues
if (
utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default
and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj")
self.badwordsids == koboldai_settings.badwordsids_default
and self.model_type not in ("gpt2", "gpt_neo", "gptj")
):
utils.koboldai_vars.badwordsids = [
self.badwordsids = [
[v]
for k, v in self.tokenizer.get_vocab().items()
if any(c in str(k) for c in "[]")
]
if utils.koboldai_vars.newlinemode == "n":
utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id])
self.badwordsids.append([self.tokenizer.eos_token_id])
return super()._post_load()
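# --- Hedged sketch (not part of the diff): how the default bad-words list above is derived for
# models outside the GPT-2 family. Any vocabulary entry whose token string contains a square
# bracket is banned, and the EOS token is added when newline mode is "n". build_badwordsids is
# a hypothetical helper; the real logic lives inline in _post_load.
def build_badwordsids(tokenizer, newlinemode):
    ids = [
        [v]
        for k, v in tokenizer.get_vocab().items()
        if any(c in str(k) for c in "[]")
    ]
    if newlinemode == "n":
        ids.append([tokenizer.eos_token_id])
    return ids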
@@ -139,9 +339,12 @@ class HFInferenceModel(InferenceModel):
Returns a string of the model's path locally, or None if it is not downloaded.
If ignore_existance is true, it will always return a path.
"""
if self.path is not None:
if os.path.exists(self.path):
return self.path
if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]:
model_path = utils.koboldai_vars.custmodpth
model_path = self.path
assert model_path
# Path can be absolute or relative to models directory
@@ -158,7 +361,7 @@ class HFInferenceModel(InferenceModel):
return model_path
basename = utils.koboldai_vars.model.replace("/", "_")
basename = self.model_name.replace("/", "_")
if legacy:
ret = basename
else:
@@ -176,15 +379,15 @@ class HFInferenceModel(InferenceModel):
revision=utils.koboldai_vars.revision,
cache_dir="cache",
)
utils.koboldai_vars.model_type = self.model_config.model_type
self.model_type = self.model_config.model_type
except ValueError:
utils.koboldai_vars.model_type = {
self.model_type = {
"NeoCustom": "gpt_neo",
"GPT2Custom": "gpt2",
}.get(utils.koboldai_vars.model)
}.get(self.model)
if not utils.koboldai_vars.model_type:
if not self.model_type:
logger.warning(
"No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)"
)
utils.koboldai_vars.model_type = "gpt_neo"
self.model_type = "gpt_neo"

View File

@@ -19,18 +19,16 @@ from modeling.inference_model import (
from modeling.inference_models.hf import HFInferenceModel
from modeling.tokenizer import GenericTokenizer
# This file shouldn't be imported unless using the TPU
assert utils.koboldai_vars.use_colab_tpu
import tpu_mtj_backend
model_backend_name = "Huggingface MTJ"
class HFMTJInferenceModel(HFInferenceModel):
class model_backend(HFInferenceModel):
def __init__(
self,
model_name: str,
#model_name: str,
) -> None:
super().__init__(model_name)
super().__init__()
self.hf_torch = False
self.model_config = None
self.capabilties = ModelCapabilities(
embedding_manipulation=False,
@@ -39,8 +37,13 @@ class HFMTJInferenceModel(HFInferenceModel):
post_token_probs=False,
uses_tpu=True,
)
def is_valid(self, model_name, model_path, menu_path):
# This file shouldn't be imported unless using the TPU
return utils.koboldai_vars.use_colab_tpu and super().is_valid(model_name, model_path, menu_path)
def setup_mtj(self) -> None:
import tpu_mtj_backend
def mtj_warper_callback(scores) -> "np.array":
scores_shape = scores.shape
scores_list = scores.tolist()
@@ -147,7 +150,7 @@ class HFMTJInferenceModel(HFInferenceModel):
tpu_mtj_backend.socketio = utils.socketio
if utils.koboldai_vars.model == "TPUMeshTransformerGPTNeoX":
if self.model_name == "TPUMeshTransformerGPTNeoX":
utils.koboldai_vars.badwordsids = utils.koboldai_vars.badwordsids_neox
print(
@@ -155,7 +158,7 @@ class HFMTJInferenceModel(HFInferenceModel):
Colors.PURPLE, Colors.END
)
)
if utils.koboldai_vars.model in (
if self.model_name in (
"TPUMeshTransformerGPTJ",
"TPUMeshTransformerGPTNeoX",
) and (
@@ -165,7 +168,7 @@ class HFMTJInferenceModel(HFInferenceModel):
raise FileNotFoundError(
f"The specified model path {repr(utils.koboldai_vars.custmodpth)} is not the path to a valid folder"
)
if utils.koboldai_vars.model == "TPUMeshTransformerGPTNeoX":
if self.model_name == "TPUMeshTransformerGPTNeoX":
tpu_mtj_backend.pad_token_id = 2
tpu_mtj_backend.koboldai_vars = utils.koboldai_vars
@@ -176,13 +179,15 @@ class HFMTJInferenceModel(HFInferenceModel):
tpu_mtj_backend.settings_callback = mtj_settings_callback
def _load(self, save_model: bool, initial_load: bool) -> None:
import tpu_mtj_backend
self.setup_mtj()
self.init_model_config()
utils.koboldai_vars.allowsp = True
logger.info(self.model_name)
tpu_mtj_backend.load_model(
utils.koboldai_vars.model,
hf_checkpoint=utils.koboldai_vars.model
self.model_name,
hf_checkpoint=self.model_name
not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")
and utils.koboldai_vars.use_colab_tpu,
socketio_queue=koboldai_settings.queue,
@@ -198,7 +203,7 @@ class HFMTJInferenceModel(HFInferenceModel):
if (
utils.koboldai_vars.badwordsids is koboldai_settings.badwordsids_default
and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj")
and self.model_type not in ("gpt2", "gpt_neo", "gptj")
):
utils.koboldai_vars.badwordsids = [
[v]
@@ -207,6 +212,7 @@ class HFMTJInferenceModel(HFInferenceModel):
]
def get_soft_tokens(self) -> np.array:
import tpu_mtj_backend
soft_tokens = None
if utils.koboldai_vars.sp is None:
@@ -258,6 +264,7 @@ class HFMTJInferenceModel(HFInferenceModel):
seed: Optional[int] = None,
**kwargs,
) -> GenerationResult:
import tpu_mtj_backend
warpers.update_settings()
soft_tokens = self.get_soft_tokens()

View File

@@ -53,15 +53,12 @@ LOG_SAMPLER_NO_EFFECT = False
class HFTorchInferenceModel(HFInferenceModel):
def __init__(
self,
model_name: str,
lazy_load: bool,
low_mem: bool,
) -> None:
super().__init__(model_name)
self.lazy_load = lazy_load
self.low_mem = low_mem
def __init__(self) -> None:
super().__init__()
self.hf_torch = True
self.lazy_load = True
self.low_mem = False
self.nobreakmodel = False
self.post_token_hooks = [
PostTokenHooks.stream_tokens,
@@ -128,7 +125,19 @@ class HFTorchInferenceModel(HFInferenceModel):
else:
return "Unknown"
def get_auxilary_device(self):
"""Get device auxilary tensors like inputs should be stored on."""
# NOTE: TPU isn't a torch device, so TPU stuff gets sent to CPU.
if utils.koboldai_vars.hascuda and self.usegpu:
return utils.koboldai_vars.gpu_device
elif utils.koboldai_vars.hascuda and self.breakmodel:
import breakmodel
return breakmodel.primary_device
return "cpu"
def _post_load(m_self) -> None:
if not utils.koboldai_vars.model_type:
utils.koboldai_vars.model_type = m_self.get_model_type()
@@ -211,40 +220,6 @@ class HFTorchInferenceModel(HFInferenceModel):
new_sample.old_sample = transformers.GenerationMixin.sample
use_core_manipulations.sample = new_sample
# PEFT Loading. This MUST be done after all save_pretrained calls are
# finished on the main model.
if utils.args.peft:
from peft import PeftModel, PeftConfig
local_peft_dir = os.path.join(m_self.get_local_model_path(), "peft")
# Make PEFT dir if it doesn't exist
try:
os.makedirs(local_peft_dir)
except FileExistsError:
pass
peft_local_path = os.path.join(local_peft_dir, utils.args.peft.replace("/", "_"))
logger.debug(f"Loading PEFT '{utils.args.peft}', possible local path is '{peft_local_path}'.")
peft_installed_locally = True
possible_peft_locations = [peft_local_path, utils.args.peft]
for i, location in enumerate(possible_peft_locations):
try:
m_self.model = PeftModel.from_pretrained(m_self.model, location)
logger.debug(f"Loaded PEFT at '{location}'")
break
except ValueError:
peft_installed_locally = False
if i == len(possible_peft_locations) - 1:
raise RuntimeError(f"Unable to load PeftModel for given name '{utils.args.peft}'. Does it exist?")
except RuntimeError:
raise RuntimeError("Error while loading PeftModel. Are you using the correct model?")
if not peft_installed_locally:
logger.debug(f"PEFT not saved to models folder; saving to '{peft_local_path}'")
m_self.model.save_pretrained(peft_local_path)
return super()._post_load()
def _raw_generate(
@@ -262,7 +237,7 @@ class HFTorchInferenceModel(HFInferenceModel):
else:
gen_in = prompt_tokens
device = utils.get_auxilary_device()
device = self.get_auxilary_device()
gen_in = gen_in.to(device)
additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else []
@@ -272,19 +247,14 @@ class HFTorchInferenceModel(HFInferenceModel):
with torch.no_grad():
start_time = time.time()
# HEED & BEWARE: All arguments passed to self.model.generate MUST be
# kwargs; see https://github.com/huggingface/peft/issues/232. If they
# aren't, PeftModel will EXPLODE!!!! But nothing will happen without
# a PEFT loaded so it's sneaky.
genout = self.model.generate(
input_ids=gen_in,
gen_in,
do_sample=True,
max_length=min(
len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
),
repetition_penalty=1.0,
bad_words_ids=utils.koboldai_vars.badwordsids
bad_words_ids=self.badwordsids
+ additional_bad_words_ids,
use_cache=True,
num_return_sequences=batch_count,
@@ -304,7 +274,6 @@ class HFTorchInferenceModel(HFInferenceModel):
def _get_model(self, location: str, tf_kwargs: Dict):
tf_kwargs["revision"] = utils.koboldai_vars.revision
tf_kwargs["cache_dir"] = "cache"
tf_kwargs["trust_remote_code"] = utils.koboldai_vars.trust_remote_code
# If we have model hints for legacy model, use them rather than fall back.
try:
@@ -444,8 +413,6 @@ class HFTorchInferenceModel(HFInferenceModel):
if not self.lazy_load:
return
if utils.args.breakmodel_disklayers is not None:
breakmodel.disk_blocks = utils.args.breakmodel_disklayers
disk_blocks = breakmodel.disk_blocks
gpu_blocks = breakmodel.gpu_blocks
@@ -489,10 +456,10 @@ class HFTorchInferenceModel(HFInferenceModel):
):
device_map[key] = (
utils.koboldai_vars.gpu_device
if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu
if utils.koboldai_vars.hascuda and self.usegpu
else "cpu"
if not utils.koboldai_vars.hascuda
or not utils.koboldai_vars.breakmodel
or not self.breakmodel
else breakmodel.primary_device
)
else:
@@ -508,12 +475,12 @@ class HFTorchInferenceModel(HFInferenceModel):
)
device = (
utils.koboldai_vars.gpu_device
if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu
if utils.koboldai_vars.hascuda and self.usegpu
else "disk"
if layer < disk_blocks and layer < ram_blocks
else "cpu"
if not utils.koboldai_vars.hascuda
or not utils.koboldai_vars.breakmodel
or not self.breakmodel
else "shared"
if layer < ram_blocks
else bisect.bisect_right(
@@ -607,6 +574,7 @@ class HFTorchInferenceModel(HFInferenceModel):
)
)
# print(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ", end="", flush=True)
#logger.debug(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ")
model_dict[key] = model_dict[key].materialize(
f, map_location="cpu"
)
@@ -617,15 +585,15 @@ class HFTorchInferenceModel(HFInferenceModel):
and breakmodel.primary_device != "cpu"
and utils.koboldai_vars.hascuda
and (
utils.koboldai_vars.breakmodel
or utils.koboldai_vars.usegpu
self.breakmodel
or self.usegpu
)
and model_dict[key].dtype is torch.float32
):
model_dict[key] = model_dict[key].to(torch.float16)
if breakmodel.primary_device == "cpu" or (
not utils.koboldai_vars.usegpu
and not utils.koboldai_vars.breakmodel
not self.usegpu
and not self.breakmodel
and model_dict[key].dtype is torch.float16
):
model_dict[key] = model_dict[key].to(torch.float32)
@@ -663,14 +631,14 @@ class HFTorchInferenceModel(HFInferenceModel):
and breakmodel.primary_device != "cpu"
and utils.koboldai_vars.hascuda
and (
utils.koboldai_vars.breakmodel
or utils.koboldai_vars.usegpu
self.breakmodel
or self.usegpu
)
):
dtype = torch.float16
if breakmodel.primary_device == "cpu" or (
not utils.koboldai_vars.usegpu
and not utils.koboldai_vars.breakmodel
not self.usegpu
and not self.breakmodel
):
dtype = torch.float32
if (
@@ -726,16 +694,16 @@ class HFTorchInferenceModel(HFInferenceModel):
and breakmodel.primary_device != "cpu"
and utils.koboldai_vars.hascuda
and (
utils.koboldai_vars.breakmodel
or utils.koboldai_vars.usegpu
self.breakmodel
or self.usegpu
)
and model_dict[key].dtype is torch.float32
):
model_dict[key] = model_dict[key].to(torch.float16)
if breakmodel.primary_device == "cpu" or (
not utils.koboldai_vars.usegpu
and not utils.koboldai_vars.breakmodel
not self.usegpu
and not self.breakmodel
and model_dict[key].dtype is torch.float16
):
model_dict[key] = model_dict[key].to(torch.float32)
@@ -774,14 +742,14 @@ class HFTorchInferenceModel(HFInferenceModel):
and breakmodel.primary_device != "cpu"
and utils.koboldai_vars.hascuda
and (
utils.koboldai_vars.breakmodel
or utils.koboldai_vars.usegpu
self.breakmodel
or self.usegpu
)
):
dtype = torch.float16
if breakmodel.primary_device == "cpu" or (
not utils.koboldai_vars.usegpu
and not utils.koboldai_vars.breakmodel
not self.usegpu
and not self.breakmodel
):
dtype = torch.float32
if (
@@ -815,7 +783,7 @@ class HFTorchInferenceModel(HFInferenceModel):
if always_use or (
utils.koboldai_vars.hascuda
and self.low_mem
and (utils.koboldai_vars.usegpu or utils.koboldai_vars.breakmodel)
and (self.usegpu or self.breakmodel)
):
original_dtype = torch.get_default_dtype()
torch.set_default_dtype(torch.float16)
@@ -830,6 +798,8 @@ class HFTorchInferenceModel(HFInferenceModel):
device_count = torch.cuda.device_count()
if device_count < 2:
primary = None
logger.debug("n_layers: {}".format(n_layers))
logger.debug("gpu blocks: {}".format(breakmodel.gpu_blocks))
gpu_blocks = breakmodel.gpu_blocks + (
device_count - len(breakmodel.gpu_blocks)
) * [0]
@@ -860,155 +830,47 @@ class HFTorchInferenceModel(HFInferenceModel):
n_layers = utils.num_layers(config)
logger.debug("gpu blocks before modification: {}".format(breakmodel.gpu_blocks))
if utils.args.cpu:
breakmodel.gpu_blocks = [0] * n_layers
return
elif (
utils.args.breakmodel_gpulayers is not None
or utils.args.breakmodel_disklayers is not None
):
try:
if not utils.args.breakmodel_gpulayers:
breakmodel.gpu_blocks = []
else:
breakmodel.gpu_blocks = list(
map(int, utils.args.breakmodel_gpulayers.split(","))
)
assert len(breakmodel.gpu_blocks) <= torch.cuda.device_count()
s = n_layers
for i in range(len(breakmodel.gpu_blocks)):
if breakmodel.gpu_blocks[i] <= -1:
breakmodel.gpu_blocks[i] = s
break
else:
s -= breakmodel.gpu_blocks[i]
assert sum(breakmodel.gpu_blocks) <= n_layers
n_layers -= sum(breakmodel.gpu_blocks)
if utils.args.breakmodel_disklayers is not None:
assert utils.args.breakmodel_disklayers <= n_layers
breakmodel.disk_blocks = utils.args.breakmodel_disklayers
n_layers -= utils.args.breakmodel_disklayers
except:
logger.warning(
"--breakmodel_gpulayers is malformatted. Please use the --help option to see correct usage of --breakmodel_gpulayers. Defaulting to all layers on device 0."
)
breakmodel.gpu_blocks = [n_layers]
n_layers = 0
elif utils.args.breakmodel_layers is not None:
breakmodel.gpu_blocks = [
n_layers - max(0, min(n_layers, utils.args.breakmodel_layers))
]
n_layers -= sum(breakmodel.gpu_blocks)
elif utils.args.model is not None:
elif breakmodel.gpu_blocks == []:
logger.info("Breakmodel not specified, assuming GPU 0")
breakmodel.gpu_blocks = [n_layers]
n_layers = 0
else:
device_count = torch.cuda.device_count()
if device_count > 1:
print(
Colors.CYAN
+ "\nPlease select one of your GPUs to be your primary GPU."
)
print(
"VRAM usage in your primary GPU will be higher than for your other ones."
)
print("It is recommended you make your fastest GPU your primary GPU.")
self.breakmodel_device_list(n_layers)
while True:
primaryselect = input("device ID> ")
if (
primaryselect.isnumeric()
and 0 <= int(primaryselect) < device_count
):
breakmodel.primary_device = int(primaryselect)
break
else:
print(
f"{Colors.RED}Please enter an integer between 0 and {device_count-1}.{Colors.END}"
)
else:
breakmodel.primary_device = 0
print(
Colors.PURPLE
+ "\nIf you don't have enough VRAM to run the model on a single GPU"
)
print(
"you can split the model between your CPU and your GPU(s), or between"
)
print("multiple GPUs if you have more than one.")
print("By putting more 'layers' on a GPU or CPU, more computations will be")
print(
"done on that device and more VRAM or RAM will be required on that device"
)
print("(roughly proportional to number of layers).")
print(
"It should be noted that GPUs are orders of magnitude faster than the CPU."
)
print(
f"This model has{Colors.YELLOW} {n_layers} {Colors.PURPLE}layers.{Colors.END}\n"
)
for i in range(device_count):
self.breakmodel_device_list(
n_layers, primary=breakmodel.primary_device, selected=i
)
print(
f"{Colors.CYAN}\nHow many of the remaining{Colors.YELLOW} {n_layers} {Colors.CYAN}layers would you like to put into device {i}?\nYou can also enter -1 to allocate all remaining layers to this device.{Colors.END}\n"
)
while True:
layerselect = input("# of layers> ")
if (
layerselect.isnumeric() or layerselect.strip() == "-1"
) and -1 <= int(layerselect) <= n_layers:
layerselect = int(layerselect)
layerselect = n_layers if layerselect == -1 else layerselect
breakmodel.gpu_blocks.append(layerselect)
n_layers -= layerselect
break
else:
print(
f"{Colors.RED}Please enter an integer between -1 and {n_layers}.{Colors.END}"
)
if n_layers == 0:
s = n_layers
for i in range(len(breakmodel.gpu_blocks)):
if breakmodel.gpu_blocks[i] <= -1:
breakmodel.gpu_blocks[i] = s
break
if n_layers > 0:
self.breakmodel_device_list(
n_layers, primary=breakmodel.primary_device, selected=-1
)
print(
f"{Colors.CYAN}\nHow many of the remaining{Colors.YELLOW} {n_layers} {Colors.CYAN}layers would you like to put into the disk cache?\nYou can also enter -1 to allocate all remaining layers to this device.{Colors.END}\n"
)
while True:
layerselect = input("# of layers> ")
if (
layerselect.isnumeric() or layerselect.strip() == "-1"
) and -1 <= int(layerselect) <= n_layers:
layerselect = int(layerselect)
layerselect = n_layers if layerselect == -1 else layerselect
breakmodel.disk_blocks = layerselect
n_layers -= layerselect
break
else:
print(
f"{Colors.RED}Please enter an integer between -1 and {n_layers}.{Colors.END}"
)
else:
s -= breakmodel.gpu_blocks[i]
assert sum(breakmodel.gpu_blocks) <= n_layers
n_layers -= sum(breakmodel.gpu_blocks)
if breakmodel.disk_blocks is not None:
assert breakmodel.disk_blocks <= n_layers
n_layers -= breakmodel.disk_blocks
logger.init_ok("Final device configuration:", status="Info")
self.breakmodel_device_list(n_layers, primary=breakmodel.primary_device)
with open("settings/{}.breakmodel".format(self.model_name.replace("/", "_")), "w") as file:
file.write("{}\n{}".format(",".join(map(str, breakmodel.gpu_blocks)), breakmodel.disk_blocks))
# If all layers are on the same device, use the old GPU generation mode
while len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0:
breakmodel.gpu_blocks.pop()
self.breakmodel = True
if len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (
-1,
utils.num_layers(config),
):
utils.koboldai_vars.breakmodel = False
utils.koboldai_vars.usegpu = True
logger.debug("All layers on same GPU. Breakmodel disabled")
self.breakmodel = False
self.usegpu = True
utils.koboldai_vars.gpu_device = len(breakmodel.gpu_blocks) - 1
return
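# --- Hedged sketch (not part of the diff): reading back the "settings/<model>.breakmodel" file
# written above, which stores a single line of comma-separated GPU layer counts followed by the
# disk layer count on the next line. read_breakmodel_settings is a hypothetical helper written
# only for illustration.
def read_breakmodel_settings(path):
    with open(path, "r") as file:
        gpu_line, disk_line = file.read().split("\n")[:2]
    gpu_blocks = [int(x) for x in gpu_line.split(",") if x != ""]
    disk_blocks = int(disk_line) if disk_line.strip().isnumeric() else 0
    return gpu_blocks, disk_blocks

# A file containing "24,8\n0" yields ([24, 8], 0).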
@@ -1017,6 +879,6 @@ class HFTorchInferenceModel(HFInferenceModel):
import breakmodel
breakmodel.primary_device = "cpu"
utils.koboldai_vars.breakmodel = False
utils.koboldai_vars.usegpu = False
self.breakmodel = False
self.usegpu = False
return

View File

@@ -1,10 +1,11 @@
from __future__ import annotations
import time
import time, json
import torch
import requests
import numpy as np
from typing import List, Optional, Union
import os
import utils
from logger import logger
@@ -16,25 +17,131 @@ from modeling.inference_model import (
ModelCapabilities,
)
model_backend_name = "Horde"
class HordeException(Exception):
"""To be used for errors on server side of the Horde."""
class HordeInferenceModel(InferenceModel):
class model_backend(InferenceModel):
def __init__(self) -> None:
super().__init__()
self.url = "https://horde.koboldai.net"
self.key = "0000000000"
self.models = self.get_cluster_models()
self.model_name = "Horde"
self.model = []
# Do not allow API to be served over the API
self.capabilties = ModelCapabilities(api_host=False)
def is_valid(self, model_name, model_path, menu_path):
logger.debug("Horde Models: {}".format(self.models))
return model_name == "CLUSTER" or model_name in [x['value'] for x in self.models]
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
if os.path.exists("settings/api.model_backend.settings") and 'base_url' not in vars(self):
with open("settings/horde.model_backend.settings", "r") as f:
temp = json.load(f)
self.base_url = temp['url']
self.key = temp['key']
if 'key' in parameters:
self.key = parameters['key']
if 'url' in parameters:
self.url = parameters['url']
requested_parameters = []
requested_parameters.extend([{
"uitype": "text",
"unit": "text",
"label": "URL",
"id": "url",
"default": self.url if 'url' not in parameters else parameters['url'],
"tooltip": "URL to the horde.",
"menu_path": "",
"check": {"value": "", 'check': "!="},
"refresh_model_inputs": True,
"extra_classes": ""
},
{
"uitype": "text",
"unit": "text",
"label": "Key",
"id": "key",
"default": self.key if 'key' not in parameters else parameters['key'],
"check": {"value": "", 'check': "!="},
"tooltip": "User Key to use when connecting to Horde (0000000000 is anonymous).",
"menu_path": "",
"refresh_model_inputs": True,
"extra_classes": ""
},
{
"uitype": "dropdown",
"unit": "text",
"label": "Model",
"id": "model",
"default": model_name,
"check": {"value": "", 'check': "!="},
'multiple': True,
"tooltip": "Which model to use when running OpenAI/GooseAI.",
"menu_path": "",
"refresh_model_inputs": False,
"extra_classes": "",
'children': self.models,
}])
return requested_parameters
def set_input_parameters(self, parameters):
self.key = parameters['key'].strip()
self.model = parameters['model']
self.url = parameters['url']
def get_cluster_models(self):
# Get list of models from public cluster
try:
req = requests.get(f"{self.url}/api/v2/status/models?type=text")
except:
logger.init_err("KAI Horde Models", status="Failed")
logger.error("Provided KoboldAI Horde URL unreachable")
emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"})
return
if not req.ok:
# Something went wrong, print the message and quit since we can't initialize an engine
logger.init_err("KAI Horde Models", status="Failed")
logger.error(req.json())
emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1")
return
engines = req.json()
try:
engines = [{"text": "All", "value": "all"}] + [{"text": en["name"], "value": en["name"]} for en in engines]
except:
logger.error(engines)
raise
logger.debug(engines)
online_model = ""
logger.init_ok("KAI Horde Models", status="OK")
return engines
def _load(self, save_model: bool, initial_load: bool) -> None:
tokenizer_name = "gpt2"
if len(self.model) > 0:
if self.model[0] == "all" and len(self.model) > 1:
tokenizer_name = self.model[1]
else:
tokenizer_name = self.model[0]
self.tokenizer = self._get_tokenizer(
utils.koboldai_vars.cluster_requested_models[0]
if len(utils.koboldai_vars.cluster_requested_models) > 0
else "gpt2",
tokenizer_name
)
def _save_settings(self):
with open("settings/horde.model_backend.settings", "w") as f:
json.dump({"key": self.key, "url": self.url}, f, indent="")
def _raw_generate(
self,
prompt_tokens: Union[List[int], torch.Tensor],
@@ -80,14 +187,14 @@ class HordeInferenceModel(InferenceModel):
client_agent = "KoboldAI:2.0.0:koboldai.org"
cluster_headers = {
"apikey": utils.koboldai_vars.horde_api_key,
"apikey": self.key,
"Client-Agent": client_agent,
}
try:
# Create request
req = requests.post(
f"{utils.koboldai_vars.horde_url}/api/v2/generate/text/async",
f"{self.url}/api/v2/generate/text/async",
json=cluster_metadata,
headers=cluster_headers,
)
@@ -125,7 +232,7 @@ class HordeInferenceModel(InferenceModel):
while not finished:
try:
req = requests.get(
f"{utils.koboldai_vars.horde_url}/api/v2/generate/text/status/{request_id}",
f"{self.url}/api/v2/generate/text/status/{request_id}",
headers=cluster_agent_headers,
)
except requests.exceptions.ConnectionError:
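# --- Hedged sketch (not part of the diff): the submit-then-poll pattern used above against the
# Horde text API. The endpoint paths come from the code shown in this file; the helper name and
# the "id", "done" and "generations" response fields are assumptions made for illustration, not
# a documented contract.
import time
import requests

def horde_generate(url, key, payload, client_agent="KoboldAI:2.0.0:koboldai.org"):
    headers = {"apikey": key, "Client-Agent": client_agent}
    # Submit the generation request and remember the id the Horde assigns to it.
    submit = requests.post(f"{url}/api/v2/generate/text/async", json=payload, headers=headers)
    submit.raise_for_status()
    request_id = submit.json()["id"]
    # Poll the status endpoint until the cluster reports the request as finished.
    while True:
        status = requests.get(
            f"{url}/api/v2/generate/text/status/{request_id}",
            headers={"Client-Agent": client_agent},
        )
        data = status.json()
        if data.get("done"):
            return [gen["text"] for gen in data.get("generations", [])]
        time.sleep(1)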

View File

@@ -1,106 +0,0 @@
import torch
import requests
import numpy as np
from typing import List, Optional, Union
import utils
from logger import logger
from modeling.inference_model import (
GenerationResult,
GenerationSettings,
InferenceModel,
)
class OpenAIAPIError(Exception):
def __init__(self, error_type: str, error_message) -> None:
super().__init__(f"{error_type}: {error_message}")
class OpenAIAPIInferenceModel(InferenceModel):
"""InferenceModel for interfacing with OpenAI's generation API."""
def _load(self, save_model: bool, initial_load: bool) -> None:
self.tokenizer = self._get_tokenizer("gpt2")
def _raw_generate(
self,
prompt_tokens: Union[List[int], torch.Tensor],
max_new: int,
gen_settings: GenerationSettings,
single_line: bool = False,
batch_count: int = 1,
seed: Optional[int] = None,
**kwargs,
) -> GenerationResult:
if seed is not None:
logger.warning(
"Seed is unsupported on the OpenAIAPIInferenceModel. Seed will be ignored."
)
decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens))
# Store context in memory to use it for comparison with generated content
utils.koboldai_vars.lastctx = decoded_prompt
# Build request JSON data
# GooseAI is a subtype of OAI. So to check if it's this type, we check the configname as a workaround
# as the koboldai_vars.model will always be OAI
if "GooseAI" in utils.koboldai_vars.configname:
reqdata = {
"prompt": decoded_prompt,
"max_tokens": max_new,
"temperature": gen_settings.temp,
"top_a": gen_settings.top_a,
"top_p": gen_settings.top_p,
"top_k": gen_settings.top_k,
"tfs": gen_settings.tfs,
"typical_p": gen_settings.typical,
"repetition_penalty": gen_settings.rep_pen,
"repetition_penalty_slope": gen_settings.rep_pen_slope,
"repetition_penalty_range": gen_settings.rep_pen_range,
"n": batch_count,
# TODO: Implement streaming
"stream": False,
}
else:
reqdata = {
"prompt": decoded_prompt,
"max_tokens": max_new,
"temperature": gen_settings.temp,
"top_p": gen_settings.top_p,
"frequency_penalty": gen_settings.rep_pen,
"n": batch_count,
"stream": False,
}
req = requests.post(
utils.koboldai_vars.oaiurl,
json=reqdata,
headers={
"Authorization": "Bearer " + utils.koboldai_vars.oaiapikey,
"Content-Type": "application/json",
},
)
j = req.json()
if not req.ok:
# Send error message to web client
if "error" in j:
error_type = j["error"]["type"]
error_message = j["error"]["message"]
else:
error_type = "Unknown"
error_message = "Unknown"
raise OpenAIAPIError(error_type, error_message)
outputs = [out["text"] for out in j["choices"]]
return GenerationResult(
model=self,
out_batches=np.array([self.tokenizer.encode(x) for x in outputs]),
prompt=prompt_tokens,
is_whole_generation=True,
single_line=single_line,
)

View File

@@ -0,0 +1,33 @@
import torch
import requests
import numpy as np
from typing import List, Optional, Union
import os
import utils
from logger import logger
from modeling.inference_model import (
GenerationResult,
GenerationSettings,
InferenceModel,
)
from modeling.inference_models.openai_gooseai import model_backend as openai_gooseai_model_backend
model_backend_name = "OpenAI"
class OpenAIAPIError(Exception):
def __init__(self, error_type: str, error_message) -> None:
super().__init__(f"{error_type}: {error_message}")
class model_backend(openai_gooseai_model_backend):
"""InferenceModel for interfacing with OpenAI's generation API."""
def __init__(self):
super().__init__()
self.url = "https://api.openai.com/v1/engines"
self.source = "OpenAI"
def is_valid(self, model_name, model_path, menu_path):
return model_name == "OAI"

View File

@@ -0,0 +1,199 @@
import torch
import requests,json
import numpy as np
from typing import List, Optional, Union
import os
import utils
from logger import logger
from modeling.inference_model import (
GenerationResult,
GenerationSettings,
InferenceModel,
)
class OpenAIAPIError(Exception):
def __init__(self, error_type: str, error_message) -> None:
super().__init__(f"{error_type}: {error_message}")
class model_backend(InferenceModel):
"""InferenceModel for interfacing with OpenAI's generation API."""
def __init__(self):
super().__init__()
self.key = ""
self.url = "https://api.goose.ai/v1/engines"
def is_valid(self, model_name, model_path, menu_path):
return model_name == "OAI" or model_name == "GooseAI"
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
if os.path.exists("settings/{}.model_backend.settings".format(self.source)) and 'colaburl' not in vars(self):
with open("settings/{}.model_backend.settings".format(self.source), "r") as f:
try:
self.key = json.load(f)['key']
except:
pass
if 'key' in parameters:
self.key = parameters['key']
self.source = model_name
requested_parameters = []
requested_parameters.extend([{
"uitype": "text",
"unit": "text",
"label": "Key",
"id": "key",
"default": self.key,
"check": {"value": "", 'check': "!="},
"tooltip": "User Key to use when connecting to OpenAI/GooseAI.",
"menu_path": "",
"refresh_model_inputs": True,
"extra_classes": ""
},
{
"uitype": "dropdown",
"unit": "text",
"label": "Model",
"id": "model",
"default": "",
"check": {"value": "", 'check': "!="},
"tooltip": "Which model to use when running OpenAI/GooseAI.",
"menu_path": "",
"refresh_model_inputs": False,
"extra_classes": "",
'children': self.get_oai_models(),
}])
return requested_parameters
def set_input_parameters(self, parameters):
self.key = parameters['key'].strip()
self.model_name = parameters['model']
def get_oai_models(self):
if self.key == "":
return []
# Get list of models from OAI
logger.init("OAI Engines", status="Retrieving")
req = requests.get(
self.url,
headers = {
'Authorization': 'Bearer '+self.key
}
)
if(req.status_code == 200):
r = req.json()
engines = r["data"]
try:
engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines]
except:
logger.error(engines)
raise
online_model = ""
logger.init_ok("OAI Engines", status="OK")
logger.debug("OAI Engines: {}".format(engines))
return engines
else:
# Something went wrong, print the message and quit since we can't initialize an engine
logger.init_err("OAI Engines", status="Failed")
logger.error(req.json())
emit('from_server', {'cmd': 'errmsg', 'data': req.json()})
return []
def _load(self, save_model: bool, initial_load: bool) -> None:
self.tokenizer = self._get_tokenizer("gpt2")
def _save_settings(self):
with open("settings/{}.model_backend.settings".format(self.source), "w") as f:
json.dump({"key": self.key}, f, indent="")
def _raw_generate(
self,
prompt_tokens: Union[List[int], torch.Tensor],
max_new: int,
gen_settings: GenerationSettings,
single_line: bool = False,
batch_count: int = 1,
seed: Optional[int] = None,
**kwargs,
) -> GenerationResult:
if seed is not None:
logger.warning(
"Seed is unsupported on the OpenAIAPIInferenceModel. Seed will be ignored."
)
decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens))
# Store context in memory to use it for comparison with generated content
utils.koboldai_vars.lastctx = decoded_prompt
# Build request JSON data
# GooseAI is a subtype of OAI. So to check if it's this type, we check the configname as a workaround
# as the koboldai_vars.model will always be OAI
if self.source == "GooseAI":
reqdata = {
"prompt": decoded_prompt,
"max_tokens": max_new,
"temperature": gen_settings.temp,
"top_a": gen_settings.top_a,
"top_p": gen_settings.top_p,
"top_k": gen_settings.top_k,
"tfs": gen_settings.tfs,
"typical_p": gen_settings.typical,
"repetition_penalty": gen_settings.rep_pen,
"repetition_penalty_slope": gen_settings.rep_pen_slope,
"repetition_penalty_range": gen_settings.rep_pen_range,
"n": batch_count,
# TODO: Implement streaming
"stream": False,
}
else:
reqdata = {
"prompt": decoded_prompt,
"max_tokens": max_new,
"temperature": gen_settings.temp,
"top_p": gen_settings.top_p,
"frequency_penalty": gen_settings.rep_pen,
"n": batch_count,
"stream": False,
}
req = requests.post(
"{}/{}/completions".format(self.url, self.model_name),
json=reqdata,
headers={
"Authorization": "Bearer " + self.key,
"Content-Type": "application/json",
},
)
j = req.json()
if not req.ok:
# Send error message to web client
if "error" in j:
error_type = j["error"]["type"]
error_message = j["error"]["message"]
else:
error_type = "Unknown"
error_message = "Unknown"
raise OpenAIAPIError(error_type, error_message)
outputs = [out["text"] for out in j["choices"]]
return GenerationResult(
model=self,
out_batches=np.array([self.tokenizer.encode(x) for x in outputs]),
prompt=prompt_tokens,
is_whole_generation=True,
single_line=single_line,
)

View File

@@ -0,0 +1,78 @@
from __future__ import annotations
import torch
import requests
import numpy as np
from typing import List, Optional, Union
import utils
from logger import logger
from modeling.inference_model import (
GenerationResult,
GenerationSettings,
InferenceModel,
ModelCapabilities,
)
model_backend_name = "Read Only"
class BasicAPIException(Exception):
"""To be used for errors when using the Basic API as an interface."""
class model_backend(InferenceModel):
def __init__(self) -> None:
super().__init__()
# Do not allow API to be served over the API
self.capabilties = ModelCapabilities(api_host=False)
self.tokenizer = self._tokenizer()
self.model = None
self.model_name = "Read Only"
def is_valid(self, model_name, model_path, menu_path):
return model_name == "ReadOnly"
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
requested_parameters = []
return requested_parameters
def set_input_parameters(self, parameters):
return
def unload(self):
utils.koboldai_vars.noai = False
def _initialize_model(self):
return
class _tokenizer():
def __init__(self):
self._koboldai_header = []
def decode(self, _input):
return ""
def encode(self, input_text):
return []
def _load(self, save_model: bool = False, initial_load: bool = False) -> None:
self.tokenizer = self.tokenizer
self.model = None
utils.koboldai_vars.noai = True
def _raw_generate(
self,
prompt_tokens: Union[List[int], torch.Tensor],
max_new: int,
gen_settings: GenerationSettings,
single_line: bool = False,
batch_count: int = 1,
seed: Optional[int] = None,
**kwargs,
):
return GenerationResult(
model=self,
out_batches=np.array([]),
prompt=prompt_tokens,
is_whole_generation=True,
single_line=single_line,
)

View File

@@ -1,3 +1,5 @@
//=================================================================//
// VARIABLES
//=================================================================//
@@ -2333,6 +2335,8 @@ $(document).ready(function(){
socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);});
socket.on('popup_edit_file', function(data){popup_edit_file(data);});
socket.on('error_popup', function(data){error_popup(data);});
socket.on('open_model_load_menu', function(data){show_model_menu(data);});
socket.on('selected_model_info', function(data){selected_model_info(data);});
socket.on('from_server', function(msg) {
//console.log(msg);
@@ -3332,28 +3336,6 @@ $(document).ready(function(){
hideLoadPopup();
});
load_model_accept.on("click", function(ev) {
hideMessage();
var gpu_layers;
var message;
if($("#modellayers")[0].classList.contains('hidden')) {
gpu_layers = ","
} else {
gpu_layers = ""
for (let i=0; i < $("#gpu_count")[0].value; i++) {
gpu_layers += $("#gpu_layers"+i)[0].value + ",";
}
}
var disk_layers = $("#disk_layers").length > 0 ? $("#disk_layers")[0].value : 0;
models = getSelectedOptions(document.getElementById('oaimodel'));
if (models.length == 1) {
models = models[0];
}
message = {'cmd': 'load_model', 'use_gpu': $('#use_gpu')[0].checked, 'key': $('#modelkey')[0].value, 'gpu_layers': gpu_layers.slice(0, -1), 'disk_layers': disk_layers, 'url': $('#modelurl')[0].value, 'online_model': models};
socket.send(message);
loadmodelcontent.html("");
hideLoadModelPopup();
});
sp_close.on("click", function(ev) {
hideSPPopup();
@@ -3388,8 +3370,9 @@ $(document).ready(function(){
});
button_loadmodel.on("click", function(ev) {
showLoadModelPopup();
socket.send({'cmd': 'list_model', 'data': 'mainmenu'});
//showLoadModelPopup();
//socket.send({'cmd': 'list_model', 'data': 'mainmenu'});
socket.emit('load_model_button', {});
});
button_showmodel.on("click", function(ev) {
socket.send({'cmd': 'show_model', 'data': ''});
@@ -3836,3 +3819,713 @@ function show_message(data) {
document.getElementById('message-popup').classList.remove('hidden');
}
//-----------------------------------------------------Copy from UI2--------------------------------------------------------
function show_model_menu(data) {
console.log(data);
//clear out the loadmodelsettings
var loadmodelsettings = document.getElementById('loadmodelsettings')
while (loadmodelsettings.firstChild) {
loadmodelsettings.removeChild(loadmodelsettings.firstChild);
}
//Clear out plugin selector
var model_plugin = document.getElementById('modelplugin');
while (model_plugin.firstChild) {
model_plugin.removeChild(model_plugin.firstChild);
}
model_plugin.classList.add("hidden");
var accept = document.getElementById("btn_loadmodelaccept");
accept.disabled = false;
//clear out the breadcrumbs
var breadcrumbs = document.getElementById('loadmodellistbreadcrumbs')
while (breadcrumbs.firstChild) {
breadcrumbs.removeChild(breadcrumbs.firstChild);
}
//add breadcrumbs
if ('breadcrumbs' in data) {
for (item of data.breadcrumbs) {
var button = document.createElement("button");
button.classList.add("breadcrumbitem");
button.setAttribute("model", data.menu);
button.setAttribute("folder", item[0]);
button.textContent = item[1];
button.onclick = function () {
socket.emit('select_model', {'menu': "", 'name': this.getAttribute("model"), 'path': this.getAttribute("folder")});
};
breadcrumbs.append(button);
var span = document.createElement("span");
span.textContent = "\\";
breadcrumbs.append(span);
}
}
//clear out the items
var model_list = document.getElementById('loadmodellistcontent')
while (model_list.firstChild) {
model_list.removeChild(model_list.firstChild);
}
//add items
for (item of data.items) {
var list_item = document.createElement("span");
list_item.classList.add("model_item");
//create the folder icon
var folder_icon = document.createElement("span");
folder_icon.classList.add("material-icons-outlined");
folder_icon.classList.add("cursor");
let isModel = !(
item.isMenu ||
item.label === "Load a model from its directory" ||
item.label === "Load an old GPT-2 model (eg CloverEdition)"
);
folder_icon.textContent = isModel ? "psychology" : "folder";
list_item.append(folder_icon);
//create the actual item
var popup_item = document.createElement("span");
popup_item.classList.add("model");
for (const key in item) {
if (key == "name") {
popup_item.id = item[key];
}
popup_item.setAttribute(key, item[key]);
}
popup_item.onclick = function() {
var attributes = this.attributes;
var obj = {};
for (var i = 0, len = attributes.length; i < len; i++) {
obj[attributes[i].name] = attributes[i].value;
}
//put the model data on the accept button so we can send it to the server when you accept
var accept = document.getElementById("popup_accept");
selected_model_data = obj;
//send the data to the server so it can figure out what data we need from the user for the model
socket.emit('select_model', obj);
//clear out the selected item and select this one visually
for (const element of document.getElementsByClassName("model_menu_selected")) {
element.classList.remove("model_menu_selected");
}
this.closest(".model_item").classList.add("model_menu_selected");
}
//name text
var text = document.createElement("span");
text.style="grid-area: item;";
text.textContent = item.label;
popup_item.append(text);
//model size text
var text = document.createElement("span");
text.textContent = item.size;
text.style="grid-area: gpu_size;padding: 2px;";
popup_item.append(text);
(function() {
// Anon function to avoid unreasonable indentation
if (!isModel) return;
let parameterCount = getModelParameterCount(item.label);
if (!parameterCount) return;
let warningText = "";
if (parameterCount > 25_000_000_000) warningText = "This is a very high-end model and will likely not run without a specialized setup."; // 25B
if (parameterCount < 2_000_000_000) warningText = "This is a lower-end model and may perform poorly."; // 2B
if (parameterCount < 1_000_000_000) warningText = "This is a very low-end model and may perform incoherently."; // 1B
if (!warningText) return;
$e("span", list_item, {
classes: ["material-icons-outlined", "model-size-warning"],
innerText: "warning",
"style.grid-area": "warning_icon",
tooltip: warningText
});
})();
(function() {
// Anon function to avoid unreasonable indentation
if (!item.isDownloaded) return;
if (!isModel) return;
$e("span", list_item, {
classes: ["material-icons-outlined", "model-download-notification"],
innerText: "download_done",
"style.grid-area": "downloaded_icon",
tooltip: "This model is already downloaded."
});
})();
list_item.append(popup_item);
model_list.append(list_item);
}
openPopup("load-model");
}
function model_settings_checker() {
//get check value:
missing_element = false;
if (this.check_data != null) {
if ('sum' in this.check_data) {
check_value = 0
for (const temp of this.check_data['sum']) {
if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value);
} else {
missing_element = true;
}
}
} else {
check_value = this.value
}
if (this.check_data['check'] == "=") {
valid = (check_value == this.check_data['value']);
} else if (this.check_data['check'] == "!=") {
valid = (check_value != this.check_data['value']);
} else if (this.check_data['check'] == ">=") {
valid = (check_value >= this.check_data['value']);
} else if (this.check_data['check'] == "<=") {
valid = (check_value <= this.check_data['value']);
} else if (this.check_data['check'] == "<=") {
valid = (check_value > this.check_data['value']);
} else if (this.check_data['check'] == "<=") {
valid = (check_value < this.check_data['value']);
}
if (valid || missing_element) {
//if we are supposed to refresh when this value changes we'll resubmit
if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) {
//get an object of all the input settings from the user
data = {}
settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area");
if (settings_area) {
for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) {
var element_data = element.value;
if (element.getAttribute("data_type") == "int") {
element_data = parseInt(element_data);
} else if (element.getAttribute("data_type") == "float") {
element_data = parseFloat(element_data);
} else if (element.getAttribute("data_type") == "bool") {
element_data = (element_data == 'on');
}
data[element.id.split("|")[1].replace("_value", "")] = element_data;
}
}
data = {...data, ...selected_model_data};
data['plugin'] = document.getElementById("modelplugin").value;
socket.emit("resubmit_model_info", data);
}
if ('sum' in this.check_data) {
for (const temp of this.check_data['sum']) {
if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error');
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip");
}
}
} else {
this.closest(".setting_container_model").classList.remove('input_error');
this.closest(".setting_container_model").removeAttribute("tooltip");
}
} else {
if ('sum' in this.check_data) {
for (const temp of this.check_data['sum']) {
if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error');
if (this.check_data['check_message']) {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']);
} else {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip");
}
}
}
} else {
this.closest(".setting_container_model").classList.add('input_error');
if (this.check_data['check_message']) {
this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']);
} else {
this.closest(".setting_container_model").removeAttribute("tooltip");
}
}
}
}
var accept = document.getElementById("btn_loadmodelaccept");
ok_to_load = true;
for (const item of document.getElementsByClassName("input_error")) {
if (item.classList.contains("input_error") && !item.closest(".model_plugin_settings_area").classList.contains("hidden")) {
ok_to_load = false;
break;
}
}
if (ok_to_load) {
accept.classList.remove("disabled");
accept.disabled = false;
} else {
accept.classList.add("disabled");
accept.disabled = true;
}
//We now have valid display boxes potentially. We'll go through them and update the display
for (const item of document.querySelectorAll(".model_settings_valid_display:not(#blank_model_settings_valid_display)")) {
check_value = 0
missing_element = false;
for (const temp of item.check_data['sum']) {
if (document.getElementById(item.id.split("|")[0] +"|" + temp + "_value")) {
check_value += parseInt(document.getElementById(item.id.split("|")[0] +"|" + temp + "_value").value);
} else {
missing_element = true;
}
}
if (!missing_element) {
item.innerText = item.original_text.replace("%1", check_value);
}
}
}
function selected_model_info(sent_data) {
const data = sent_data['model_backends'];
//clear out the loadmodelsettings
var loadmodelsettings = document.getElementById('loadmodelsettings')
while (loadmodelsettings.firstChild) {
loadmodelsettings.removeChild(loadmodelsettings.firstChild);
}
//Clear out plugin selector
var model_plugin = document.getElementById('modelplugin');
while (model_plugin.firstChild) {
model_plugin.removeChild(model_plugin.firstChild);
}
var accept = document.getElementById("btn_loadmodelaccept");
accept.disabled = false;
modelplugin = document.getElementById("modelplugin");
modelplugin.classList.remove("hidden");
modelplugin.onchange = function () {
for (const area of document.getElementsByClassName("model_plugin_settings_area")) {
area.classList.add("hidden");
}
if (document.getElementById(this.value + "_settings_area")) {
document.getElementById(this.value + "_settings_area").classList.remove("hidden");
}
model_settings_checker()
}
//create the content
for (const [loader, items] of Object.entries(data)) {
model_area = document.createElement("DIV");
model_area.id = loader + "_settings_area";
model_area.classList.add("model_plugin_settings_area");
model_area.classList.add("hidden");
modelpluginoption = document.createElement("option");
modelpluginoption.innerText = loader;
modelpluginoption.value = loader;
modelplugin.append(modelpluginoption);
//create the user input for each requested input
for (item of items) {
let new_setting = document.getElementById('blank_model_settings').cloneNode(true);
new_setting.id = loader;
new_setting.classList.remove("hidden");
new_setting.querySelector('#blank_model_settings_label').innerText = item['label'];
new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']);
onchange_event = model_settings_checker;
if (item['uitype'] == "slider") {
var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number');
slider_number.value = item['default'];
slider_number.id = loader + "|" + item['id'] + "_value_text";
slider_number.onchange = function() { document.getElementById(this.id.replace("_text", "")).value = this.value;};
var slider = new_setting.querySelector('#blank_model_settings_slider');
slider.value = item['default'];
slider.min = item['min'];
slider.max = item['max'];
slider.setAttribute("data_type", item['unit']);
slider.id = loader + "|" + item['id'] + "_value";
if ('check' in item) {
slider.check_data = item['check'];
slider_number.check_data = item['check'];
} else {
slider.check_data = null;
slider_number.check_data = null;
}
slider.oninput = function() { document.getElementById(this.id+"_text").value = this.value;};
slider.onchange = onchange_event;
slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min'];
new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max'];
slider.noresubmit = true;
slider.onchange();
slider.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_slider').remove();
}
if (item['uitype'] == "toggle") {
toggle = document.createElement("input");
toggle.type='checkbox';
toggle.classList.add("setting_item_input");
toggle.classList.add("blank_model_settings_input");
toggle.classList.add("model_settings_input");
toggle.id = loader + "|" + item['id'] + "_value";
toggle.checked = item['default'];
toggle.onclick = onchange_event;
toggle.setAttribute("data_type", item['unit']);
toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if ('check' in item) {
toggle.check_data = item['check'];
} else {
toggle.check_data = null;
}
new_setting.querySelector('#blank_model_settings_toggle').append(toggle);
setTimeout(function() {
$('#'+loader + "\\|" + item['id'] + "_value").bootstrapToggle({size: "mini", onstyle: "success", toggle: "toggle"});
}, 200);
toggle.noresubmit = true;
toggle.onclick();
toggle.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_toggle').remove();
}
if (item['uitype'] == "dropdown") {
var select_element = new_setting.querySelector('#blank_model_settings_dropdown');
select_element.id = loader + "|" + item['id'] + "_value";
for (const dropdown_value of item['children']) {
new_option = document.createElement("option");
new_option.value = dropdown_value['value'];
new_option.innerText = dropdown_value['text'];
select_element.append(new_option);
}
select_element.value = item['default'];
select_element.setAttribute("data_type", item['unit']);
select_element.onchange = onchange_event;
select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if (('multiple' in item) && (item['multiple'])) {
select_element.multiple = true;
select_element.size = 10;
}
if ('check' in item) {
select_element.check_data = item['check'];
} else {
select_element.check_data = null;
}
select_element.noresubmit = true;
select_element.onchange();
select_element.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_dropdown').remove();
}
if (item['uitype'] == "password") {
var password_item = new_setting.querySelector('#blank_model_settings_password');
password_item.id = loader + "|" + item['id'] + "_value";
password_item.value = item['default'];
password_item.setAttribute("data_type", item['unit']);
password_item.onchange = onchange_event;
password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if ('check' in item) {
password_item.check_data = item['check'];
} else {
password_item.check_data = null;
}
password_item.noresubmit = true;
password_item.onchange();
password_item.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_password').remove();
}
if (item['uitype'] == "text") {
var text_item = new_setting.querySelector('#blank_model_settings_text');
text_item.id = loader + "|" + item['id'] + "_value";
text_item.value = item['default'];
text_item.onchange = onchange_event;
text_item.setAttribute("data_type", item['unit']);
text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if ('check' in item) {
text_item.check_data = item['check'];
} else {
text_item.check_data = null;
}
text_item.noresubmit = true;
text_item.onchange();
text_item.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_text').remove();
}
if (item['uitype'] == "Valid Display") {
new_setting = document.createElement("DIV");
new_setting.classList.add("model_settings_valid_display");
new_setting.id = loader + "|" + item['id'] + "_value";
new_setting.innerText = item['label'];
new_setting.check_data = item['check'];
new_setting.original_text = item['label'];
}
model_area.append(new_setting);
loadmodelsettings.append(model_area);
}
}
//unhide the first plugin settings
if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) {
document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden");
}
model_settings_checker()
}
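// Editor's note (illustrative, not from the original commit): each backend gets
// its own "<loader>_settings_area" div built above; the #modelplugin dropdown
// shows exactly one of them at a time, and model_settings_checker() then
// re-checks the visible area for input errors to enable or disable the Load button.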
function getModelParameterCount(modelName) {
if (!modelName) return null;
// The "T" and "K" may be a little optimistic...
let paramsString = modelName.toUpperCase().match(/[\d.]+[TBMK]/)
if (!paramsString) return null;
paramsString = paramsString[0];
let base = parseFloat(paramsString);
let multiplier = {T: 1_000_000_000_000, B: 1_000_000_000, M: 1_000_000, K: 1_000}[paramsString[paramsString.length - 1]];
return base * multiplier;
}
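// Editor's illustration (not part of the original commit): how the regex above
// reads a few typical names, using the multiplier table in getModelParameterCount.
//   getModelParameterCount("example-13B")  -> 13_000_000_000
//   getModelParameterCount("example-2.7B") -> 2_700_000_000
//   getModelParameterCount("gpt2")         -> null (no size suffix to match)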
function openPopup(id) {
closePopups();
const container = document.getElementById("popup-container");
container.classList.remove("hidden");
for (const popupWindow of container.children) {
popupWindow.classList.add("hidden");
}
const popup = document.getElementById(`${id}`);
popup.classList.remove("hidden");
// Sometimes we want to instantly focus on certain elements when a menu opens.
for (const noticeMee of popup.getElementsByClassName("focus-on-me")) {
noticeMee.focus();
break;
}
}
function closePopups() {
const container = document.getElementById("popup-container");
container.classList.add("hidden");
for (const popupWindow of container.children) {
popupWindow.classList.add("hidden");
}
}
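// Editor's note (illustrative, not from the original commit): popups are direct
// children of #popup-container, so openPopup("load-model") -- as used when the
// model menu arrives below -- unhides the container, hides every sibling popup,
// and reveals only the requested one.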
function $el(selector) {
// We do not preemptively fetch all elements upon execution (wall of consts)
// due to the layer of mental overhead it adds to debugging and reading
// code in general.
return document.querySelector(selector);
}
function $e(tag, parent, attributes, insertionLocation=null) {
// Small helper function for dynamic UI creation
let element = document.createElement(tag);
if (!attributes) attributes = {};
if ("classes" in attributes) {
if (!Array.isArray(attributes.classes)) throw Error("Classes was not array!");
for (const className of attributes.classes) {
element.classList.add(className);
}
delete attributes.classes;
}
for (const [attribute, value] of Object.entries(attributes)) {
if (attribute.includes(".")) {
let ref = element;
const parts = attribute.split(".");
for (const part of parts.slice(0, -1)) {
ref = ref[part];
}
ref[parts[parts.length - 1]] = value;
continue;
}
if (attribute in element) {
element[attribute] = value;
} else {
element.setAttribute(attribute, value);
}
}
if (!parent) return element;
if (insertionLocation && Object.keys(insertionLocation).length) {
let [placement, target] = Object.entries(insertionLocation)[0];
if (placement === "before") {
parent.insertBefore(element, target);
} else if (placement === "after") {
parent.insertBefore(element, target.nextSibling);
} else {
throw Error(`I have no clue what placement ${placement} is`);
}
} else {
parent.appendChild(element);
}
return element;
}
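// Editor's sketch (not part of the original commit; element and values are
// hypothetical): typical use of the $e helper above. "classes" goes through
// classList, dotted keys walk nested properties, known element properties are
// assigned directly, and anything else falls back to setAttribute.
// let chip = $e("span", document.body, {
//     classes: ["model_item"],
//     "style.display": "inline-block",
//     innerText: "example",
//     tooltip: "hover text",
// });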
function load_model() {
var accept = document.getElementById('btn_loadmodelaccept');
settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area");
//get an object of all the input settings from the user
data = {}
if (settings_area) {
for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) {
var element_data = element.value;
if ((element.tagName == "SELECT") && (element.multiple)) {
element_data = [];
for (var i=0, iLen=element.options.length; i<iLen; i++) {
if (element.options[i].selected) {
element_data.push(element.options[i].value);
}
}
} else {
if (element.getAttribute("data_type") == "int") {
element_data = parseInt(element_data);
} else if (element.getAttribute("data_type") == "float") {
element_data = parseFloat(element_data);
} else if (element.getAttribute("data_type") == "bool") {
element_data = (element_data == 'on');
}
}
data[element.id.split("|")[1].replace("_value", "")] = element_data;
}
}
data = {...data, ...selected_model_data};
data['plugin'] = document.getElementById("modelplugin").value;
socket.emit("load_model", data);
closePopups();
}
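// Editor's note (illustrative, not from the original commit): the payload
// emitted above is shaped roughly like the sketch below -- one key per visible
// backend setting (the part of the input id between "|" and "_value"), merged
// with the attributes of the clicked menu item stored in selected_model_data,
// plus the chosen backend under "plugin". The concrete names and values here
// are examples only.
// {
//     "plugin": "Huggingface",
//     "use_gpu": true,               // example backend-defined setting id
//     "name": "example-model-13B",   // example attribute from the menu item
//     "menu": "mainmenu"             // example attribute from the menu item
// }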
function initalizeTooltips() {
const tooltip = $e("span", document.body, {id: "tooltip-text", "style.display": "none"});
let tooltipTarget = null;
function alterTooltipState(target, specialClass=null) {
tooltipTarget = target;
tooltip.style.display = target ? "block" : "none";
tooltip.className = specialClass || "";
}
function registerElement(el) {
// el should have attribute "tooltip"
let text = el.getAttribute("tooltip");
el.addEventListener("mouseenter", function(event) {
if (!el.hasAttribute("tooltip")) return;
tooltip.innerText = text;
let specialClass = "tooltip-standard";
// Kinda lame
if (this.classList.contains("context-token")) specialClass = "tooltip-context-token";
alterTooltipState(el, specialClass);
});
el.addEventListener("mouseleave", function(event) {
alterTooltipState(null);
});
}
const xOffset = 10;
const yOffset = 15;
document.addEventListener("mousemove", function(event) {
if (!tooltipTarget) return;
let [x, y] = [event.x, event.y];
// X + the tooltip's width is the farthest point right we will display;
// let's account for it. If the tooltip would render outside the window,
// subtract accordingly.
let xOverflow = (x + tooltip.clientWidth) - window.innerWidth;
if (xOverflow > 0) x -= xOverflow;
if (xOverflow + xOffset < 0) x += xOffset;
// Same for Y!
let yOverflow = (y + tooltip.clientHeight) - window.innerHeight;
if (yOverflow > 0) y -= yOverflow;
if (yOverflow + yOffset < 0) y += yOffset;
tooltip.style.left = `${x}px`;
tooltip.style.top = `${y}px`;
});
// Initial scan
for (const element of document.querySelectorAll("[tooltip]")) {
registerElement(element);
}
// Use a MutationObserver to catch future tooltips
const observer = new MutationObserver(function(records, observer) {
for (const record of records) {
if (record.type === "attributes") {
// Sanity check
if (record.attributeName !== "tooltip") continue;
registerElement(record.target);
continue;
}
// If we remove the tooltip target, stop showing the tooltip. Maybe a little inefficient.
if (!document.body.contains(tooltipTarget)) alterTooltipState(null);
for (const node of record.addedNodes) {
if (node.nodeType !== 1) continue;
if (node.hasAttribute("tooltip")) registerElement(node);
// Register for descendants (Slow?)
for (const element of node.querySelectorAll("[tooltip]")) {
registerElement(element);
}
}
}
});
observer.observe(document.body, {
childList: true,
subtree: true,
attributeFilter: ["tooltip"],
});
}
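// Editor's note (illustrative, not from the original commit): any element that
// carries a "tooltip" attribute is registered here -- either during the initial
// scan or later through the MutationObserver -- so the dynamically built model
// settings only need setAttribute("tooltip", ...) to get hover help.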
// Must be done before any elements are made; we track their changes.
console.log(document.body);
initalizeTooltips();

View File

@@ -1728,4 +1728,691 @@ body.connected .popupfooter, .popupfooter.always-available {
.wientry > .input-token-usage {
bottom: 8px;
}
/*----------------------------------------------COPY FROM UI2-----------------------------------------------------------------------*/
:root {
/*General*/
--background: #252e3b;
--gamescreen_background: #111820;
--input_background: #111820;
--text: #e0e0e0;
--text_to_ai_color: #e0e0e0;
--text_edit: #9cc3ee;
--action_mode_input: #33E978;
--statusbar_color: #eedcb880;
--statusbar_text_color: #e0e0e0;
--scrollbar-color: #2f3b4bdb;
/*Buttons*/
/*General*/
--enabled_button_text: #e0e0e0;
--enabled_button_background_color: #2d3d52;
--enabled_button_border_color: #253446;
--disabled_button_text: #303030;
--disabled_button_background_color: #495762;
--disabled_button_border_color: #686c68;
/*Home Tab*/
--button_text: #e0e0e0;
--button_background: #283445;
/*Alternate Button*/
--alternate_button_text: #e0e0e0;
--alternate_button_background: #283445;
/*Buttons -> Icon Button*/
--icon_button_background:;
--icon_button_color:;
--icon_button_border_color:;
/*Context Menu*/
--context_menu_text:;
--context_menu_background:;
--context_menu_border:;
--context_menu_division:;
--context_menu_hover_text:;
--context_menu_hover_background:;
/*Sequence, AKA Gens Per Action*/
--sequence_area_background: #111820;
--sequence_background: #eedcb8;
--sequence_text: #e0e0e0;
/*Side Menus*/
--tab_color: #243047;
--flyout_background: #18222d;
--flyout_background_pinned: #18222d;
--setting_background: #273141;
--setting_text: #e0e0e0;
--sample_order_select_color: #1f2934;
--sample_order_select_color_text: #eedcb8;
--dropdown_text: #e0e0e0;
--dropdown_background: #212935;
--rangeslider_background_color: #1f2934;
--rangeslider_color: #1f2934;
--rangeslider_circle_color: #404d64;
--help_icon: #7c8389;
--tooltip_text: #e0e0e0;
--tooltip_background: #303c50;
--setting_category_help_text_color: #E0E0E0;
--setting_footer_border_color: #334552;
--setting_footer_text_color: #e0e0e0;
--setting_footer_background_color: #18222d;
/*Substitution Card*/
--substitution_card_input_border:;
--substitution_card_input_background:;
/*Palette Card*/
--palette_card_background: #273141;
--palette_card_text: #e0e0e0;
--palette_table_border: #607c90;
/*World Info*/
--wi_card_border_color: #334552;
--wi_card_border_color_to_ai: #eedcb880;
--wi_card_bg_color: #223040;
--wi_card_text_color: #e0e0e0;
--wi_card_tag_bg_color: #1d2835;
--wi_card_tag_text_color: #e0e0e0;
--wi_tag_color: #283445;
--wi_tag_text_color: #e0e0e0;
/*Popup*/
--popup_background_color: #1a2530;
--popup_title_bar_color: #283445;
--popup_title_bar_color_text: #e0e0e0;
--popup_item_color: #1a2530;
--popup_item_color_text: #e0e0e0;
--popup_hover_color: #1e2733;
--popup_hover_color_text: #e0e0e0;
--popup_selected_color: #242d3c;
--popup_selected_color_text: #eedcb8;
--popup_button_color: #283445;
--popup_button_color_text: #e0e0e0;
--popup_cancel_button_color: #25364a;
--popup_cancel_button_color_text: #e0e0e0;
--error: #19242c;
--error_text: #e0e0e0;
--error_title: #25364a;
--error_title_text: #e0e0e0;
/*Context Bar Colors*/
--context_colors_memory: #04325c;
--context_colors_authors_notes: #165a62;
--context_colors_world_info: #1864a3;
--context_colors_prompt: #868686;
--context_colors_game_text: #63710e;
--context_colors_submit: #ffffff00;
--context_colors_unused: #ffffff24;
--context_colors_soft_prompt: #141414;
--context_colors_genre: #2c5c88;
/*Parameters*/
--scrollbar-size: 6px;
--palette_card_shadow: 0;
--wi_card_shadow: 0;
--light_shadow_value: 0;
--left_menu_strong_shadow: 0;
--right_menu_light_shadow: 0;
--right_menu_strong_shadow: 0;
--context_menu_shadow: var(--wi_card_shadow);
--radius_inputbox: 2px;
--radius_unpinned_menu: 2px;
--radius_sequence: 5px;
--radius_settings_background: 2px;
--radius_button: 2px;
--radius_alternate_button: 2px;
--radius_item_popup: 2px;
--radius_wi_card: 5px;
--radius_palette_card: 5px;
--radius_settings_button: 2px;
--tabs_rounding: 2px;
--radius_context_menu: 2px;
--radius_context_menu_hover: 2px;
--radius_genre_tag: 2px;
--radius_tooltip: 2px;
/*----------------VARIABLES--------------------*/
--flyout_menu_closed_width: 0px;
--setting_menu_closed_width_no_pins_width: 0px;
--story_options_size: 30%;
--story_pinned_areas_left:"menuicon options gamescreen lefticon"
"menuicon theme theme lefticon"
"menuicon inputrow inputrow lefticon";
--story_pinned_areas_right:"menuicon gamescreen options lefticon"
"menuicon theme theme lefticon"
"menuicon inputrow inputrow lefticon";
--story_pinned_area_widths_left: 30px var(--story_options_size) auto 30px;
--story_pinned_area_widths_right: 30px auto var(--story_options_size) 30px;
--story_pinned_areas:var(--story_pinned_areas_left);
--story_pinned_area_widths:var(--story_pinned_area_widths_left);
--font_size_adjustment: 0px;
--game_screen_font_size_adjustment: 1;
}
/*---------------------------------- Popups -------------------------------------------------*/
@media only screen and (max-aspect-ratio: 7/5) {
.popup {
position: absolute;
top: 10vh;
left: 10%;
z-index: 999;
width: 80%;
height: 80vh;
border-radius: 15px;
box-shadow: var(--popup_shadow);
background-color: var(--popup_background_color);
display: flex;
flex-direction: column;
overflow: hidden;
}
}
@media only screen and (min-aspect-ratio: 7/5) {
.popup {
position: absolute;
top: 10vh;
left: 25%;
z-index: 999;
width: 50%;
height: 80vh;
border-radius: 15px;
box-shadow: var(--popup_shadow);
background-color: var(--popup_background_color);
display: flex;
flex-direction: column;
overflow: hidden;
}
}
.popup .title {
width: 100%;
background-color: var(--popup_title_bar_color);
color: var(--popup_title_bar_color_text);
text-align: center;
font-size: calc(1.3em + var(--font_size_adjustment));
}
.popup .action_button {
background-color: var(--popup_button_color);
color: var(--popup_button_color_text);
}
.popup .popup_list_area {
overflow-x: hidden;
overflow-y: scroll;
flex-grow: 1;
flex-shrink: 1;
flex-basis: auto;
color: var(--popup_item_color_text);
}
#modelspecifier, .popup .model_item {
width: 98%;
background-color: var(--popup_item_color);
color: var(--popup_item_color_text);
margin: 5px 0 5px 1%;
border-radius: var(--radius_item_popup);
padding: 2px;
display: grid;
grid-template-areas: "folder_icon delete_icon edit_icon rename_icon file gpu_size warning_icon downloaded_icon";
grid-template-columns: 30px 0px 0px 0px auto 50px 30px 30px;
}
.popup .model_item .folder_icon {
grid-area: folder_icon;
}
.popup .model_item .edit_icon {
grid-area: edit_icon;
}
.popup .model_item .rename_icon {
grid-area: rename_icon;
}
.popup .model_item .delete_icon {
grid-area: delete_icon;
}
.popup .model_item .model {
cursor: pointer;
grid-area: file;
}
.popup .header {
width: 98%;
background-color: var(--popup_item_color);
color: var(--popup_item_color_text);
margin: 5px 0 5px 1%;
padding: 2px;
}
.popup .item {
width: 98%;
background-color: var(--popup_item_color);
color: var(--popup_item_color_text);
margin: 5px 0 5px 1%;
border-radius: var(--radius_item_popup);
padding: 2px;
}
.popup .item:hover {
background-color: var(--popup_hover_color);
color: var(--popup_hover_color_text);
}
.popup .item.selected {
background: var(--popup_selected_color);
color: var(--popup_selected_color_text);
}
.popup .popup_load_cancel {
text-align: center;
vertical-align: bottom;
color: var(--popup_title_bar_color_text);
background-color: var(--popup_title_bar_color);
padding: 0 10px 0 10px;
}
.popup_load_cancel_button {
color: var(--popup_cancel_button_color_text);
border-color: var(--popup_cancel_button_color_text);
background-color: var(--popup_cancel_button_color);
vertical-align: bottom;
display: inline;
}
.table-header-container {
display: flex;
justify-content: space-between;
cursor: pointer;
}
.table-header-sort-icon {
margin-right: 10px;
margin-top: 2px;
}
.table-header-label {
margin-top: 4px;
}
#error_message.popup {
background-color: var(--error);
color: var(--error_text);
overflow: hidden;
}
#error_message .title {
width: 100%;
background-color: var(--error_title);
color: var(--error_title_text);
text-align: center;
font-size: calc(1.3em + var(--font_size_adjustment));
}
#error_message.popup .btn-primary {
background-color: var(--error);
color: var(--error_text);
border-color: var(--error_text);
}
#error_message .popup_load_cancel {
background-color: var(--error_title);
color: var(--error_title_text);
}
#error_message.popup .popup_list_area {
overflow-x: hidden;
overflow-y: scroll;
flex-grow: 1;
flex-shrink: 1;
flex-basis: auto;
background-color: var(--error);
color: var(--error_text);
}
.breadcrumbitem {
padding: 5px 10px 5px 10px;
color: #ffffff;
background-color: transparent;
border: none;
-moz-transition: background-color 0.25s ease-in;
-o-transition: background-color 0.25s ease-in;
-webkit-transition: background-color 0.25s ease-in;
transition: background-color 0.25s ease-in;
}
.breadcrumbitem:hover {
cursor: pointer;
background-color: #688f1f;
}
.loadmodelsettings {
overflow-y: auto;
max-height: 50%;
}
/*----------------------------- Model Load Popup ------------------------------------------*/
#specspan, .popup_list_area .model_item .model {
grid-area: file;
display: grid;
grid-template-areas: "item gpu_size";
grid-template-columns: auto 95px;
cursor: pointer;
}
#specspan {
grid-template-columns: auto 100px !important;
cursor: auto !important;
}
#model-spec-usage {
position: relative;
left: -20px;
}
.popup .model_item:hover {
background-color: var(--popup_hover_color);
color: var(--popup_hover_color_text);
}
.popup .model_item .selected {
background: var(--popup_selected_color);
color: var(--popup_selected_color_text);
}
.model_setting_container {
display: grid;
grid-template-areas: "label label"
"item item"
"minlabel maxlabel";
grid-template-rows: 20px 16px 19px;
grid-template-columns: auto 40px;
row-gap: 0.2em;
border: 1px;
margin: 2px;
}
.model_setting_minlabel {
grid-area: minlabel;
padding-top: 3px;
color: var(--popup_title_bar_color_text);
overflow: hidden;
text-align: left;
font-size: calc(0.8em + var(--font_size_adjustment));
}
.model_setting_maxlabel {
color: var(--popup_title_bar_color_text);
padding-top: 3px;
grid-area: maxlabel;
overflow: hidden;
text-align: right;
font-size: calc(0.8em + var(--font_size_adjustment));
}
.model_setting_label {
color: var(--popup_title_bar_color_text);
grid-area: label;
overflow: hidden;
text-align: left;
}
.model_setting_value {
color: var(--popup_title_bar_color_text);
text-align: left;
grid-area: label;
background-color: inherit;
color: inherit;
border: none;
outline: none;
}
.model_setting_value:focus {
color: var(--text_edit);
}
.model_setting_item {
grid-area: item;
overflow: hidden;
}
.model_setting_item_input {
width:95%;
}
@font-face {
font-family: 'Material Icons Outlined';
font-style: normal;
src: url(/static/MaterialIconsOutlined-Regular.otf) format('opentype');
}
.material-icons-outlined {
font-family: 'Material Icons Outlined';
font-weight: normal;
font-style: normal;
font-size: calc(24px + var(--font_size_adjustment)); /* Preferred icon size */
display: inline-block;
line-height: 1;
text-transform: none;
letter-spacing: normal;
word-wrap: normal;
white-space: nowrap;
direction: ltr;
/* Support for all WebKit browsers. */
-webkit-font-smoothing: antialiased;
/* Support for Safari and Chrome. */
text-rendering: optimizeLegibility;
/* Support for Firefox. */
-moz-osx-font-smoothing: grayscale;
/* Support for IE. */
font-feature-settings: 'liga';
}
.material-icons-outlined.cursor:hover{
filter: brightness(85%);
}
.setting_label .helpicon {
color: var(--help_icon);
cursor: help;
font-size: calc(14px + var(--font_size_adjustment)) !important;
flex: auto;
width: 15px;
align-self: flex-end;
line-height: inherit;
border-radius: inherit;
margin-right: inherit;
padding: inherit;
background: inherit;
border: inherit;
text-decoration: inherit;
}
#tooltip-text {
content: attr(tooltip);
position: fixed;
transition: opacity 0s linear 0.5s;
white-space: normal;
border-radius: var(--radius_tooltip);
opacity: 1;
padding: 7px;
color: var(--tooltip_text);
background-color: var(--tooltip_background);
pointer-events: none;
z-index: 9999999;
}
.tooltip-standard {
border: 1px ridge grey;
font-family: "Helvetica Neue",Helvetica,Arial,sans-serif;
width: min-content;
max-width: 25%;
/*margin-right: -3px;*/
}
.tooltip-context-token {
border: none;
font-family: monospace;
max-width: min-content;
}
/* Mobile tooltips */
@media (pointer: coarse), (hover: none) {
[tooltip]:after {
opacity: 0;
content: "";
}
[tooltip]:hover::after {
content: attr(tooltip);
position: fixed;
top: calc(var(--mouse-y) * 100vh);
left: calc(var(--mouse-x) * 100vw);
transform: translate(var(--tooltip_x), var(--tooltip-y));
transition: opacity 0s linear 0.5s;
opacity: 1;
padding: 0px 2px;
background-color: rgba(0, 0, 0, 0.6);
pointer-events: none;
z-index: 9999999;
}
}
.popup .model_item .model_menu_selected {
color: var(--popup_selected_color);
background-color: var(--popup_selected_color_text);
}
.settings_select {
color: var(--dropdown_text);
background: var(--dropdown_background);
margin-left: auto;
margin-right: 25px;
}
.setting_value {
text-align: right;
grid-area: value;
font-size: calc(12px + var(--font_size_adjustment));
padding: 2px;
padding-top: 0px;
background-color: inherit;
color: inherit;
border: none;
outline: none;
}
.setting_value:focus {
color: var(--text_edit);
}
.setting_container_model {
display: grid;
grid-template-areas: "label value"
"item item"
"minlabel maxlabel";
grid-template-rows: 20px auto 20px;
grid-template-columns: auto 30px;
row-gap: 0.2em;
background-color: var(--setting_background);
color: var(--setting_text);
border-radius: var(--radius_settings_background);
padding: 2px;
margin: 2px;
width: calc(100%);
}
.setting_container_model .setting_item{
font-size: calc(0.93em + var(--font_size_adjustment));
margin-left: 10px;
}
.setting_minlabel {
padding-top: 6px;
grid-area: minlabel;
overflow: hidden;
padding: 5px;
padding-top: 0px;
text-align: left;
font-size: calc(0.8em + var(--font_size_adjustment));
}
.setting_maxlabel {
padding-top: 6px;
grid-area: maxlabel;
overflow: hidden;
padding: 5px;
padding-top: 0px;
text-align: right;
font-size: calc(0.8em + var(--font_size_adjustment));
text-align: left;
}
.setting_label {
display: flex;
grid-area: label;
overflow: hidden;
padding: 5px;
padding-right: 0px;
padding-top: 0px;
}
.input_error {
border: 5px solid red !important;
box-sizing: border-box !important;
}
.popup .model_item.model_menu_selected {
color: var(--popup_selected_color);
background-color: var(--popup_selected_color_text);
}

View File

@@ -347,6 +347,28 @@ border-top-right-radius: var(--tabs_rounding);
}
.setting_container_model {
display: grid;
grid-template-areas: "label value"
"item item"
"minlabel maxlabel";
grid-template-rows: 20px auto 20px;
grid-template-columns: auto 30px;
row-gap: 0.2em;
background-color: var(--setting_background);
color: var(--setting_text);
border-radius: var(--radius_settings_background);
padding: 2px;
margin: 2px;
width: calc(100%);
}
.setting_container_model .setting_item{
font-size: calc(0.93em + var(--font_size_adjustment));
margin-left: 10px;
}
.setting_minlabel {
padding-top: 6px;
grid-area: minlabel;
@@ -364,6 +386,7 @@ border-top-right-radius: var(--tabs_rounding);
padding-top: 0px;
text-align: right;
font-size: calc(0.8em + var(--font_size_adjustment));
text-align: left;
}
.setting_label {
@@ -1990,6 +2013,11 @@ body {
grid-area: file;
}
.popup .model_item.model_menu_selected {
color: var(--popup_selected_color);
background-color: var(--popup_selected_color_text);
}
.popup .header {
width: 98%;
background-color: var(--popup_item_color);
@@ -2102,6 +2130,13 @@ body {
cursor: pointer;
background-color: #688f1f;
}
.loadmodelsettings {
overflow-y: auto;
max-height: 50%;
}
/*----------------------------- Model Load Popup ------------------------------------------*/
#specspan, .popup_list_area .model_item .model {
@@ -3370,6 +3405,23 @@ textarea {
}
}
@keyframes pulse-red {
0% {
transform: scale(0.95);
box-shadow: 0 0 0 0 rgba(255, 0, 0, 0.7);
}
70% {
transform: scale(1);
box-shadow: 0 0 0 10px rgba(255, 0, 0, 0);
}
100% {
transform: scale(0.95);
box-shadow: 0 0 0 0 rgba(255, 0, 0, 0);
}
}
@keyframes pulse-text {
0% {
filter: blur(3px);
@@ -3391,6 +3443,11 @@ textarea {
}
}
.input_error {
border: 5px solid red !important;
box-sizing: border-box !important;
}
.single_pulse {
animation: pulse-text 0.5s 1;
}
@@ -3495,7 +3552,7 @@ h2 .material-icons-outlined {
}
.horde_trigger[model_model="ReadOnly"],
.horde_trigger[model_model="Read Only"],
.horde_trigger[model_model="CLUSTER"] {
display: none;
}

View File

@@ -14,7 +14,8 @@ socket.on('load_popup', function(data){load_popup(data);});
socket.on('popup_items', function(data){popup_items(data);});
socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);});
socket.on('popup_edit_file', function(data){popup_edit_file(data);});
socket.on('show_model_menu', function(data){show_model_menu(data);});
//socket.on('show_model_menu', function(data){show_model_menu(data);});
socket.on('open_model_load_menu', function(data){show_model_menu(data);});
socket.on('selected_model_info', function(data){selected_model_info(data);});
socket.on('oai_engines', function(data){oai_engines(data);});
socket.on('buildload', function(data){buildload(data);});
@@ -81,6 +82,7 @@ const on_colab = $el("#on_colab").textContent == "true";
let story_id = -1;
var dirty_chunks = [];
var initial_socketio_connection_occured = false;
var selected_model_data;
// Each entry into this array should be an object that looks like:
// {class: "class", key: "key", func: callback}
@@ -1501,48 +1503,50 @@ function getModelParameterCount(modelName) {
}
function show_model_menu(data) {
//clear old options
document.getElementById("modelkey").classList.add("hidden");
document.getElementById("modelkey").value = "";
document.getElementById("modelurl").classList.add("hidden");
document.getElementById("use_gpu_div").classList.add("hidden");
document.getElementById("use_8_bit_div").classList.add("hidden");
document.getElementById("modellayers").classList.add("hidden");
document.getElementById("oaimodel").classList.add("hidden");
var model_layer_bars = document.getElementById('model_layer_bars');
while (model_layer_bars.firstChild) {
model_layer_bars.removeChild(model_layer_bars.firstChild);
//clear out the loadmodelsettings
var loadmodelsettings = document.getElementById('loadmodelsettings')
while (loadmodelsettings.firstChild) {
loadmodelsettings.removeChild(loadmodelsettings.firstChild);
}
//Clear out plugin selector
var model_plugin = document.getElementById('modelplugin');
while (model_plugin.firstChild) {
model_plugin.removeChild(model_plugin.firstChild);
}
model_plugin.classList.add("hidden");
var accept = document.getElementById("btn_loadmodelaccept");
accept.disabled = false;
//clear out the breadcrumbs
var breadcrumbs = document.getElementById('loadmodellistbreadcrumbs')
while (breadcrumbs.firstChild) {
breadcrumbs.removeChild(breadcrumbs.firstChild);
}
//add breadcrumbs
//console.log(data.breadcrumbs);
for (item of data.breadcrumbs) {
var button = document.createElement("button");
button.classList.add("breadcrumbitem");
button.setAttribute("model", data.menu);
button.setAttribute("folder", item[0]);
button.textContent = item[1];
button.onclick = function () {
socket.emit('select_model', {'menu': "", 'model': this.getAttribute("model"), 'path': this.getAttribute("folder")});
};
breadcrumbs.append(button);
var span = document.createElement("span");
span.textContent = "\\";
breadcrumbs.append(span);
}
//add breadcrumbs
if ('breadcrumbs' in data) {
for (item of data.breadcrumbs) {
var button = document.createElement("button");
button.classList.add("breadcrumbitem");
button.setAttribute("model", data.menu);
button.setAttribute("folder", item[0]);
button.textContent = item[1];
button.onclick = function () {
socket.emit('select_model', {'menu': "", 'name': this.getAttribute("model"), 'path': this.getAttribute("folder")});
};
breadcrumbs.append(button);
var span = document.createElement("span");
span.textContent = "\\";
breadcrumbs.append(span);
}
}
//clear out the items
var model_list = document.getElementById('loadmodellistcontent')
while (model_list.firstChild) {
model_list.removeChild(model_list.firstChild);
}
//add items
for (item of data.data) {
for (item of data.items) {
var list_item = document.createElement("span");
list_item.classList.add("model_item");
@@ -1564,10 +1568,33 @@ function show_model_menu(data) {
//create the actual item
var popup_item = document.createElement("span");
popup_item.classList.add("model");
popup_item.setAttribute("display_name", item.label);
popup_item.id = item.name;
for (const key in item) {
if (key == "name") {
popup_item.id = item[key];
}
popup_item.setAttribute(key, item[key]);
}
popup_item.onclick = function() {
var attributes = this.attributes;
var obj = {};
for (var i = 0, len = attributes.length; i < len; i++) {
obj[attributes[i].name] = attributes[i].value;
}
//remember the selected model data so we can send it to the server when you accept
var accept = document.getElementById("popup_accept");
selected_model_data = obj;
//send the data to the server so it can figure out what data we need from the user for the model
socket.emit('select_model', obj);
//clear out the selected item and select this one visually
for (const element of document.getElementsByClassName("model_menu_selected")) {
element.classList.remove("model_menu_selected");
}
this.closest(".model_item").classList.add("model_menu_selected");
}
popup_item.setAttribute("Menu", data.menu)
//name text
var text = document.createElement("span");
text.style="grid-area: item;";
@@ -1615,241 +1642,327 @@ function show_model_menu(data) {
});
})();
popup_item.onclick = function () {
var accept = document.getElementById("btn_loadmodelaccept");
accept.classList.add("disabled");
socket.emit("select_model", {"model": this.id, "menu": this.getAttribute("Menu"), "display_name": this.getAttribute("display_name")});
var model_list = document.getElementById('loadmodellistcontent').getElementsByClassName("selected");
for (model of model_list) {
model.classList.remove("selected");
}
this.classList.add("selected");
accept.setAttribute("selected_model", this.id);
accept.setAttribute("menu", this.getAttribute("Menu"));
accept.setAttribute("display_name", this.getAttribute("display_name"));
};
list_item.append(popup_item);
model_list.append(list_item);
}
var accept = document.getElementById("btn_loadmodelaccept");
accept.disabled = true;
//finally, if they selected the custom hugging face menu we show the input box
if (data['menu'] == "customhuggingface") {
document.getElementById("custommodelname").classList.remove("hidden");
} else {
document.getElementById("custommodelname").classList.add("hidden");
}
// detect if we are in a model selection screen and show the reference
var refelement = document.getElementById("modelspecifier");
var check = document.getElementById("mainmenu");
if (check) {
refelement.classList.remove("hidden");
} else {
refelement.classList.add("hidden");
}
openPopup("load-model");
}
function selected_model_info(data) {
function model_settings_checker() {
//get check value:
missing_element = false;
if (this.check_data != null) {
if ('sum' in this.check_data) {
check_value = 0
for (const temp of this.check_data['sum']) {
if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value);
} else {
missing_element = true;
}
}
} else {
check_value = this.value
}
if (this.check_data['check'] == "=") {
valid = (check_value == this.check_data['value']);
} else if (this.check_data['check'] == "!=") {
valid = (check_value != this.check_data['value']);
} else if (this.check_data['check'] == ">=") {
valid = (check_value >= this.check_data['value']);
} else if (this.check_data['check'] == "<=") {
valid = (check_value <= this.check_data['value']);
} else if (this.check_data['check'] == ">") {
valid = (check_value > this.check_data['value']);
} else if (this.check_data['check'] == "<") {
valid = (check_value < this.check_data['value']);
}
if (valid || missing_element) {
//if we are supposed to refresh when this value changes we'll resubmit
if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) {
//get an object of all the input settings from the user
data = {}
settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area");
if (settings_area) {
for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) {
var element_data = element.value;
if (element.getAttribute("data_type") == "int") {
element_data = parseInt(element_data);
} else if (element.getAttribute("data_type") == "float") {
element_data = parseFloat(element_data);
} else if (element.getAttribute("data_type") == "bool") {
element_data = (element_data == 'on');
}
data[element.id.split("|")[1].replace("_value", "")] = element_data;
}
}
data = {...data, ...selected_model_data};
data['plugin'] = document.getElementById("modelplugin").value;
socket.emit("resubmit_model_info", data);
}
if ('sum' in this.check_data) {
for (const temp of this.check_data['sum']) {
if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error');
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip");
}
}
} else {
this.closest(".setting_container_model").classList.remove('input_error');
this.closest(".setting_container_model").removeAttribute("tooltip");
}
} else {
if ('sum' in this.check_data) {
for (const temp of this.check_data['sum']) {
if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error');
if (this.check_data['check_message']) {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']);
} else {
document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip");
}
}
}
} else {
this.closest(".setting_container_model").classList.add('input_error');
if (this.check_data['check_message']) {
this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']);
} else {
this.closest(".setting_container_model").removeAttribute("tooltip");
}
}
}
}
var accept = document.getElementById("btn_loadmodelaccept");
//hide or unhide key
if (data.key) {
document.getElementById("modelkey").classList.remove("hidden");
document.getElementById("modelkey").value = data.key_value;
} else {
document.getElementById("modelkey").classList.add("hidden");
document.getElementById("modelkey").value = "";
}
//hide or unhide URL
if (data.url) {
document.getElementById("modelurl").classList.remove("hidden");
} else {
document.getElementById("modelurl").classList.add("hidden");
}
//hide or unhide 8 bit mode
if (data.bit_8_available) {
document.getElementById("use_8_bit_div").classList.remove("hidden");
} else {
document.getElementById("use_8_bit_div").classList.add("hidden");
document.getElementById("use_8_bit").checked = false;
}
//default URL loading
if (data.default_url != null) {
document.getElementById("modelurl").value = data.default_url;
}
//change model loading on url if needed
if (data.models_on_url) {
document.getElementById("modelurl").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': document.getElementById("modelkey").value, 'url': this.value});};
document.getElementById("modelkey").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value, 'url': document.getElementById("modelurl").value});};
} else {
document.getElementById("modelkey").ochange = function () {socket.emit('OAI_Key_Update', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value});};
document.getElementById("modelurl").ochange = null;
}
//show model select for APIs
if (data.show_online_model_select) {
document.getElementById("oaimodel").classList.remove("hidden");
} else {
document.getElementById("oaimodel").classList.add("hidden");
}
//Multiple Model Select?
if (data.multi_online_models) {
document.getElementById("oaimodel").setAttribute("multiple", "");
document.getElementById("oaimodel").options[0].textContent = "All"
} else {
document.getElementById("oaimodel").removeAttribute("multiple");
document.getElementById("oaimodel").options[0].textContent = "Select Model(s)"
}
//hide or unhide the use gpu checkbox
if (data.gpu) {
document.getElementById("use_gpu_div").classList.remove("hidden");
} else {
document.getElementById("use_gpu_div").classList.add("hidden");
}
//setup breakmodel
if (data.breakmodel) {
document.getElementById("modellayers").classList.remove("hidden");
//setup model layer count
document.getElementById("gpu_layers_current").textContent = data.break_values.reduce((a, b) => a + b, 0);
document.getElementById("gpu_layers_max").textContent = data.layer_count;
document.getElementById("gpu_count").value = data.gpu_count;
//create the gpu load bars
var model_layer_bars = document.getElementById('model_layer_bars');
while (model_layer_bars.firstChild) {
model_layer_bars.removeChild(model_layer_bars.firstChild);
ok_to_load = true;
for (const item of document.getElementsByClassName("input_error")) {
if (item.classList.contains("input_error") && !item.closest(".model_plugin_settings_area").classList.contains("hidden")) {
ok_to_load = false;
break;
}
//Add the bars
for (let i = 0; i < data.gpu_names.length; i++) {
var div = document.createElement("div");
div.classList.add("model_setting_container");
//build GPU text
var span = document.createElement("span");
span.classList.add("model_setting_label");
span.textContent = "GPU " + i + " " + data.gpu_names[i] + ": "
//build layer count box
var input = document.createElement("input");
input.classList.add("model_setting_value");
input.classList.add("setting_value");
input.inputmode = "numeric";
input.id = "gpu_layers_box_"+i;
input.value = data.break_values[i];
input.onblur = function () {
document.getElementById(this.id.replace("_box", "")).value = this.value;
update_gpu_layers();
}
span.append(input);
div.append(span);
//build layer count slider
var input = document.createElement("input");
input.classList.add("model_setting_item");
input.type = "range";
input.min = 0;
input.max = data.layer_count;
input.step = 1;
input.value = data.break_values[i];
input.id = "gpu_layers_" + i;
input.onchange = function () {
document.getElementById(this.id.replace("gpu_layers", "gpu_layers_box")).value = this.value;
update_gpu_layers();
}
div.append(input);
//build slider bar #s
//min
var span = document.createElement("span");
span.classList.add("model_setting_minlabel");
var span2 = document.createElement("span");
span2.style="top: -4px; position: relative;";
span2.textContent = 0;
span.append(span2);
div.append(span);
//max
var span = document.createElement("span");
span.classList.add("model_setting_maxlabel");
var span2 = document.createElement("span");
span2.style="top: -4px; position: relative;";
span2.textContent = data.layer_count;
span.append(span2);
div.append(span);
model_layer_bars.append(div);
}
//add the disk layers
if (data.disk_break) {
var div = document.createElement("div");
div.classList.add("model_setting_container");
//build GPU text
var span = document.createElement("span");
span.classList.add("model_setting_label");
span.textContent = "Disk cache: "
//build layer count box
var input = document.createElement("input");
input.classList.add("model_setting_value");
input.classList.add("setting_value");
input.inputmode = "numeric";
input.id = "disk_layers_box";
input.value = data.disk_break_value;
input.onblur = function () {
document.getElementById(this.id.replace("_box", "")).value = this.value;
update_gpu_layers();
}
span.append(input);
div.append(span);
//build layer count slider
var input = document.createElement("input");
input.classList.add("model_setting_item");
input.type = "range";
input.min = 0;
input.max = data.layer_count;
input.step = 1;
input.value = data.disk_break_value;
input.id = "disk_layers";
input.onchange = function () {
document.getElementById(this.id+"_box").value = this.value;
update_gpu_layers();
}
div.append(input);
//build slider bar #s
//min
var span = document.createElement("span");
span.classList.add("model_setting_minlabel");
var span2 = document.createElement("span");
span2.style="top: -4px; position: relative;";
span2.textContent = 0;
span.append(span2);
div.append(span);
//max
var span = document.createElement("span");
span.classList.add("model_setting_maxlabel");
var span2 = document.createElement("span");
span2.style="top: -4px; position: relative;";
span2.textContent = data.layer_count;
span.append(span2);
div.append(span);
}
model_layer_bars.append(div);
update_gpu_layers();
} else {
document.getElementById("modellayers").classList.add("hidden");
}
if (ok_to_load) {
accept.classList.remove("disabled");
accept.disabled = false;
} else {
accept.classList.add("disabled");
accept.disabled = true;
}
//We now have valid display boxes potentially. We'll go through them and update the display
for (const item of document.querySelectorAll(".model_settings_valid_display:not(#blank_model_settings_valid_display)")) {
check_value = 0
missing_element = false;
for (const temp of item.check_data['sum']) {
if (document.getElementById(item.id.split("|")[0] +"|" + temp + "_value")) {
check_value += parseInt(document.getElementById(item.id.split("|")[0] +"|" + temp + "_value").value);
} else {
missing_element = true;
}
}
if (!missing_element) {
item.innerText = item.original_text.replace("%1", check_value);
}
}
}
function selected_model_info(sent_data) {
const data = sent_data['model_backends'];
//clear out the loadmodelsettings
var loadmodelsettings = document.getElementById('loadmodelsettings')
while (loadmodelsettings.firstChild) {
loadmodelsettings.removeChild(loadmodelsettings.firstChild);
}
//Clear out plugin selector
var model_plugin = document.getElementById('modelplugin');
while (model_plugin.firstChild) {
model_plugin.removeChild(model_plugin.firstChild);
}
var accept = document.getElementById("btn_loadmodelaccept");
accept.disabled = false;
modelplugin = document.getElementById("modelplugin");
modelplugin.classList.remove("hidden");
modelplugin.onchange = function () {
for (const area of document.getElementsByClassName("model_plugin_settings_area")) {
area.classList.add("hidden");
}
if (document.getElementById(this.value + "_settings_area")) {
document.getElementById(this.value + "_settings_area").classList.remove("hidden");
}
model_settings_checker()
}
//create the content
for (const [loader, items] of Object.entries(data)) {
model_area = document.createElement("DIV");
model_area.id = loader + "_settings_area";
model_area.classList.add("model_plugin_settings_area");
model_area.classList.add("hidden");
modelpluginoption = document.createElement("option");
modelpluginoption.innerText = loader;
modelpluginoption.value = loader;
modelplugin.append(modelpluginoption);
//create the user input for each requested input
for (item of items) {
let new_setting = document.getElementById('blank_model_settings').cloneNode(true);
new_setting.id = loader;
new_setting.classList.remove("hidden");
new_setting.querySelector('#blank_model_settings_label').innerText = item['label'];
new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']);
onchange_event = model_settings_checker;
if (item['uitype'] == "slider") {
var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number');
slider_number.value = item['default'];
slider_number.id = loader + "|" + item['id'] + "_value_text";
slider_number.onchange = function() { document.getElementById(this.id.replace("_text", "")).value = this.value;};
var slider = new_setting.querySelector('#blank_model_settings_slider');
slider.value = item['default'];
slider.min = item['min'];
slider.max = item['max'];
slider.setAttribute("data_type", item['unit']);
slider.id = loader + "|" + item['id'] + "_value";
if ('check' in item) {
slider.check_data = item['check'];
slider_number.check_data = item['check'];
} else {
slider.check_data = null;
slider_number.check_data = null;
}
slider.oninput = function() { document.getElementById(this.id+"_text").value = this.value;};
slider.onchange = onchange_event;
slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min'];
new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max'];
slider.noresubmit = true;
slider.onchange();
slider.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_slider').remove();
}
if (item['uitype'] == "toggle") {
toggle = document.createElement("input");
toggle.type='checkbox';
toggle.classList.add("setting_item_input");
toggle.classList.add("blank_model_settings_input");
toggle.classList.add("model_settings_input");
toggle.id = loader + "|" + item['id'] + "_value";
toggle.checked = item['default'];
toggle.onclick = onchange_event;
toggle.setAttribute("data_type", item['unit']);
toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if ('check' in item) {
toggle.check_data = item['check'];
} else {
toggle.check_data = null;
}
new_setting.querySelector('#blank_model_settings_toggle').append(toggle);
setTimeout(function() {
$('#'+loader + "\\|" + item['id'] + "_value").bootstrapToggle({size: "mini", onstyle: "success", toggle: "toggle"});
}, 200);
toggle.noresubmit = true;
toggle.onclick();
toggle.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_toggle').remove();
}
if (item['uitype'] == "dropdown") {
var select_element = new_setting.querySelector('#blank_model_settings_dropdown');
select_element.id = loader + "|" + item['id'] + "_value";
for (const dropdown_value of item['children']) {
new_option = document.createElement("option");
new_option.value = dropdown_value['value'];
new_option.innerText = dropdown_value['text'];
select_element.append(new_option);
}
select_element.value = item['default'];
select_element.setAttribute("data_type", item['unit']);
select_element.onchange = onchange_event;
select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if (('multiple' in item) && (item['multiple'])) {
select_element.multiple = true;
select_element.size = 10;
}
if ('check' in item) {
select_element.check_data = item['check'];
} else {
select_element.check_data = null;
}
select_element.noresubmit = true;
select_element.onchange();
select_element.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_dropdown').remove();
}
if (item['uitype'] == "password") {
var password_item = new_setting.querySelector('#blank_model_settings_password');
password_item.id = loader + "|" + item['id'] + "_value";
password_item.value = item['default'];
password_item.setAttribute("data_type", item['unit']);
password_item.onchange = onchange_event;
password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if ('check' in item) {
password_item.check_data = item['check'];
} else {
password_item.check_data = null;
}
password_item.noresubmit = true;
password_item.onchange();
password_item.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_password').remove();
}
if (item['uitype'] == "text") {
var text_item = new_setting.querySelector('#blank_model_settings_text');
text_item.id = loader + "|" + item['id'] + "_value";
text_item.value = item['default'];
text_item.onchange = onchange_event;
text_item.setAttribute("data_type", item['unit']);
text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']);
if ('check' in item) {
text_item.check_data = item['check'];
} else {
text_item.check_data = null;
}
text_item.noresubmit = true;
text_item.onchange();
text_item.noresubmit = false;
} else {
new_setting.querySelector('#blank_model_settings_text').remove();
}
if (item['uitype'] == "Valid Display") {
new_setting = document.createElement("DIV");
new_setting.classList.add("model_settings_valid_display");
new_setting.id = loader + "|" + item['id'] + "_value";
new_setting.innerText = item['label'];
new_setting.check_data = item['check'];
new_setting.original_text = item['label'];
}
model_area.append(new_setting);
loadmodelsettings.append(model_area);
}
}
//unhide the first plugin settings
if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) {
document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden");
}
model_settings_checker()
}
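// Editor's sketch (illustrative, not from the original commit): the
// sent_data['model_backends'] object consumed above maps each backend name to a
// list of input descriptors. The field names follow the code above; the backend
// name, ids, and values below are made-up examples.
// {
//     "Huggingface": [
//         {"uitype": "slider", "id": "example_layers", "label": "Example Layers",
//          "unit": "int", "min": 0, "max": 32, "default": 0,
//          "refresh_model_inputs": false, "tooltip": "Example help text",
//          "check": {"sum": ["example_layers"], "check": "<=", "value": 32,
//                    "check_message": "Too many layers assigned"}},
//         {"uitype": "toggle", "id": "use_gpu", "label": "Use GPU", "unit": "bool",
//          "default": true, "refresh_model_inputs": false, "tooltip": "Example help text"}
//     ]
// }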
@@ -1877,42 +1990,37 @@ function update_gpu_layers() {
function load_model() {
var accept = document.getElementById('btn_loadmodelaccept');
gpu_layers = []
disk_layers = 0;
if (!(document.getElementById("modellayers").classList.contains("hidden"))) {
for (let i=0; i < document.getElementById("gpu_count").value; i++) {
gpu_layers.push(document.getElementById("gpu_layers_"+i).value);
}
if (document.getElementById("disk_layers")) {
disk_layers = document.getElementById("disk_layers").value;
settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area");
//get an object of all the input settings from the user
data = {}
if (settings_area) {
for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) {
var element_data = element.value;
if ((element.tagName == "SELECT") && (element.multiple)) {
element_data = [];
for (var i=0, iLen=element.options.length; i<iLen; i++) {
if (element.options[i].selected) {
element_data.push(element.options[i].value);
}
}
} else {
if (element.getAttribute("data_type") == "int") {
element_data = parseInt(element_data);
} else if (element.getAttribute("data_type") == "float") {
element_data = parseFloat(element_data);
} else if (element.getAttribute("data_type") == "bool") {
element_data = (element_data == 'on');
}
}
data[element.id.split("|")[1].replace("_value", "")] = element_data;
}
}
//Need to do different stuff with custom models
if ((accept.getAttribute('menu') == 'GPT2Custom') || (accept.getAttribute('menu') == 'NeoCustom')) {
var model = document.getElementById("btn_loadmodelaccept").getAttribute("menu");
var path = document.getElementById("btn_loadmodelaccept").getAttribute("display_name");
} else {
var model = document.getElementById("btn_loadmodelaccept").getAttribute("selected_model");
var path = "";
}
data = {...data, ...selected_model_data};
let selected_models = [];
for (item of document.getElementById("oaimodel").selectedOptions) {
selected_models.push(item.value);
}
if (selected_models.length == 1 && selected_models[0] == '') {
selected_models = [];
} else if (selected_models.length == 1) {
selected_models = selected_models[0];
}
data['plugin'] = document.getElementById("modelplugin").value;
message = {'model': model, 'path': path, 'use_gpu': document.getElementById("use_gpu").checked,
'key': document.getElementById('modelkey').value, 'gpu_layers': gpu_layers.join(),
'disk_layers': disk_layers, 'url': document.getElementById("modelurl").value,
'online_model': selected_models,
'use_8_bit': document.getElementById('use_8_bit').checked};
socket.emit("load_model", message);
socket.emit("load_model", data);
closePopups();
}

View File

@@ -18,7 +18,7 @@
<script src="static/bootstrap.min.js"></script>
<script src="static/bootstrap-toggle.min.js"></script>
<script src="static/rangy-core.min.js"></script>
<script src="static/application.js?ver=1.18.1f"></script>
<script defer src="static/application.js?ver=1.18.1f"></script>
<script src="static/favicon.js"></script>
</head>
<body>
@@ -283,59 +283,7 @@
</div>
</div>
</div>
<div class="popupcontainer hidden" id="loadmodelcontainer">
<div class="loadpopup">
<div class="popuptitlebar">
<div class="popuptitletext">Select A Model To Load</div>
</div>
<div id="loadmodellistbreadcrumbs">
</div>
<div id="loadmodellistcontent" style="overflow: auto; height: 300px;">
</div>
<div class="popupfooter">
<input class="form-control hidden" type="text" placeholder="Enter the URL of the server (For example a trycloudflare link)" id="modelurl" onchange="check_enable_model_load()">
<input class="form-control hidden" type="text" placeholder="key" id="modelkey" onblur="socket.send({'cmd': 'OAI_Key_Update', 'key': $('#modelkey')[0].value});">
<input class="form-control hidden" type="text" placeholder="Model Path or Hugging Face Name" id="custommodelname" menu="" onblur="socket.send({'cmd': 'selectmodel', 'data': $(this).attr('menu'), 'path_modelname': $('#custommodelname')[0].value});">
</div>
<div class="popupfooter">
<select class="form-control hidden" id="oaimodel"><option value="">Select Model(s)</option></select>
</div>
<div class="popupfooter hidden" id=modellayers>
<div class='settingitem' style="width:100%">
<div class='settinglabel'>
<div class="justifyleft">
GPU/Disk Layers
<span class="helpicon">?
<span class="helptext">Number of layers to assign to GPUs and to disk cache. Remaining layers will be put into CPU RAM.</span>
</span>
</div>
<div class="justifyright" id="gpu_layers_current">0</div>
</div>
<div id=model_layer_bars style="color: white">
</div>
<input type=hidden id='gpu_count' value=0/>
<div class="settingminmax">
<div class="justifyleft">
0
</div>
<div class="justifyright" id="gpu_layers_max">
24
</div>
</div>
</div>
</div>
<div class="popupfooter">
<button type="button" class="btn btn-primary" id="btn_loadmodelaccept">Load</button>
<button type="button" class="btn btn-primary" id="btn_loadmodelclose">Cancel</button>
<div class="box flex-push-right hidden" id=use_gpu_div>
<input type="checkbox" data-toggle="toggle" data-onstyle="success" id="use_gpu" checked>
<div class="box-label">Use GPU</div>
</div>
</div>
</div>
</div>
<div class="popupcontainer hidden" id="spcontainer">
<div id="sppopup">
<div class="popuptitlebar">
@@ -513,6 +461,12 @@
</div>
</div>
</div>
<!------------- Pop-Ups ------------------------------->
{% include 'popups.html' %}
<!------------- Templates ------------------------------->
<div class="hidden">
{% include 'templates.html' %}
</div>
</body>
</html>

View File

@@ -46,35 +46,11 @@
<div id="model-spec-usage">Usage (VRAM)</div>
</span>
</span>
<div id="loadmodellistbreadcrumbs">
</div>
<div id="loadmodellistbreadcrumbs"></div>
<div id="loadmodellistcontent" class="popup_list_area"></div>
<div id="loadmodelplugin" class="popup_load_cancel"><select id="modelplugin" class="settings_select hidden"></select></div>
<div id="loadmodelsettings" class="popup_load_cancel loadmodelsettings"></div>
<div class="popup_load_cancel">
<div>
<input class="hidden fullwidth" type="text" placeholder="key" id="modelkey" onchange="socket.emit('OAI_Key_Update', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value});">
<input class="hidden fullwidth" type="text" placeholder="Enter the URL of the server (For example a trycloudflare link)" id="modelurl" onchange="check_enable_model_load()">
<input class="hidden fullwidth" type="text" placeholder="Hugging Face Model Name" id="custommodelname" menu="" onblur="socket.emit('get_model_info', this.value);
document.getElementById('btn_loadmodelaccept').setAttribute('selected_model', this.value);
">
<select class="hidden fullwidth settings_select" id="oaimodel"><option value="">Select OAI Model</option></select>
</div>
<div class="hidden" id=modellayers>
<div class="justifyleft">
GPU/Disk Layers<span class="material-icons-outlined helpicon" tooltip="Number of layers to assign to GPUs and to disk cache. Remaining layers will be put into CPU RAM.">help_icon</span>
</div>
<div class="justifyright"><span id="gpu_layers_current">0</span>/<span id="gpu_layers_max">0</span></div>
<div id=model_layer_bars style="color: white"></div>
<input type=hidden id='gpu_count' value=0/>
</div>
<div class="box flex-push-right hidden" id=use_gpu_div>
<input type="checkbox" data-toggle="toggle" data-onstyle="success" id="use_gpu" checked>
<div class="box-label">Use GPU</div>
</div>
<div class="box flex-push-right hidden" id=use_8_bit_div onclick="set_8_bit_mode()">
<input type="checkbox" data-toggle="toggle" data-onstyle="success" id="use_8_bit" checked>
<div class="box-label">Use 8 bit mode</div>
</div>
<button type="button" class="btn popup_load_cancel_button action_button disabled" onclick="load_model()" id="btn_loadmodelaccept" disabled>Load</button>
<button type="button" class="btn popup_load_cancel_button" onclick='closePopups();' id="btn_loadmodelclose">Cancel</button>
</div>

View File

@@ -1,5 +1,4 @@
<!---------------- World Info Card ---------------------->
<link href="static/koboldai.css" rel="stylesheet">
<div draggable="true" class="world_info_card" id="world_info_">
<div class="world_info_title_area">
<div>
@@ -154,3 +153,21 @@
</div>
</div>
</div>
<!---------------- Model Settings ---------------------->
<div id="blank_model_settings" class="setting_container_model">
<span class="setting_label">
<span id="blank_model_settings_label">:&nbsp;</span><span id="blank_model_settings_tooltip" class="helpicon material-icons-outlined" style="text-align: left;" tooltip="">help_icon</span>
</span>
<input autocomplete="off" class="setting_value" id="blank_model_settings_value_slider_number">
<span class="setting_item">
<input type="range" id="blank_model_settings_slider" class="setting_item_input blank_model_settings_input model_settings_input">
<!--<input type=checkbox id="blank_model_settings_toggle" class="setting_item_input blank_model_settings_input model_settings_input" data-size="mini" data-onstyle="success" data-toggle="toggle">-->
<span id="blank_model_settings_toggle"></span>
<select id="blank_model_settings_dropdown" class="settings_select blank_model_settings_input model_settings_input"></select>
<input type=password id="blank_model_settings_password" class="settings_select blank_model_settings_input model_settings_input">
<input id="blank_model_settings_text" class="settings_select blank_model_settings_input model_settings_input">
</span>
<span class="setting_minlabel"><span style="position: relative;" id="blank_model_settings_min_label"></span></span>
<span class="setting_maxlabel"><span style="position: relative;" id="blank_model_settings_max_label"></span></span>
</span>
</div>
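The blank_model_settings block above is a client-side template: for each configuration field a backend reports, the UI can clone this block and reveal only the matching input (slider, toggle, dropdown, password, or free text), using the min/max label and tooltip slots for the rest. The sketch below shows the kind of field descriptors a backend might hand to this template; the function name and every dictionary key are assumptions for illustration, not the schema this PR defines.

# Hypothetical field descriptors; names and keys are illustrative only.
def example_requested_parameters():
    return [
        {
            "uitype": "slider",    # would map to blank_model_settings_slider
            "label": "GPU Layers",
            "id": "gpu_layers",
            "min": 0,
            "max": 24,
            "default": 0,
            "tooltip": "Number of layers to place on the GPU.",
        },
        {
            "uitype": "password",  # would map to blank_model_settings_password
            "label": "API Key",
            "id": "api_key",
            "default": "",
            "tooltip": "Key for the online service, if one is required.",
        },
    ]

for field in example_requested_parameters():
    print(field["uitype"], field["label"])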

View File

@@ -460,14 +460,14 @@ def sample_func(data, key, numseqs_aux, badwords, repetition_penalty, generated_
return carry
class PenalizingCausalTransformer(CausalTransformer):
def __init__(self, config, **kwargs):
def __init__(self, badwordsids, config, **kwargs):
# Initialize
super().__init__(config, **kwargs)
def generate_static(state, key, ctx, ctx_length, gen_length, numseqs_aux, sampler_options, soft_embeddings=None):
compiling_callback()
numseqs = numseqs_aux.shape[0]
# These are the tokens that we don't want the AI to ever write
badwords = jnp.array(koboldai_vars.badwordsids).squeeze()
badwords = jnp.array(badwordsids).squeeze()
@hk.transform
def generate_sample(context, ctx_length):
# Give the initial context to the transformer
@@ -941,7 +941,9 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2):
koboldai_vars.status_message = ""
def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None:
import koboldai_settings
def load_model(path: str, model_type: str, badwordsids=koboldai_settings.badwordsids_default, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None:
global thread_resources_env, seq, tokenizer, network, params, pad_token_id
if kwargs.get("pad_token_id"):
@@ -989,9 +991,9 @@ def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=Fa
# Try to convert HF config.json to MTJ config
if hf_checkpoint:
spec_path = os.path.join("maps", koboldai_vars.model_type + ".json")
spec_path = os.path.join("maps", model_type + ".json")
if not os.path.isfile(spec_path):
raise NotImplementedError(f"Unsupported model type {repr(koboldai_vars.model_type)}")
raise NotImplementedError(f"Unsupported model type {repr(model_type)}")
with open(spec_path) as f:
lazy_load_spec = json.load(f)
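With the hunks above, the TPU backend's load_model no longer reads the model type or the banned-token list from koboldai_vars: both are passed in explicitly, badwordsids defaulting to koboldai_settings.badwordsids_default, and the same list is what PenalizingCausalTransformer now receives in its constructor. A sketch of the resulting call shape follows; the path and model_type values are hypothetical examples, and actually running it requires a TPU runtime and a real checkpoint.

import koboldai_settings
import tpu_mtj_backend

# Hypothetical invocation: "models/gpt-j-6b" and "gptj" are example values, not
# taken from the PR. model_type selects maps/<model_type>.json when
# hf_checkpoint=True; badwordsids replaces the old koboldai_vars lookup.
tpu_mtj_backend.load_model(
    "models/gpt-j-6b",
    model_type="gptj",
    badwordsids=koboldai_settings.badwordsids_default,
    hf_checkpoint=True,
)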
@@ -1119,12 +1121,12 @@ def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=Fa
global badwords
# These are the tokens that we don't want the AI to ever write
badwords = jnp.array(koboldai_vars.badwordsids).squeeze()
badwords = jnp.array(badwordsids).squeeze()
if not path.endswith("/"):
path += "/"
network = PenalizingCausalTransformer(params, dematerialized=True)
network = PenalizingCausalTransformer(badwordsids, params, dematerialized=True)
if not hf_checkpoint and koboldai_vars.model != "TPUMeshTransformerGPTNeoX":
network.state = read_ckpt_lowmem(network.state, path, devices.shape[1])