Next iteration. Model Loading is broken completely now :)
aiserver.py (180 changes)
@@ -645,10 +645,14 @@ def new_socketio_on(*a, **k):
socketio.on = new_socketio_on

def emit(*args, **kwargs):
    try:
        return _emit(*args, **kwargs)
    except AttributeError:
        return socketio.emit(*args, **kwargs)
    if has_request_context():
        try:
            return _emit(*args, **kwargs)
        except AttributeError:
            return socketio.emit(*args, **kwargs)
    else: #We're trying to send data outside of the http context. This won't work. Try the relay
        if koboldai_settings.queue is not None:
            koboldai_settings.queue.put([args[0], args[1], kwargs])
utils.emit = emit

#replacement for tpool.execute to maintain request contexts
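The new emit() wrapper only queues [event, payload, kwargs] onto koboldai_settings.queue when it is called outside of an HTTP request context; something else then has to drain that queue and re-emit inside the Socket.IO context. The commit does not show that consumer, so the loop below is only a minimal sketch of what such a relay could look like, not the project's implementation.

# Illustrative relay consumer (not part of this commit): drains the queue that
# emit() fills when there is no request context and re-emits the queued events.
def socketio_relay(queue, socketio):
    while True:
        event, payload, kwargs = queue.get()  # matches [args[0], args[1], kwargs]
        socketio.emit(event, payload, **kwargs)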
@@ -1780,10 +1784,6 @@ def get_cluster_models(msg):
    emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1")
    emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2")


def reset_model_settings():
    koboldai_vars.reset_for_model_load()


def unload_model():
    global model
@@ -1816,7 +1816,7 @@ def unload_model():
    koboldai_vars.badwordsids = koboldai_settings.badwordsids_default


def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=False, online_model="", use_breakmodel_args=False, breakmodel_args_default_to_cpu=False, url=None, use_8_bit=False):
def load_model(plugin, initial_load=False):
    global model
    global tokenizer
    global model_config
@@ -1827,79 +1827,18 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
    if initial_load:
        use_breakmodel_args = True

    reset_model_settings()
    koboldai_vars.reset_model()

    koboldai_vars.cluster_requested_models = [online_model] if isinstance(online_model, str) else online_model
    if koboldai_vars.cluster_requested_models == [""]:
        koboldai_vars.cluster_requested_models = []

    koboldai_vars.noai = False
    if not use_breakmodel_args:
        set_aibusy(True)
        if koboldai_vars.model != 'ReadOnly':
            emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True)
            #Have to add a sleep so the server will send the emit for some reason
            time.sleep(0.1)
    set_aibusy(True)
    if koboldai_vars.model != 'ReadOnly':
        emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True)
        #Have to add a sleep so the server will send the emit for some reason
        time.sleep(0.1)

    if gpu_layers is not None:
        args.breakmodel_gpulayers = gpu_layers
    elif use_breakmodel_args:
        gpu_layers = args.breakmodel_gpulayers
        if breakmodel_args_default_to_cpu and gpu_layers is None:
            gpu_layers = args.breakmodel_gpulayers = []
    if disk_layers is not None:
        args.breakmodel_disklayers = int(disk_layers)
    elif use_breakmodel_args:
        disk_layers = args.breakmodel_disklayers
        if breakmodel_args_default_to_cpu and disk_layers is None:
            disk_layers = args.breakmodel_disklayers = 0
    if 'model' in globals():
        model.unload()

    unload_model()

    if online_model == "":
        koboldai_vars.configname = getmodelname()
    #Let's set the GooseAI or OpenAI server URLs if that's applicable
    else:
        koboldai_vars.online_model = online_model
        # Swap OAI Server if GooseAI was selected
        if koboldai_vars.model == "GooseAI":
            koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
            koboldai_vars.model = "OAI"
            koboldai_vars.configname = f"GooseAI_{online_model.replace('/', '_')}"
        elif koboldai_vars.model == "CLUSTER" and isinstance(online_model, list):
            if len(online_model) != 1:
                koboldai_vars.configname = koboldai_vars.model
            else:
                koboldai_vars.configname = f"{koboldai_vars.model}_{online_model[0].replace('/', '_')}"
        else:
            koboldai_vars.configname = f"{koboldai_vars.model}_{online_model.replace('/', '_')}"

        if path.exists(get_config_filename()):
            changed=False
            with open(get_config_filename(), "r") as file:
                # Check if API key exists
                js = json.load(file)
                if 'online_model' in js:
                    if js['online_model'] != online_model:
                        changed=True
                        js['online_model'] = online_model
                else:
                    changed=True
                    js['online_model'] = online_model

            if changed:
                with open("settings/{}.v2_settings".format(koboldai_vars.model), "w") as file:
                    file.write(json.dumps(js, indent=3))

        # Swap OAI Server if GooseAI was selected
        if koboldai_vars.model == "GooseAI":
            koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
            koboldai_vars.model = "OAI"
            args.configname = "GooseAI" + "/" + online_model
        elif koboldai_vars.model != "CLUSTER":
            args.configname = koboldai_vars.model + "/" + online_model
        koboldai_vars.oaiurl = koboldai_vars.oaiengines + "/{0}/completions".format(online_model)

    # If transformers model was selected & GPU available, ask to use CPU or GPU
    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
@@ -1937,84 +1876,9 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
    else:
        koboldai_vars.default_preset = koboldai_settings.default_preset


    # Ask for API key if InferKit was selected
    if koboldai_vars.model == "InferKit":
        koboldai_vars.apikey = koboldai_vars.oaiapikey

    # Swap OAI Server if GooseAI was selected
    if koboldai_vars.model == "GooseAI":
        koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
        koboldai_vars.model = "OAI"
        koboldai_vars.configname = "GooseAI"

    # Ask for API key if OpenAI was selected
    if koboldai_vars.model == "OAI" and not koboldai_vars.configname:
        koboldai_vars.configname = "OAI"

    if koboldai_vars.model == "ReadOnly":
        koboldai_vars.noai = True

    # TODO: InferKit
    if koboldai_vars.model == "ReadOnly" or koboldai_vars.noai:
        pass
    elif koboldai_vars.model in ["Colab", "API", "CLUSTER", "OAI"]:
        koboldai_vars.colaburl = url or koboldai_vars.colaburl
        koboldai_vars.usegpu = False
        koboldai_vars.breakmodel = False

        if koboldai_vars.model == "Colab":
            from modeling.inference_models.basic_api import model_loader
            model = model_loader()
        elif koboldai_vars.model == "API":
            from modeling.inference_models.api import model_loader
            model = model_loader(koboldai_vars.colaburl.replace("/request", ""))
        elif koboldai_vars.model == "CLUSTER":
            from modeling.inference_models.horde import model_loader
            model = model_loader()
        elif koboldai_vars.model == "OAI":
            from modeling.inference_models.openai import model_loader
            model = model_loader()

        model.load(initial_load=initial_load)
    # TODO: This check sucks, make a model object or somethign
    elif "rwkv" in koboldai_vars.model:
        if koboldai_vars.use_colab_tpu:
            raise RuntimeError("RWKV is not supported on the TPU.")
        from modeling.inference_models.rwkv import model_loader
        model = model_loader(koboldai_vars.model)
        model.load()
    elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai:
        # HF Torch
        logger.init("Transformers", status='Starting')
        for m in ("GPTJModel", "XGLMModel"):
            try:
                globals()[m] = getattr(__import__("transformers"), m)
            except:
                pass

        from modeling.inference_models.generic_hf_torch import model_loader
        model = model_loader(
            koboldai_vars.model,
            lazy_load=koboldai_vars.lazy_load,
            low_mem=args.lowmem
        )

        model.load(
            save_model=not (args.colab or args.cacheonly) or args.savemodel,
            initial_load=initial_load,
        )
        logger.info(f"Pipeline created: {koboldai_vars.model}")
    else:
        # TPU
        from modeling.inference_models.hf_mtj import model_loader
        model = model_loader(
            koboldai_vars.model
        )
        model.load(
            save_model=not (args.colab or args.cacheonly) or args.savemodel,
            initial_load=initial_load,
        )
    model = model_loaders[plugin]
    model.load(initial_load=initial_load)

    # TODO: Convert everywhere to use model.tokenizer
    if model:
@@ -6532,7 +6396,8 @@ def UI_2_select_model(data):
def UI_2_load_model(data):
    logger.info("loading Model")
    logger.info(data)
    model_loaders[data['plugin']].set_input_parameters(**data)
    model_loaders[data['plugin']].set_input_parameters(data)
    load_model(data['plugin'])
    #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit'])

#==================================================================#
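UI_2_load_model now receives one flat dict from the browser: the selected plugin key plus whatever fields that plugin asked for in its requested parameters. The payload below is only an illustration of that shape for the Horde backend (whose setter, later in this commit, reads 'url', 'key' and 'model'); the plugin key and the values are placeholders, not something this commit defines.

# Illustrative payload only; real keys come from each plugin's requested
# parameters and from the UI, and the plugin key "horde" is an assumption.
data = {
    "plugin": "horde",                  # key into the model_loaders registry
    "url": "https://horde.example.com", # placeholder cluster URL
    "key": "0000000000",                # placeholder API key
    "model": "example/model",           # placeholder model name
}
model_loaders[data['plugin']].set_input_parameters(data)
load_model(data['plugin'])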
@@ -8155,7 +8020,8 @@ def send_one_time_messages(data, wait_time=0):
# Test
#==================================================================#
def model_info():
    if model_config is not None:
    global model_config
    if 'model_config' in globals() and model_config is not None:
        if isinstance(model_config, dict):
            if 'model_type' in model_config:
                model_type = str(model_config['model_type'])
@@ -11045,7 +10911,7 @@ for schema in config_endpoint_schemas:
def startup():
    if koboldai_vars.model == "" or koboldai_vars.model is None:
        koboldai_vars.model = "ReadOnly"
    socketio.start_background_task(load_model, **{'initial_load':True})
    socketio.start_background_task(load_model, *('readonly',), **{'initial_load':True})

print("", end="", flush=True)
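startup() now passes the plugin name positionally, so the background task resolves to load_model('readonly', initial_load=True) and the loader is pulled out of model_loaders. The commit indexes into model_loaders in several places but never shows how the registry is built; the sketch below is one plausible, purely illustrative construction.

# Purely illustrative registry; the keys and the way instances are created here
# are assumptions, only the readonly loader's no-argument __init__ is shown in
# this commit.
from modeling.inference_models import readonly

model_loaders = {
    "readonly": readonly.model_loader(),
    # the other backends (api, basic_api, horde, openai, generic_hf_torch, hf_mtj)
    # would be registered here under their own plugin keys
}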
@@ -169,6 +169,7 @@ class InferenceModel:
        ]
        self.tokenizer = None
        self.capabilties = ModelCapabilities()
        self.model_name = "Not Defined"

    def is_valid(self, model_name, model_path, menu_path, vram):
        return True
@@ -176,7 +177,7 @@ class InferenceModel:
    def requested_parameters(self, model_name, model_path, menu_path, vram):
        return {}

    def define_input_parameters(self):
    def set_input_parameters(self, parameters):
        return

    def load(self, save_model: bool = False, initial_load: bool = False) -> None:
@@ -186,6 +187,9 @@ class InferenceModel:
        self._load(save_model=save_model, initial_load=initial_load)
        self._post_load()

    def unload(self):
        return

    def _pre_load(self) -> None:
        """Pre load hook. Called before `_load()`."""
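With these base-class changes every backend is expected to expose the same small surface: is_valid(), requested_parameters(), set_input_parameters(parameters) taking a plain dict, load()/_load(), and unload(). A hypothetical minimal plugin, written only to illustrate those hook points and not part of the commit, could look like this:

# Hypothetical example plugin; the backend name and the 'base_url' field below
# are invented for illustration.
class model_loader(InferenceModel):
    def is_valid(self, model_name, model_path, menu_path, vram):
        return model_name == "ExampleBackend"

    def requested_parameters(self, model_name, model_path, menu_path, vram):
        return {}

    def set_input_parameters(self, parameters):
        # parameters is the flat dict assembled by the UI
        self.base_url = parameters.get("base_url", "")

    def _load(self, save_model: bool = False, initial_load: bool = False) -> None:
        self.model = None  # a real plugin would construct its backend handle here

    def unload(self):
        self.model = None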
@@ -46,8 +46,8 @@ class model_loader(InferenceModel):
        })
        return requested_parameters

    def set_input_parameters(self, base_url=""):
        self.base_url = base_url.rstrip("/")
    def set_input_parameters(self, parameters):
        self.base_url = parameters['base_url'].rstrip("/")

    def _load(self, save_model: bool, initial_load: bool) -> None:
        tokenizer_id = requests.get(f"{self.base_url}/api/v1/model").json()["result"]
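The setter now takes the same flat dict as every other plugin instead of named keyword arguments; only the 'base_url' key is read here, and trailing slashes are stripped before the URL is used to fetch the remote model name. A small example of the new call, with a placeholder plugin key and URL:

# Example of the dict-based signature; "api" and the URL are placeholders,
# only the 'base_url' field is what this setter actually reads.
loader = model_loaders["api"]
loader.set_input_parameters({"base_url": "http://127.0.0.1:5000/"})
assert loader.base_url == "http://127.0.0.1:5000"  # trailing "/" removed by rstrip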
@@ -45,8 +45,8 @@ class model_loader(InferenceModel):
        })
        return requested_parameters

    def set_input_parameters(self, colaburl=""):
        self.colaburl = colaburl
    def set_input_parameters(self, parameters):
        self.colaburl = parameters['colaburl']

    def _initialize_model(self):
        return
@@ -30,6 +30,7 @@ class model_loader(HFTorchInferenceModel):

    def _load(self, save_model: bool, initial_load: bool) -> None:
        utils.koboldai_vars.allowsp = True
        self.lazy_load = utils.koboldai_vars.lazy_load

        # Make model path the same as the model name to make this consistent
        # with the other loading method if it isn't a known model type. This
@@ -78,10 +78,10 @@ class model_loader(InferenceModel):
        }])
        return requested_parameters

    def set_input_parameters(self, url="", key="", model=""):
        self.key = key.strip()
        self.model = model
        self.url = url
    def set_input_parameters(self, parameters):
        self.key = parameters['key'].strip()
        self.model = parameters['model']
        self.url = parameters['url']

    def get_cluster_models(self):
        # Get list of models from public cluster
@@ -59,9 +59,9 @@ class model_loader(InferenceModel):
        }])
        return requested_parameters

    def set_input_parameters(self, key="", model=""):
        self.key = key.strip()
        self.model = model
    def set_input_parameters(self, parameters):
        self.key = parameters['key'].strip()
        self.model = parameters['model']

    def get_oai_models(self):
        if self.key == "":
@@ -34,12 +34,12 @@ class HFInferenceModel(InferenceModel):
        requested_parameters = []

        if model_path is not None and os.path.exists(model_path):
            model_config = AutoConfig.from_pretrained(model_path)
            self.model_config = AutoConfig.from_pretrained(model_path)
        elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))):
            model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache")
            self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache")
        else:
            model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache")
        layer_count = model_config["n_layer"] if isinstance(model_config, dict) else model_config.num_layers if hasattr(model_config, "num_layers") else model_config.n_layer if hasattr(model_config, "n_layer") else model_config.num_hidden_layers if hasattr(model_config, 'num_hidden_layers') else None
            self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache")
        layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None
        if layer_count is not None and layer_count >= 0:
            if os.path.exists("settings/{}.breakmodel".format(model_name.replace("/", "_"))):
                with open("settings/{}.breakmodel".format(model_name.replace("/", "_")), "r") as file:
@@ -61,11 +61,11 @@ class HFInferenceModel(InferenceModel):
                "uitype": "slider",
                "unit": "int",
                "label": "{} Layers".format(torch.cuda.get_device_name(i)),
                "id": "{} Layers".format(i),
                "id": "{}_Layers".format(i),
                "min": 0,
                "max": layer_count,
                "step": 1,
                "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
                "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
                "check_message": "The sum of assigned layers must equal {}".format(layer_count),
                "default": break_values[i],
                "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)),
@@ -77,11 +77,11 @@ class HFInferenceModel(InferenceModel):
                "uitype": "slider",
                "unit": "int",
                "label": "CPU Layers",
                "id": "CPU Layers",
                "id": "CPU_Layers",
                "min": 0,
                "max": layer_count,
                "step": 1,
                "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
                "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
                "check_message": "The sum of assigned layers must equal {}".format(layer_count),
                "default": layer_count - sum(break_values),
                "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.",
@@ -98,7 +98,7 @@ class HFInferenceModel(InferenceModel):
                "min": 0,
                "max": layer_count,
                "step": 1,
                "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
                "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="},
                "check_message": "The sum of assigned layers must equal {}".format(layer_count),
                "default": disk_blocks,
                "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. Use as a last resort.",
@@ -122,10 +122,40 @@ class HFInferenceModel(InferenceModel):

        return requested_parameters

    def set_input_parameters(self, layers=[], disk_layers=0, use_gpu=False):
    def set_input_parameters(self, parameters):
        gpu_count = torch.cuda.device_count()
        layers = []
        for i in range(gpu_count):
            layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None)
        self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None
        self.layers = layers
        self.disk_layers = disk_layers
        self.use_gpu = use_gpu
        self.disk_layers = parameters['disk_layers'] if 'disk_layers' in parameters else None
        self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
        self.model_name = parameters['id']
        self.path = parameters['path'] if 'path' in parameters else None

    def unload(self):
        if hasattr(self, 'model'):
            self.model = None
        if hasattr(self, 'tokenizer'):
            self.tokenizer = None
        if hasattr(self, 'model_config'):
            self.model_config = None
        with torch.no_grad():
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", message="torch.distributed.reduce_op is deprecated")
                for tensor in gc.get_objects():
                    try:
                        if torch.is_tensor(tensor):
                            tensor.set_(torch.tensor((), device=tensor.device, dtype=tensor.dtype))
                    except:
                        pass
        gc.collect()
        try:
            with torch.no_grad():
                torch.cuda.empty_cache()
        except:
            pass

    def _post_load(self) -> None:
        # These are model specific tokenizer overrides if a model has bad defaults
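The slider ids renamed above ('0_Layers', '1_Layers', ..., 'CPU_Layers', 'Disk_Layers') are exactly what set_input_parameters now looks up in the incoming dict, with the per-GPU values arriving as strings; note that the setter reads lowercase 'disk_layers' even though the slider id is 'Disk_Layers'. The dict below is only an illustration of that shape for a machine with two GPUs.

# Illustrative parameters dict for two GPUs; the values are made up and the
# plugin key "generic_hf_torch" is an assumption, not defined by this commit.
parameters = {
    "id": "EleutherAI/gpt-neo-2.7B",  # stored as self.model_name
    "0_Layers": "20",                 # per-GPU values arrive as strings
    "1_Layers": "8",
    "CPU_Layers": "4",
    "disk_layers": "0",
    "use_gpu": True,
    "path": None,
}
model_loaders["generic_hf_torch"].set_input_parameters(parameters)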
@@ -187,7 +217,7 @@ class HFInferenceModel(InferenceModel):

            return model_path

        basename = utils.koboldai_vars.model.replace("/", "_")
        basename = self.model_name.replace("/", "_")
        if legacy:
            ret = basename
        else:
@@ -398,7 +398,7 @@ class HFTorchInferenceModel(HFInferenceModel):
            Embedding._koboldai_patch_causallm_model = self.model

    def _get_lazy_load_callback(self, n_layers: int, convert_to_float16: bool = True):
        if not self.lazy_load:
        if not utils.koboldai_vars.lazy_load:
            return

        if utils.args.breakmodel_disklayers is not None:
modeling/inference_models/readonly.py (new file, 77 lines)
@@ -0,0 +1,77 @@
from __future__ import annotations

import torch
import requests
import numpy as np
from typing import List, Optional, Union

import utils
from logger import logger
from modeling.inference_model import (
    GenerationResult,
    GenerationSettings,
    InferenceModel,
    ModelCapabilities,
)


class BasicAPIException(Exception):
    """To be used for errors when using the Basic API as an interface."""


class model_loader(InferenceModel):
    def __init__(self) -> None:
        super().__init__()

        # Do not allow API to be served over the API
        self.capabilties = ModelCapabilities(api_host=False)
        self.tokenizer = self._tokenizer()
        self.model = None
        self.model_name = "Read Only"

    def is_valid(self, model_name, model_path, menu_path):
        return model_name == "ReadOnly"

    def get_requested_parameters(self, model_name, model_path, menu_path):
        requested_parameters = []
        return requested_parameters

    def set_input_parameters(self, parameters):
        return

    def unload(self):
        utils.koboldai_vars.noai = False

    def _initialize_model(self):
        return

    class _tokenizer():
        def __init__(self):
            self._koboldai_header = []
        def decode(self, _input):
            return ""
        def encode(self, input_text):
            return []

    def _load(self, save_model: bool = False, initial_load: bool = False) -> None:
        self.tokenizer = self.tokenizer
        self.model = None
        utils.koboldai_vars.noai = True

    def _raw_generate(
        self,
        prompt_tokens: Union[List[int], torch.Tensor],
        max_new: int,
        gen_settings: GenerationSettings,
        single_line: bool = False,
        batch_count: int = 1,
        seed: Optional[int] = None,
        **kwargs,
    ):
        return GenerationResult(
            model=self,
            out_batches=np.array([]),
            prompt=prompt_tokens,
            is_whole_generation=True,
            single_line=single_line,
        )
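The new ReadOnly backend never loads weights: its stub tokenizer encodes everything to an empty list and decodes to an empty string, _load() just switches koboldai_vars.noai on, and _raw_generate() returns an empty GenerationResult. A short usage sketch, assuming a running KoboldAI process since the loader touches utils.koboldai_vars:

# Sketch only; outside a running KoboldAI process utils.koboldai_vars may not exist.
from modeling.inference_models.readonly import model_loader

ro = model_loader()
ro.load(initial_load=True)             # sets utils.koboldai_vars.noai = True, loads nothing
print(ro.tokenizer.encode("hello"))    # -> []
print(ro.tokenizer.decode([1, 2, 3]))  # -> ""
ro.unload()                            # flips noai back to False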
@@ -14,8 +14,8 @@ socket.on('load_popup', function(data){load_popup(data);});
socket.on('popup_items', function(data){popup_items(data);});
socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);});
socket.on('popup_edit_file', function(data){popup_edit_file(data);});
socket.on('show_model_menu', function(data){show_model_menu(data);});
socket.on('open_model_load_menu', function(data){new_show_model_menu(data);});
//socket.on('show_model_menu', function(data){show_model_menu(data);});
socket.on('open_model_load_menu', function(data){show_model_menu(data);});
socket.on('selected_model_info', function(data){selected_model_info(data);});
socket.on('oai_engines', function(data){oai_engines(data);});
socket.on('buildload', function(data){buildload(data);});
@@ -1502,13 +1502,18 @@ function getModelParameterCount(modelName) {
    return base * multiplier;
}

function new_show_model_menu(data) {
function show_model_menu(data) {
    //clear out the loadmodelsettings
    var loadmodelsettings = document.getElementById('loadmodelsettings')
    while (loadmodelsettings.firstChild) {
        loadmodelsettings.removeChild(loadmodelsettings.firstChild);
    }
    document.getElementById("modelplugin").classList.add("hidden");
    //Clear out plugin selector
    var model_plugin = document.getElementById('modelplugin');
    while (model_plugin.firstChild) {
        model_plugin.removeChild(model_plugin.firstChild);
    }
    model_plugin.classList.add("hidden");
    var accept = document.getElementById("btn_loadmodelaccept");
    accept.disabled = false;