Merge commit 'refs/pull/110/head' of https://github.com/ebolam/KoboldAI into UI2
Changed file: aiserver.py (316 lines)
@@ -221,6 +221,7 @@ model_menu = {
        ["InferKit API (requires API key)", "InferKit", "", False],
        # ["KoboldAI Server API (Old Google Colab)", "Colab", "", False],
        ["KoboldAI API", "API", "", False],
        ["KoboldAI Horde", "CLUSTER", "", False],
        ["Return to Main Menu", "mainmenu", "", True],
    ]
}
@@ -467,6 +468,18 @@ api_v1 = KoboldAPISpec(
    tags=tags,
)

# Returns the expected config filename for the current setup.
# If the model_name is specified, it returns what the settings file would be for that model
def get_config_filename(model_name = None):
    if model_name:
        return(f"settings/{model_name.replace('/', '_')}.settings")
    elif args.configname:
        return(f"settings/{args.configname.replace('/', '_')}.settings")
    elif vars.configname != '':
        return(f"settings/{vars.configname.replace('/', '_')}.settings")
    else:
        print(f"Empty configfile name sent back. Defaulting to ReadOnly")
        return(f"settings/ReadOnly.settings")
#==================================================================#
# Function to get model selection at startup
#==================================================================#
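For illustration only (not part of the diff): the new helper above flattens slashes in model names so each model gets its own file under settings/. A minimal standalone sketch of the same mapping, with an example model name:

def settings_path_for(model_name):
    # Slashes in Hugging Face-style names are replaced so the file lands directly in settings/
    return f"settings/{model_name.replace('/', '_')}.settings"

print(settings_path_for("KoboldAI/fairseq-dense-13B"))  # -> settings/KoboldAI_fairseq-dense-13B.settings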
@@ -578,9 +591,8 @@ def check_if_dir_is_model(path):
# Return Model Name
#==================================================================#
def getmodelname():
    if(args.configname):
        modelname = args.configname
        return modelname
    if(vars.online_model != ''):
        return(f"{vars.model}/{vars.online_model}")
    if(koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
        modelname = os.path.basename(os.path.normpath(koboldai_vars.custmodpth))
        return modelname
@@ -996,6 +1008,8 @@ def general_startup(override_args=None):
    parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)")
    parser.add_argument("--model", help="Specify the Model Type to skip the Menu")
    parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)")
    parser.add_argument("--apikey", help="Specify the API key to use for online services")
    parser.add_argument("--req_model", type=str, action='append', required=False, help="Which models we allow to generate for us during cluster mode. Can be specified multiple times.")
    parser.add_argument("--revision", help="Specify the model revision for huggingface models (can be a git branch/tag name or a git commit hash)")
    parser.add_argument("--cpu", action='store_true', help="By default unattended launches are on the GPU use this option to force CPU usage.")
    parser.add_argument("--breakmodel", action='store_true', help=argparse.SUPPRESS)
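Taken together with --model, the new flags allow an unattended Horde/cluster launch, for example (illustrative placeholder values, not from the diff): python aiserver.py --model CLUSTER --apikey <your-horde-key> --req_model KoboldAI/fairseq-dense-13B --req_model KoboldAI/OPT-13B-Nerys-v2. Per its help text, --req_model may be repeated once per allowed model.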
@@ -1058,6 +1072,11 @@ def general_startup(override_args=None):
        koboldai_vars.model = args.model;
        koboldai_vars.revision = args.revision

    if args.apikey:
        vars.apikey = args.apikey
    if args.req_model:
        vars.cluster_requested_models = args.req_model

    if args.colab:
        args.remote = True;
        args.override_rename = True;
@@ -1156,12 +1175,30 @@ def get_model_info(model, directory=""):
    key_value = ""
    break_values = []
    url = False
    default_url = None
    models_on_url = False
    multi_online_models = False
    gpu_count = torch.cuda.device_count()
    gpu_names = []
    for i in range(gpu_count):
        gpu_names.append(torch.cuda.get_device_name(i))
    if model in ['Colab', 'API']:
        url = True
    elif model == 'CLUSTER':
        models_on_url = True
        url = True
        key = True
        default_url = 'https://koboldai.net'
        multi_online_models = True
        if path.exists(get_config_filename(model)):
            with open(get_config_filename(model), "r") as file:
                # Check if API key exists
                js = json.load(file)
                if("apikey" in js and js["apikey"] != ""):
                    # API key exists, grab it and close the file
                    key_value = js["apikey"]
                elif 'oaiapikey' in js and js['oaiapikey'] != "":
                    key_value = js["oaiapikey"]
    elif model in [x[1] for x in model_menu['apilist']]:
        if path.exists("settings/{}.v2_settings".format(model)):
            with open("settings/{}.v2_settings".format(model), "r") as file:
@@ -1202,13 +1239,13 @@ def get_model_info(model, directory=""):
            break_values = [layer_count]
    break_values = [int(x) for x in break_values if x != '']
    break_values += [0] * (gpu_count - len(break_values))
    emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key,
    emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key, 'multi_online_models': multi_online_models, 'default_url': default_url,
                        'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel,
                        'disk_break_value': disk_blocks, 'accelerate': utils.HAS_ACCELERATE,
                        'break_values': break_values, 'gpu_count': gpu_count,
                        'url': url, 'gpu_names': gpu_names}, broadcast=True, room="UI_1")
    emit('selected_model_info', {'key_value': key_value, 'key':key,
                        'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel,
                        'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, 'multi_online_models': multi_online_models, 'default_url': default_url,
                        'disk_break_value': disk_blocks, 'disk_break': utils.HAS_ACCELERATE,
                        'break_values': break_values, 'gpu_count': gpu_count,
                        'url': url, 'gpu_names': gpu_names}, broadcast=False, room="UI_2")
@@ -1216,7 +1253,7 @@ def get_model_info(model, directory=""):


def get_layer_count(model, directory=""):
    if(model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]):
    if(model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]):
        if(model == "GPT2Custom"):
            with open(os.path.join(directory, "config.json"), "r") as f:
                model_config = json.load(f)
@@ -1283,6 +1320,8 @@ def get_oai_models(data):
            if "apikey" in js:
                if js['apikey'] != key:
                    changed=True
            else:
                changed=True
        if changed:
            with open("settings/{}.v2_settings".format(model), "w") as file:
                js["apikey"] = key
@@ -1296,6 +1335,55 @@ def get_oai_models(data):
        print(req.json())
        emit('from_server', {'cmd': 'errmsg', 'data': req.json()})

def get_cluster_models(msg):
    vars.oaiapikey = msg['key']
    vars.apikey = vars.oaiapikey
    url = msg['url']


    # Get list of models from public cluster
    print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="")
    req = requests.get("{}/models".format(url))
    if(req.status_code == 200):
        engines = req.json()
        print(engines)
        try:
            engines = [[en, en] for en in engines]
        except:
            print(engines)
            raise
        print(engines)

        online_model = ""
        changed=False

        #Save the key
        if not path.exists("settings"):
            # If the client settings file doesn't exist, create it
            # Write API key to file
            os.makedirs('settings', exist_ok=True)
        if path.exists(get_config_filename(vars.model_selected)):
            with open(get_config_filename(vars.model_selected), "r") as file:
                js = json.load(file)
                if 'online_model' in js:
                    online_model = js['online_model']
                if "apikey" in js:
                    if js['apikey'] != vars.oaiapikey:
                        changed=True
        else:
            changed=True
        if changed:
            js={}
            with open(get_config_filename(vars.model_selected), "w") as file:
                js["apikey"] = vars.oaiapikey
                file.write(json.dumps(js, indent=3))

        emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True)
    else:
        # Something went wrong, print the message and quit since we can't initialize an engine
        print("{0}ERROR!{1}".format(colors.RED, colors.END))
        print(req.json())
        emit('from_server', {'cmd': 'errmsg', 'data': req.json()})

# Function to patch transformers to use our soft prompt
def patch_causallm(model):
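For illustration only (not part of the diff): the model list above comes from a plain GET against the cluster's /models endpoint, and each name is duplicated into a [value, label] pair for the UI dropdown. A minimal sketch, assuming the default cluster URL that get_model_info() sets for CLUSTER:

import requests

cluster_url = "https://koboldai.net"  # default_url for CLUSTER in get_model_info()
resp = requests.get("{}/models".format(cluster_url))
if resp.status_code == 200:
    engines = [[name, name] for name in resp.json()]  # [value, label] pairs for the dropdown
    print(engines)
else:
    print("Could not retrieve model list:", resp.status_code)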
@@ -1808,6 +1896,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
    model = None
    generator = None
    model_config = None
    vars.online_model = ''
    with torch.no_grad():
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", message="torch.distributed.reduce_op is deprecated")
@@ -1827,9 +1916,24 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
    #Reload our badwords
    koboldai_vars.badwordsids = koboldai_settings.badwordsids_default

    if online_model == "":
        vars.configname = vars.model.replace('/', '_')
    #Let's set the GooseAI or OpenAI server URLs if that's applicable
    if online_model != "":
        if path.exists("settings/{}.v2_settings".format(koboldai_vars.model)):
    else:
        koboldai_vars.online_model = online_model
        # Swap OAI Server if GooseAI was selected
        if(koboldai_vars.model == "GooseAI"):
            koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
            koboldai_vars.model = "OAI"
            koboldai_vars.configname = f"GooseAI_{online_model.replace('/', '_')}"
        elif(koboldai_vars.model == "CLUSTER") and type(online_model) is list:
            if len(online_model) != 1:
                koboldai_vars.configname = koboldai_vars.model
            else:
                koboldai_vars.configname = f"{koboldai_vars.model}_{online_model[0].replace('/', '_')}"
        else:
            koboldai_vars.configname = f"{koboldai_vars.model}_{online_model.replace('/', '_')}"
        if path.exists(get_config_filename()):
            changed=False
            with open("settings/{}.v2_settings".format(koboldai_vars.model), "r") as file:
                # Check if API key exists
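For illustration only (not part of the diff): the branch above picks a per-model config name for online backends. A standalone restatement of that logic with placeholder model names:

def derive_configname(model, online_model):
    # GooseAI is folded into the OAI backend but keeps its own settings name
    if model == "GooseAI":
        return f"GooseAI_{online_model.replace('/', '_')}"
    # A Horde request may target several models; only a single selection gets its own name
    if model == "CLUSTER" and isinstance(online_model, list):
        if len(online_model) != 1:
            return model
        return f"{model}_{online_model[0].replace('/', '_')}"
    return f"{model}_{online_model.replace('/', '_')}"

print(derive_configname("CLUSTER", ["KoboldAI/fairseq-dense-13B"]))  # CLUSTER_KoboldAI_fairseq-dense-13B
print(derive_configname("CLUSTER", ["model-a", "model-b"]))          # CLUSTER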
@@ -1844,18 +1948,19 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
        if changed:
            with open("settings/{}.v2_settings".format(koboldai_vars.model), "w") as file:
                file.write(json.dumps(js, indent=3))

        # Swap OAI Server if GooseAI was selected
        if(koboldai_vars.model == "GooseAI"):
            koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
            koboldai_vars.model = "OAI"
            args.configname = "GooseAI" + "/" + online_model
        else:
        elif vars.model != "CLUSTER":
            args.configname = koboldai_vars.model + "/" + online_model
        koboldai_vars.oaiurl = koboldai_vars.oaiengines + "/{0}/completions".format(online_model)


    # If transformers model was selected & GPU available, ask to use CPU or GPU
    if(koboldai_vars.model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
    if(koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
        koboldai_vars.allowsp = True
        # Test for GPU support

@@ -1894,7 +1999,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
        print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)")
        koboldai_vars.model_type = "gpt_neo"

    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
        loadmodelsettings()
        loadsettings()
        print(2)
@@ -1937,18 +2042,18 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
    if(koboldai_vars.model == "GooseAI"):
        koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines"
        koboldai_vars.model = "OAI"
        args.configname = "GooseAI"
        vars.configname = "GooseAI"

    # Ask for API key if OpenAI was selected
    if(koboldai_vars.model == "OAI"):
        if not args.configname:
            args.configname = "OAI"
        if not vars.configname:
            vars.configname = "OAI"

    if(koboldai_vars.model == "ReadOnly"):
        koboldai_vars.noai = True

    # Start transformers and create pipeline
    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
        if(not koboldai_vars.noai):
            print("{0}Initializing transformers, please wait...{1}".format(colors.PURPLE, colors.END))
            for m in ("GPTJModel", "XGLMModel"):
@@ -2406,7 +2511,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
        }

    # If we're running Colab or OAI, we still need a tokenizer.
    if(koboldai_vars.model in ("Colab", "API")):
    if(koboldai_vars.model in ("Colab", "API", "CLUSTER")):
        from transformers import GPT2TokenizerFast
        tokenizer = GPT2TokenizerFast.from_pretrained("EleutherAI/gpt-neo-2.7B", revision=koboldai_vars.revision, cache_dir="cache")
        loadsettings()
@@ -3097,7 +3202,7 @@ def lua_set_chunk(k, v):
def lua_get_modeltype():
    if(koboldai_vars.noai):
        return "readonly"
    if(koboldai_vars.model in ("Colab", "API", "OAI", "InferKit")):
    if(koboldai_vars.model in ("Colab", "API", "CLUSTER", "OAI", "InferKit")):
        return "api"
    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and (koboldai_vars.model in ("GPT2Custom", "NeoCustom") or koboldai_vars.model_type in ("gpt2", "gpt_neo", "gptj"))):
        hidden_size = get_hidden_size_from_model(model)
@@ -3126,7 +3231,7 @@ def lua_get_modeltype():
def lua_get_modelbackend():
    if(koboldai_vars.noai):
        return "readonly"
    if(koboldai_vars.model in ("Colab", "API", "OAI", "InferKit")):
    if(koboldai_vars.model in ("Colab", "API", "CLUSTER", "OAI", "InferKit")):
        return "api"
    if(koboldai_vars.use_colab_tpu or koboldai_vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
        return "mtj"
@@ -3564,6 +3669,8 @@ def get_message(msg):
    elif(msg['cmd'] == 'list_model'):
        sendModelSelection(menu=msg['data'])
    elif(msg['cmd'] == 'load_model'):
        print(msg)
        print(vars.model_selected)
        if not os.path.exists("settings/"):
            os.mkdir("settings")
        changed = True
@@ -3587,6 +3694,14 @@ def get_message(msg):
                f.close()
        koboldai_vars.colaburl = msg['url'] + "/request"
        koboldai_vars.model = koboldai_vars.model_selected
        if vars.model == "CLUSTER":
            if type(msg['online_model']) is not list:
                if msg['online_model'] == '':
                    vars.cluster_requested_models = []
                else:
                    vars.cluster_requested_models = [msg['online_model']]
            else:
                vars.cluster_requested_models = msg['online_model']
        load_model(use_gpu=msg['use_gpu'], gpu_layers=msg['gpu_layers'], disk_layers=msg['disk_layers'], online_model=msg['online_model'])
    elif(msg['cmd'] == 'show_model'):
        print("Model Name: {}".format(getmodelname()))
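For illustration only (not part of the diff): the block above normalises the UI's 'online_model' field, which may arrive as an empty string, a single name, or a list, into the list of requested Horde models. The same logic in isolation, with example values:

def requested_models(online_model):
    if not isinstance(online_model, list):
        return [] if online_model == '' else [online_model]
    return online_model

print(requested_models(''))                          # []
print(requested_models('KoboldAI/OPT-13B-Nerys-v2')) # ['KoboldAI/OPT-13B-Nerys-v2']
print(requested_models(['model-a', 'model-b']))      # ['model-a', 'model-b']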
@@ -3650,6 +3765,8 @@ def get_message(msg):
            print(colors.RED + "WARNING!!: Someone maliciously attempted to delete " + msg['data'] + " the attempt has been blocked.")
    elif(msg['cmd'] == 'OAI_Key_Update'):
        get_oai_models({'model': koboldai_vars.model, 'key': msg['key']})
    elif(msg['cmd'] == 'Cluster_Key_Update'):
        get_cluster_models(msg)
    elif(msg['cmd'] == 'loadselect'):
        koboldai_vars.loadselect = msg["data"]
    elif(msg['cmd'] == 'spselect'):
@@ -3845,7 +3962,7 @@ def check_for_backend_compilation():
            break
    koboldai_vars.checking = False

def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, disable_recentrng=False, no_generate=False):
def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, disable_recentrng=False, no_generate=False, ignore_aibusy=False):
    # Ignore new submissions if the AI is currently busy
    if(koboldai_vars.aibusy):
        return
@@ -3853,11 +3970,19 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False,
    while(True):
        set_aibusy(1)
        koboldai_vars.actions.clear_unused_options()
        if(koboldai_vars.model == "API"):
        if(koboldai_vars.model in ["API","CLUSTER"]):
            global tokenizer
            tokenizer_id = requests.get(
                koboldai_vars.colaburl[:-8] + "/api/v1/model",
            ).json()["result"]
            if koboldai_vars.model == "API":
                tokenizer_id = requests.get(
                    koboldai_vars.colaburl[:-8] + "/api/v1/model",
                ).json()["result"]
            elif len(koboldai_vars.cluster_requested_models) >= 1:
                # If the player has requested one or more models, we use the first one for the tokenizer
                tokenizer_id = koboldai_vars.cluster_requested_models[0]
                # The cluster can return any number of possible models for each gen, but this happens after this step
                # So at this point, this is unknown
            else:
                tokenizer_id = ""
            if tokenizer_id != koboldai_vars.api_tokenizer_id:
                try:
                    if(os.path.isdir(tokenizer_id)):
@@ -4089,6 +4214,8 @@ def apiactionsubmit(data, use_memory=False, use_world_info=False, use_story=Fals
        raise NotImplementedError("API generation is not supported in old Colab API mode.")
    elif(koboldai_vars.model == "API"):
        raise NotImplementedError("API generation is not supported in API mode.")
    elif(koboldai_vars.model == "CLUSTER"):
        raise NotImplementedError("API generation is not supported in API mode.")
    elif(koboldai_vars.model == "OAI"):
        raise NotImplementedError("API generation is not supported in OpenAI/GooseAI mode.")
    elif(koboldai_vars.model == "ReadOnly"):
@@ -4148,7 +4275,7 @@ def apiactionsubmit(data, use_memory=False, use_world_info=False, use_story=Fals
    minimum = len(tokens) + 1
    maximum = len(tokens) + koboldai_vars.genamt

    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["Colab", "API", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
    if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["Colab", "API", "CLUSTER", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
        genout = apiactionsubmit_generate(tokens, minimum, maximum)
    elif(koboldai_vars.use_colab_tpu or koboldai_vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
        genout = apiactionsubmit_tpumtjgenerate(tokens, minimum, maximum)
@@ -4278,8 +4405,8 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,

    if(actionlen == 0):
        # First/Prompt action
        tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns
        assert len(tokens) <= koboldai_vars.max_length - lnsp - koboldai_vars.genamt - budget_deduction
        tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns
        assert len(tokens) <= koboldai_vars.max_length - lnsp - vars.genamt - budget_deduction
        ln = len(tokens) + lnsp
        return tokens, ln+1, ln+koboldai_vars.genamt
    else:
@@ -4327,13 +4454,12 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
        # Did we get to add the A.N.? If not, do it here
        if(anotetxt != ""):
            if((not anoteadded) or forceanote):
                # header, mem, wi, anote, prompt, actions
                tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns + tokens
                tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns + tokens
            else:
                tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + prompttkns + tokens
                tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + prompttkns + tokens
        else:
            # Prepend Memory, WI, and Prompt before action tokens
            tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + prompttkns + tokens
            tokens = (tokenizer._koboldai_header if koboldai_vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + prompttkns + tokens

        # Send completed bundle to generator
        assert len(tokens) <= koboldai_vars.max_length - lnsp - koboldai_vars.genamt - budget_deduction
@@ -4360,23 +4486,27 @@ def calcsubmit(txt):
        else:
            subtxt, min, max = calcsubmitbudget(actionlen, winfo, mem, anotetxt, koboldai_vars.actions, submission=txt)
        if(actionlen == 0):
            if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["Colab", "API", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
            if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["Colab", "API", "CLUSTER", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
                generate(subtxt, min, max, found_entries=found_entries)
            elif(koboldai_vars.model == "Colab"):
                sendtocolab(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.model == "API"):
                sendtoapi(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.model == "CLUSTER"):
                sendtocluster(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.model == "OAI"):
                oairequest(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.use_colab_tpu or koboldai_vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
                tpumtjgenerate(subtxt, min, max, found_entries=found_entries)
        else:
            if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["Colab", "API", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
            if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["Colab", "API", "CLUSTER", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
                generate(subtxt, min, max, found_entries=found_entries)
            elif(koboldai_vars.model == "Colab"):
                sendtocolab(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.model == "API"):
                sendtoapi(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.model == "CLUSTER"):
                sendtocluster(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.model == "OAI"):
                oairequest(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
            elif(koboldai_vars.use_colab_tpu or koboldai_vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
@@ -4828,18 +4958,102 @@ def sendtoapi(txt, min, max):
    if(len(genout) == 1):
        genresult(genout[0])
    else:
        adjusted_genout = []
        for item in genout:
            adjusted_genout.append({"generated_text": item})
        # Convert torch output format to transformers
        seqs = []
        for seq in genout:
        for seq in adjusted_genout:
            seqs.append({"generated_text": seq})
        if(koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
            genresult(genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
            genresult(adjusted_genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
        else:
            genselect(genout)
            genselect(adjusted_genout)

    set_aibusy(0)
    return

#==================================================================#
#  Send transformers-style request to KoboldAI Cluster
#==================================================================#
def sendtocluster(txt, min, max):
    # Log request to console
    if not vars.quiet:
        print("{0}Tokens:{1}, Txt:{2}{3}".format(colors.YELLOW, min-1, txt, colors.END))

    # Store context in memory to use it for comparison with generated content
    vars.lastctx = txt

    # Build request JSON data
    reqdata = {
        'max_length': max - min + 1,
        'max_context_length': vars.max_length,
        'rep_pen': vars.rep_pen,
        'rep_pen_slope': vars.rep_pen_slope,
        'rep_pen_range': vars.rep_pen_range,
        'temperature': vars.temp,
        'top_p': vars.top_p,
        'top_k': vars.top_k,
        'top_a': vars.top_a,
        'tfs': vars.tfs,
        'typical': vars.typical,
        'n': vars.numseqs,
    }
    cluster_metadata = {
        'prompt': txt,
        'params': reqdata,
        'username': vars.apikey,
        'models': vars.cluster_requested_models,
    }

    # Create request
    req = requests.post(
        vars.colaburl[:-8] + "/generate/sync",
        json=cluster_metadata,
    )
    js = req.json()
    if(req.status_code == 503):
        errmsg = "KoboldAI API Error: No available KoboldAI servers found in cluster to fulfil this request using the selected models and requested lengths."
        print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
        set_aibusy(0)
        return
    if(req.status_code != 200):
        errmsg = "KoboldAI API Error: Failed to get a reply from the server. Please check the console."
        print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
        set_aibusy(0)
        return
    genout = js

    for i in range(vars.numseqs):
        vars.lua_koboldbridge.outputs[i+1] = genout[i]

    execute_outmod()
    if(vars.lua_koboldbridge.regeneration_required):
        vars.lua_koboldbridge.regeneration_required = False
        genout = []
        for i in range(vars.numseqs):
            genout.append(vars.lua_koboldbridge.outputs[i+1])
            assert type(genout[-1]) is str

    if(len(genout) == 1):
        genresult(genout[0])
    else:
        adjusted_genout = []
        for item in genout:
            adjusted_genout.append({"generated_text": item})
        # Convert torch output format to transformers
        seqs = []
        for seq in adjusted_genout:
            seqs.append({"generated_text": seq})
        if(vars.lua_koboldbridge.restart_sequence is not None and vars.lua_koboldbridge.restart_sequence > 0):
            genresult(adjusted_genout[vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
        else:
            genselect(adjusted_genout)

    set_aibusy(0)
    return
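For illustration only (not part of the diff): sendtocluster() wraps the sampler settings in a 'params' object and posts the bundle to the cluster's /generate/sync endpoint; the API key doubles as the 'username' field, and a 503 response means no worker can serve the request. A minimal sketch with placeholder values:

import requests

cluster_metadata = {
    "prompt": "You are standing in an open field west of a white house.",
    "params": {
        "max_length": 80,           # max - min + 1 in sendtocluster()
        "max_context_length": 1024,
        "temperature": 0.7,
        "top_p": 0.9,
        "n": 1,
    },
    "username": "<your-horde-key>",
    "models": ["KoboldAI/fairseq-dense-13B"],
}
resp = requests.post("https://koboldai.net/generate/sync", json=cluster_metadata)
if resp.status_code == 200:
    print(resp.json())  # a list with one generated text per requested sequence
else:
    print("Cluster error", resp.status_code, resp.json())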

#==================================================================#
#  Send text to TPU mesh transformer backend
@@ -5669,7 +5883,9 @@ def oairequest(txt, min, max):
    koboldai_vars.lastctx = txt

    # Build request JSON data
    if 'GooseAI' in args.configname:
    # GooseAI is a subtype of OAI. So to check if it's this type, we check the configname as a workaround
    # as the vars.model will always be OAI
    if 'GooseAI' in vars.configname:
        reqdata = {
            'prompt': txt,
            'max_tokens': koboldai_vars.genamt,
@@ -8021,7 +8237,7 @@ def post_story_end(body: SubmissionInputSchema):
    numseqs = koboldai_vars.numseqs
    koboldai_vars.numseqs = 1
    try:
        actionsubmit(body.prompt, force_submit=True, no_generate=True)
        actionsubmit(body.prompt, force_submit=True, no_generate=True, ignore_aibusy=True)
    finally:
        koboldai_vars.disable_set_aibusy = disable_set_aibusy
        koboldai_vars.standalone = _standalone
@@ -9945,6 +10161,32 @@ def get_config_soft_prompt():
    """
    return {"value": koboldai_vars.spfilename.strip()}

class SoftPromptsListSchema(KoboldSchema):
    values: List[SoftPromptSettingSchema] = fields.List(fields.Nested(SoftPromptSettingSchema), required=True, metadata={"description": "Array of available softprompts."})

@api_v1.get("/config/soft_prompts_list")
@api_schema_wrap
def get_config_soft_prompts_list():
    """---
    get:
      summary: Retrieve all available softprompt filenames
      tags:
        - config
      responses:
        200:
          description: Successful request
          content:
            application/json:
              schema: SoftPromptsListSchema
              example:
                values: []
    """
    splist = []
    for sp in fileops.getspfiles(vars.modeldim):

        splist.append({"value":sp["filename"]})
    return {"values": splist}

@api_v1.put("/config/soft_prompt")
@api_schema_wrap
def put_config_soft_prompt(body: SoftPromptSettingSchema):
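For illustration only (not part of the diff): the new route can be queried like any other endpoint under the /api/v1 prefix used elsewhere in this file; the host and port below are placeholders for a locally running server.

import requests

resp = requests.get("http://localhost:5000/api/v1/config/soft_prompts_list")
print(resp.json())  # e.g. {"values": [{"value": "my_softprompt.zip"}]} or {"values": []}

The remaining hunks below are from the browser UI JavaScript and the model-load popup HTML template rather than aiserver.py.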
@@ -2919,14 +2919,35 @@ $(document).ready(function(){
        if (msg.key) {
            $("#modelkey").removeClass("hidden");
            $("#modelkey")[0].value = msg.key_value;
            if (msg.models_on_url) {
                $("#modelkey")[0].onblur = function () {socket.send({'cmd': 'Cluster_Key_Update', 'key': this.value, 'url': document.getElementById("modelurl").value});};
                $("#modelurl")[0].onblur = function () {socket.send({'cmd': 'Cluster_Key_Update', 'key': document.getElementById("modelkey").value, 'url': this.value});};
            } else {
                $("#modelkey")[0].onblur = function () {socket.send({'cmd': 'OAI_Key_Update', 'key': $('#modelkey')[0].value});};
                $("#modelurl")[0].onblur = null;
            }
            //if we're in the API list, disable the load button until the model is selected (after the API Key is entered)
            disableButtons([load_model_accept]);
        } else {
            $("#modelkey").addClass("hidden");

        }

        console.log(msg.multi_online_models);
        if (msg.multi_online_models) {
            $("#oaimodel")[0].setAttribute("multiple", "");
            $("#oaimodel")[0].options[0].textContent = "All"
        } else {
            $("#oaimodel")[0].removeAttribute("multiple");
            $("#oaimodel")[0].options[0].textContent = "Select Model(s)"
        }



        if (msg.url) {
            $("#modelurl").removeClass("hidden");
            if (msg.default_url != null) {
                $("#modelurl").value = msg.default_url;
            }
        } else {
            $("#modelurl").addClass("hidden");
        }
@@ -3287,7 +3308,11 @@ $(document).ready(function(){
            }
        }
        var disk_layers = $("#disk_layers").length > 0 ? $("#disk_layers")[0].value : 0;
        message = {'cmd': 'load_model', 'use_gpu': $('#use_gpu')[0].checked, 'key': $('#modelkey')[0].value, 'gpu_layers': gpu_layers.slice(0, -1), 'disk_layers': disk_layers, 'url': $('#modelurl')[0].value, 'online_model': $('#oaimodel')[0].value};
        models = getSelectedOptions(document.getElementById('oaimodel'));
        if (models.length == 1) {
            models = models[0];
        }
        message = {'cmd': 'load_model', 'use_gpu': $('#use_gpu')[0].checked, 'key': $('#modelkey')[0].value, 'gpu_layers': gpu_layers.slice(0, -1), 'disk_layers': disk_layers, 'url': $('#modelurl')[0].value, 'online_model': models};
        socket.send(message);
        loadmodelcontent.html("");
        hideLoadModelPopup();
@@ -3733,3 +3758,27 @@ function upload_file(file_box) {
    }
}

function getSelectedOptions(element) {
    // validate element
    if(!element || !element.options)
        return []; //or null?

    // return HTML5 implementation of selectedOptions instead.
    if (element.selectedOptions) {
        selectedOptions = element.selectedOptions;
    } else {
        // you are here because your browser doesn't have the HTML5 selectedOptions
        var opts = element.options;
        var selectedOptions = [];
        for(var i = 0; i < opts.length; i++) {
            if(opts[i].selected) {
                selectedOptions.push(opts[i]);
            }
        }
    }
    output = []
    for (item of selectedOptions) {
        output.push(item.value);
    }
    return output;
}
@@ -296,12 +296,12 @@
    <div id="loadmodellistcontent" style="overflow: auto; height: 300px;">
    </div>
    <div class="popupfooter">
        <input class="form-control hidden" type="text" placeholder="key" id="modelkey" onblur="socket.send({'cmd': 'OAI_Key_Update', 'key': $('#modelkey')[0].value});">
        <input class="form-control hidden" type="text" placeholder="Enter the URL of the server (For example a trycloudflare link)" id="modelurl" onchange="check_enable_model_load()">
        <input class="form-control hidden" type="text" placeholder="key" id="modelkey" onblur="socket.send({'cmd': 'OAI_Key_Update', 'key': $('#modelkey')[0].value});">
        <input class="form-control hidden" type="text" placeholder="Model Path or Hugging Face Name" id="custommodelname" menu="" onblur="socket.send({'cmd': 'selectmodel', 'data': $(this).attr('menu'), 'path_modelname': $('#custommodelname')[0].value});">
    </div>
    <div class="popupfooter">
        <select class="form-control hidden" id="oaimodel"><option value="">Select OAI Model</option></select>
        <select class="form-control hidden" id="oaimodel"><option value="">Select Model(s)</option></select>
    </div>
    <div class="popupfooter hidden" id=modellayers>
        <div class='settingitem' style="width:100%">