mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-02-02 10:36:47 +01:00
Merge branch 'united' of https://github.com/ebolam/KoboldAI into united
This commit is contained in:
commit
569f4cbce4
137
aiserver.py
137
aiserver.py
@ -217,6 +217,7 @@ model_menu = {
|
|||||||
["InferKit API (requires API key)", "InferKit", "", False],
|
["InferKit API (requires API key)", "InferKit", "", False],
|
||||||
# ["KoboldAI Server API (Old Google Colab)", "Colab", "", False],
|
# ["KoboldAI Server API (Old Google Colab)", "Colab", "", False],
|
||||||
["KoboldAI API", "API", "", False],
|
["KoboldAI API", "API", "", False],
|
||||||
|
["KoboldAI Cluster", "CLUSTER", "", False],
|
||||||
["Return to Main Menu", "mainmenu", "", True],
|
["Return to Main Menu", "mainmenu", "", True],
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@ -318,6 +319,7 @@ class vars:
|
|||||||
colaburl = "" # Ngrok url for Google Colab mode
|
colaburl = "" # Ngrok url for Google Colab mode
|
||||||
apikey = "" # API key to use for InferKit API calls
|
apikey = "" # API key to use for InferKit API calls
|
||||||
oaiapikey = "" # API key to use for OpenAI API calls
|
oaiapikey = "" # API key to use for OpenAI API calls
|
||||||
|
cluster_requested_models = [] # The models which we allow to generate during cluster mode
|
||||||
savedir = getcwd()+"\\stories"
|
savedir = getcwd()+"\\stories"
|
||||||
hascuda = False # Whether torch has detected CUDA on the system
|
hascuda = False # Whether torch has detected CUDA on the system
|
||||||
usegpu = False # Whether to launch pipeline with GPU support
|
usegpu = False # Whether to launch pipeline with GPU support
|
||||||
@ -1287,6 +1289,8 @@ def general_startup(override_args=None):
|
|||||||
parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)")
|
parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)")
|
||||||
parser.add_argument("--model", help="Specify the Model Type to skip the Menu")
|
parser.add_argument("--model", help="Specify the Model Type to skip the Menu")
|
||||||
parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)")
|
parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)")
|
||||||
|
parser.add_argument("--apikey", help="Specify the API key to use for online services")
|
||||||
|
parser.add_argument("--req_model", type=str, action='append', required=False, help="Which models which we allow to generate for us during cluster mode. Can be specified multiple times.")
|
||||||
parser.add_argument("--revision", help="Specify the model revision for huggingface models (can be a git branch/tag name or a git commit hash)")
|
parser.add_argument("--revision", help="Specify the model revision for huggingface models (can be a git branch/tag name or a git commit hash)")
|
||||||
parser.add_argument("--cpu", action='store_true', help="By default unattended launches are on the GPU use this option to force CPU usage.")
|
parser.add_argument("--cpu", action='store_true', help="By default unattended launches are on the GPU use this option to force CPU usage.")
|
||||||
parser.add_argument("--breakmodel", action='store_true', help=argparse.SUPPRESS)
|
parser.add_argument("--breakmodel", action='store_true', help=argparse.SUPPRESS)
|
||||||
@ -1335,6 +1339,11 @@ def general_startup(override_args=None):
|
|||||||
vars.model = args.model;
|
vars.model = args.model;
|
||||||
vars.revision = args.revision
|
vars.revision = args.revision
|
||||||
|
|
||||||
|
if args.apikey:
|
||||||
|
vars.apikey = args.apikey
|
||||||
|
if args.req_model:
|
||||||
|
vars.cluster_requested_models = args.req_model
|
||||||
|
|
||||||
if args.colab:
|
if args.colab:
|
||||||
args.remote = True;
|
args.remote = True;
|
||||||
args.override_rename = True;
|
args.override_rename = True;
|
||||||
@ -1484,7 +1493,7 @@ def get_model_info(model, directory=""):
|
|||||||
|
|
||||||
|
|
||||||
def get_layer_count(model, directory=""):
|
def get_layer_count(model, directory=""):
|
||||||
if(model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]):
|
if(model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]):
|
||||||
if(model == "GPT2Custom"):
|
if(model == "GPT2Custom"):
|
||||||
with open(os.path.join(directory, "config.json"), "r") as f:
|
with open(os.path.join(directory, "config.json"), "r") as f:
|
||||||
model_config = json.load(f)
|
model_config = json.load(f)
|
||||||
@ -2087,7 +2096,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||||||
|
|
||||||
|
|
||||||
# If transformers model was selected & GPU available, ask to use CPU or GPU
|
# If transformers model was selected & GPU available, ask to use CPU or GPU
|
||||||
if(vars.model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
if(vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
||||||
vars.allowsp = True
|
vars.allowsp = True
|
||||||
# Test for GPU support
|
# Test for GPU support
|
||||||
|
|
||||||
@ -2126,7 +2135,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||||||
print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)")
|
print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)")
|
||||||
vars.model_type = "gpt_neo"
|
vars.model_type = "gpt_neo"
|
||||||
|
|
||||||
if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
||||||
loadmodelsettings()
|
loadmodelsettings()
|
||||||
loadsettings()
|
loadsettings()
|
||||||
print(2)
|
print(2)
|
||||||
@ -2180,7 +2189,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||||||
vars.noai = True
|
vars.noai = True
|
||||||
|
|
||||||
# Start transformers and create pipeline
|
# Start transformers and create pipeline
|
||||||
if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
||||||
if(not vars.noai):
|
if(not vars.noai):
|
||||||
print("{0}Initializing transformers, please wait...{1}".format(colors.PURPLE, colors.END))
|
print("{0}Initializing transformers, please wait...{1}".format(colors.PURPLE, colors.END))
|
||||||
for m in ("GPTJModel", "XGLMModel"):
|
for m in ("GPTJModel", "XGLMModel"):
|
||||||
@ -2635,7 +2644,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||||||
}
|
}
|
||||||
|
|
||||||
# If we're running Colab or OAI, we still need a tokenizer.
|
# If we're running Colab or OAI, we still need a tokenizer.
|
||||||
if(vars.model in ("Colab", "API")):
|
if(vars.model in ("Colab", "API", "CLUSTER")):
|
||||||
from transformers import GPT2TokenizerFast
|
from transformers import GPT2TokenizerFast
|
||||||
tokenizer = GPT2TokenizerFast.from_pretrained("EleutherAI/gpt-neo-2.7B", revision=vars.revision, cache_dir="cache")
|
tokenizer = GPT2TokenizerFast.from_pretrained("EleutherAI/gpt-neo-2.7B", revision=vars.revision, cache_dir="cache")
|
||||||
loadsettings()
|
loadsettings()
|
||||||
@ -3281,7 +3290,7 @@ def lua_set_chunk(k, v):
|
|||||||
def lua_get_modeltype():
|
def lua_get_modeltype():
|
||||||
if(vars.noai):
|
if(vars.noai):
|
||||||
return "readonly"
|
return "readonly"
|
||||||
if(vars.model in ("Colab", "API", "OAI", "InferKit")):
|
if(vars.model in ("Colab", "API", "CLUSTER", "OAI", "InferKit")):
|
||||||
return "api"
|
return "api"
|
||||||
if(not vars.use_colab_tpu and vars.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and (vars.model in ("GPT2Custom", "NeoCustom") or vars.model_type in ("gpt2", "gpt_neo", "gptj"))):
|
if(not vars.use_colab_tpu and vars.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and (vars.model in ("GPT2Custom", "NeoCustom") or vars.model_type in ("gpt2", "gpt_neo", "gptj"))):
|
||||||
hidden_size = get_hidden_size_from_model(model)
|
hidden_size = get_hidden_size_from_model(model)
|
||||||
@ -3310,7 +3319,7 @@ def lua_get_modeltype():
|
|||||||
def lua_get_modelbackend():
|
def lua_get_modelbackend():
|
||||||
if(vars.noai):
|
if(vars.noai):
|
||||||
return "readonly"
|
return "readonly"
|
||||||
if(vars.model in ("Colab", "API", "OAI", "InferKit")):
|
if(vars.model in ("Colab", "API", "CLUSTER", "OAI", "InferKit")):
|
||||||
return "api"
|
return "api"
|
||||||
if(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
|
if(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
|
||||||
return "mtj"
|
return "mtj"
|
||||||
@ -4033,11 +4042,19 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False,
|
|||||||
while(True):
|
while(True):
|
||||||
set_aibusy(1)
|
set_aibusy(1)
|
||||||
|
|
||||||
if(vars.model == "API"):
|
if(vars.model in ["API","CLUSTER"]):
|
||||||
global tokenizer
|
global tokenizer
|
||||||
tokenizer_id = requests.get(
|
if vars.model == "API":
|
||||||
vars.colaburl[:-8] + "/api/v1/model",
|
tokenizer_id = requests.get(
|
||||||
).json()["result"]
|
vars.colaburl[:-8] + "/api/v1/model",
|
||||||
|
).json()["result"]
|
||||||
|
elif len(vars.cluster_requested_models) >= 1:
|
||||||
|
# If the player has requested one or more models, we use the first one for the tokenizer
|
||||||
|
tokenizer_id = vars.cluster_requested_models[0]
|
||||||
|
# The cluster can return any number of possible models for each gen, but this happens after this step
|
||||||
|
# So at this point, this is unknown
|
||||||
|
else:
|
||||||
|
tokenizer_id = ""
|
||||||
if tokenizer_id != vars.api_tokenizer_id:
|
if tokenizer_id != vars.api_tokenizer_id:
|
||||||
try:
|
try:
|
||||||
if(os.path.isdir(tokenizer_id)):
|
if(os.path.isdir(tokenizer_id)):
|
||||||
@ -4283,6 +4300,8 @@ def apiactionsubmit(data, use_memory=False, use_world_info=False, use_story=Fals
|
|||||||
raise NotImplementedError("API generation is not supported in old Colab API mode.")
|
raise NotImplementedError("API generation is not supported in old Colab API mode.")
|
||||||
elif(vars.model == "API"):
|
elif(vars.model == "API"):
|
||||||
raise NotImplementedError("API generation is not supported in API mode.")
|
raise NotImplementedError("API generation is not supported in API mode.")
|
||||||
|
elif(vars.model == "CLUSTER"):
|
||||||
|
raise NotImplementedError("API generation is not supported in API mode.")
|
||||||
elif(vars.model == "OAI"):
|
elif(vars.model == "OAI"):
|
||||||
raise NotImplementedError("API generation is not supported in OpenAI/GooseAI mode.")
|
raise NotImplementedError("API generation is not supported in OpenAI/GooseAI mode.")
|
||||||
elif(vars.model == "ReadOnly"):
|
elif(vars.model == "ReadOnly"):
|
||||||
@ -4333,7 +4352,7 @@ def apiactionsubmit(data, use_memory=False, use_world_info=False, use_story=Fals
|
|||||||
minimum = len(tokens) + 1
|
minimum = len(tokens) + 1
|
||||||
maximum = len(tokens) + vars.genamt
|
maximum = len(tokens) + vars.genamt
|
||||||
|
|
||||||
if(not vars.use_colab_tpu and vars.model not in ["Colab", "API", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
if(not vars.use_colab_tpu and vars.model not in ["Colab", "API", "CLUSTER", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
||||||
genout = apiactionsubmit_generate(tokens, minimum, maximum)
|
genout = apiactionsubmit_generate(tokens, minimum, maximum)
|
||||||
elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
|
elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
|
||||||
genout = apiactionsubmit_tpumtjgenerate(tokens, minimum, maximum)
|
genout = apiactionsubmit_tpumtjgenerate(tokens, minimum, maximum)
|
||||||
@ -4501,7 +4520,7 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
|
|||||||
|
|
||||||
if(actionlen == 0):
|
if(actionlen == 0):
|
||||||
# First/Prompt action
|
# First/Prompt action
|
||||||
tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns
|
tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns
|
||||||
assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
|
assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
|
||||||
ln = len(tokens) + lnsp
|
ln = len(tokens) + lnsp
|
||||||
return tokens, ln+1, ln+vars.genamt
|
return tokens, ln+1, ln+vars.genamt
|
||||||
@ -4549,12 +4568,12 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
|
|||||||
# Did we get to add the A.N.? If not, do it here
|
# Did we get to add the A.N.? If not, do it here
|
||||||
if(anotetxt != ""):
|
if(anotetxt != ""):
|
||||||
if((not anoteadded) or forceanote):
|
if((not anoteadded) or forceanote):
|
||||||
tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns + tokens
|
tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns + tokens
|
||||||
else:
|
else:
|
||||||
tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + prompttkns + tokens
|
tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + prompttkns + tokens
|
||||||
else:
|
else:
|
||||||
# Prepend Memory, WI, and Prompt before action tokens
|
# Prepend Memory, WI, and Prompt before action tokens
|
||||||
tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + prompttkns + tokens
|
tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + prompttkns + tokens
|
||||||
|
|
||||||
# Send completed bundle to generator
|
# Send completed bundle to generator
|
||||||
assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
|
assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
|
||||||
@ -4576,23 +4595,27 @@ def calcsubmit(txt):
|
|||||||
if(vars.model != "InferKit"):
|
if(vars.model != "InferKit"):
|
||||||
subtxt, min, max = calcsubmitbudget(actionlen, winfo, mem, anotetxt, vars.actions, submission=txt)
|
subtxt, min, max = calcsubmitbudget(actionlen, winfo, mem, anotetxt, vars.actions, submission=txt)
|
||||||
if(actionlen == 0):
|
if(actionlen == 0):
|
||||||
if(not vars.use_colab_tpu and vars.model not in ["Colab", "API", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
if(not vars.use_colab_tpu and vars.model not in ["Colab", "API", "CLUSTER", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
||||||
generate(subtxt, min, max, found_entries=found_entries)
|
generate(subtxt, min, max, found_entries=found_entries)
|
||||||
elif(vars.model == "Colab"):
|
elif(vars.model == "Colab"):
|
||||||
sendtocolab(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
|
sendtocolab(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
|
||||||
elif(vars.model == "API"):
|
elif(vars.model == "API"):
|
||||||
sendtoapi(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
|
sendtoapi(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
|
||||||
|
elif(vars.model == "CLUSTER"):
|
||||||
|
sendtocluster(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
|
||||||
elif(vars.model == "OAI"):
|
elif(vars.model == "OAI"):
|
||||||
oairequest(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
|
oairequest(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
|
||||||
elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
|
elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
|
||||||
tpumtjgenerate(subtxt, min, max, found_entries=found_entries)
|
tpumtjgenerate(subtxt, min, max, found_entries=found_entries)
|
||||||
else:
|
else:
|
||||||
if(not vars.use_colab_tpu and vars.model not in ["Colab", "API", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
if(not vars.use_colab_tpu and vars.model not in ["Colab", "API", "CLUSTER", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
|
||||||
generate(subtxt, min, max, found_entries=found_entries)
|
generate(subtxt, min, max, found_entries=found_entries)
|
||||||
elif(vars.model == "Colab"):
|
elif(vars.model == "Colab"):
|
||||||
sendtocolab(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
|
sendtocolab(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
|
||||||
elif(vars.model == "API"):
|
elif(vars.model == "API"):
|
||||||
sendtoapi(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
|
sendtoapi(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
|
||||||
|
elif(vars.model == "CLUSTER"):
|
||||||
|
sendtocluster(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
|
||||||
elif(vars.model == "OAI"):
|
elif(vars.model == "OAI"):
|
||||||
oairequest(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
|
oairequest(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
|
||||||
elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
|
elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
|
||||||
@ -5072,6 +5095,84 @@ def sendtoapi(txt, min, max):
|
|||||||
set_aibusy(0)
|
set_aibusy(0)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
#==================================================================#
|
||||||
|
# Send transformers-style request to KoboldAI Cluster
|
||||||
|
#==================================================================#
|
||||||
|
def sendtocluster(txt, min, max):
    """Send a generation request to a KoboldAI Cluster and hand the result to the UI.

    Builds the sampler settings from the global ``vars`` state, POSTs them
    together with the prompt to the cluster's ``/generate/sync`` endpoint,
    passes the returned generations through the Lua output modifier and then
    forwards them to ``genresult`` (single sequence) or ``genselect``
    (multiple sequences).

    Args:
        txt: The prompt/context text to submit.
        min: Lower token bound of the request; only used for the console
            log (``min-1``) and to derive ``max_length``.
        max: Upper token bound of the request; ``max - min + 1`` is sent as
            ``max_length``.

    Returns:
        None. Failures are reported to the client via an ``errmsg`` event
        and the busy flag is always cleared before returning.
    """
    # Log request to console
    if not vars.quiet:
        print("{0}Tokens:{1}, Txt:{2}{3}".format(colors.YELLOW, min-1, txt, colors.END))

    # Store context in memory to use it for comparison with generated content
    vars.lastctx = txt

    # Build request JSON data
    reqdata = {
        'max_length': max - min + 1,
        'max_context_length': vars.max_length,
        'rep_pen': vars.rep_pen,
        'rep_pen_slope': vars.rep_pen_slope,
        'rep_pen_range': vars.rep_pen_range,
        'temperature': vars.temp,
        'top_p': vars.top_p,
        'top_k': vars.top_k,
        'top_a': vars.top_a,
        'tfs': vars.tfs,
        'typical': vars.typical,
        'n': vars.numseqs,
    }
    cluster_metadata = {
        'prompt': txt,
        'params': reqdata,
        'username': vars.apikey,
        'models': vars.cluster_requested_models,
    }

    generic_errmsg = "KoboldAI API Error: Failed to get a reply from the server. Please check the console."

    # Create request
    # NOTE(review): the last 8 characters of colaburl are stripped, presumably
    # a fixed suffix such as "/request" -- confirm against where colaburl is set.
    try:
        req = requests.post(
            vars.colaburl[:-8] + "/generate/sync",
            json=cluster_metadata,
        )
    except requests.exceptions.RequestException as e:
        # Connection-level failure: report it instead of leaving the UI busy
        _sendtocluster_fail(generic_errmsg, str(e))
        return

    # Error responses are not guaranteed to carry a JSON body, so a parse
    # failure must not prevent the status code from being reported.
    try:
        js = req.json()
        details = json.dumps(js, indent=2)
    except ValueError:
        js = None
        details = req.text

    if(req.status_code == 503):
        _sendtocluster_fail(
            "KoboldAI API Error: No available KoboldAI servers found in cluster to fulfil this request using the selected models and requested lengths.",
            details,
        )
        return
    if(req.status_code != 200):
        _sendtocluster_fail(generic_errmsg, details)
        return
    # The code below indexes the reply as a sequence of generated strings
    if(not isinstance(js, list) or len(js) == 0):
        _sendtocluster_fail(generic_errmsg, details)
        return
    genout = js

    # Expose every returned generation to the Lua bridge (1-based slots).
    # Iterating over the reply itself avoids an IndexError when the cluster
    # returns fewer generations than vars.numseqs.
    for slot, generated in enumerate(genout, start=1):
        vars.lua_koboldbridge.outputs[slot] = generated

    execute_outmod()
    if(vars.lua_koboldbridge.regeneration_required):
        vars.lua_koboldbridge.regeneration_required = False
        # Read the (possibly modified) outputs back from the Lua bridge
        num_outputs = len(genout)
        genout = []
        for i in range(num_outputs):
            genout.append(vars.lua_koboldbridge.outputs[i+1])
            assert type(genout[-1]) is str

    if(len(genout) == 1):
        genresult(genout[0])
    else:
        # Convert the plain strings to transformers-style records
        seqs = [{"generated_text": seq} for seq in genout]
        restart_sequence = vars.lua_koboldbridge.restart_sequence
        if(restart_sequence is not None and restart_sequence > 0):
            # restart_sequence is 1-based
            genresult(seqs[restart_sequence-1]["generated_text"])
        else:
            genselect(seqs)

    set_aibusy(0)
    return


def _sendtocluster_fail(errmsg, details):
    """Print the failure details in red, send errmsg to the client and clear the busy flag."""
    print("{0}{1}{2}".format(colors.RED, details, colors.END))
    emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
    set_aibusy(0)
|
||||||
|
|
||||||
#==================================================================#
|
#==================================================================#
|
||||||
# Send text to TPU mesh transformer backend
|
# Send text to TPU mesh transformer backend
|
||||||
|
Loading…
x
Reference in New Issue
Block a user