Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)
Fix conflict
...and bugs
401 aiserver.py
@@ -5156,7 +5156,7 @@ def raw_generate(
             out_batches=batch_encoded, prompt=prompt_tokens, is_whole_generation=True
         )
     elif koboldai_vars.model in model_functions:
-        model_functions[koboldai_vars.model](
+        batch_encoded = model_functions[koboldai_vars.model](
             prompt_tokens=prompt_tokens,
             max_new=max_new,
             batch_count=batch_count,
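The hunk above fixes a dropped return value: raw_generate dispatches to a per-model backend through the model_functions table, but previously discarded whatever the backend returned, so batch_encoded was never bound for those models. A minimal, self-contained sketch of the dispatch-table pattern (all names below are illustrative, not the real KoboldAI signatures):

from typing import Callable, Dict, List

def fake_backend(prompt_tokens: List[int], max_new: int, batch_count: int) -> List[List[int]]:
    # Pretend generation: echo the prompt plus dummy tokens for each batch.
    return [prompt_tokens + [0] * max_new for _ in range(batch_count)]

model_functions: Dict[str, Callable[..., List[List[int]]]] = {"FakeModel": fake_backend}

# The bug fixed above: calling the dispatched function without binding its
# result silently threw the generations away. Capturing it keeps them.
batch_encoded = model_functions["FakeModel"](prompt_tokens=[1, 2, 3], max_new=4, batch_count=2)
assert len(batch_encoded) == 2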
@@ -5374,40 +5374,89 @@ def cluster_raw_generate(
         'prompt': decoded_prompt,
         'params': reqdata,
         'api_key': koboldai_vars.apikey,
-        'models': koboldai_vars.cluster_requested_models,
+        'models': [x for x in koboldai_vars.cluster_requested_models if x],
     }
 
+    try:
         # Create request
         req = requests.post(
-            koboldai_vars.colaburl[:-8] + "/api/v1/generate/sync",
+            koboldai_vars.colaburl[:-8] + "/api/v1/generate/async",
             json=cluster_metadata,
         )
-    js = req.json()
+    except requests.exceptions.ConnectionError:
+        errmsg = f"Horde unavailable. Please try again later"
+        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
+        logger.error(errmsg)
+        raise HordeException(errmsg)
+
+    if req.status_code == 503:
+        errmsg = f"KoboldAI API Error: No available KoboldAI servers found in Horde to fulfil this request using the selected models or other properties."
+        logger.error(errmsg)
+        raise HordeException(errmsg)
+    elif not req.ok:
+        errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
+        logger.error(errmsg)
+        raise HordeException(errmsg)
+
+    try:
+        js = req.json()
+    except requests.exceptions.JSONDecodeError:
+        errmsg = f"Unexpected message received from the Horde: '{req.text}'"
+        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
+        logger.error(errmsg)
+        raise HordeException(errmsg)
-    if(req.status_code == 503):
-        errmsg = f"KoboldAI API Error: No available KoboldAI servers found in Horde to fulfil this request using the selected models or other properties."
-        print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
-        raise HordeException(errmsg)
-    if(req.status_code != 200):
-        errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
-        print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
-        raise HordeException(errmsg)
-    gen_servers = [(cgen['server_name'],cgen['server_id']) for cgen in js]
-    print(f"{colors.GREEN}Generations by: {gen_servers}{colors.END}")
 
+    request_id = js["id"]
+    logger.debug("Horde Request ID: {}".format(request_id))
+
+    # TODO: Fix this. Request context issues!!
+    # We've sent the request and got the ID back, now we need to watch it to see when it finishes
+    finished = False
+
+    while not finished:
+        try:
+            req = requests.get(koboldai_vars.colaburl[:-8] + "/api/v1/generate/check/" + request_id)
+        except requests.exceptions.ConnectionError:
+            errmsg = f"Horde unavailable. Please try again later"
+            logger.error(errmsg)
+            raise HordeException(errmsg)
+
+        if not req.ok:
+            errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
+            logger.error(req.text)
+            raise HordeException(errmsg)
+
+        try:
+            js = req.json()
+        except requests.exceptions.JSONDecodeError:
+            errmsg = f"Unexpected message received from the KoboldAI Horde: '{req.text}'"
+            logger.error(errmsg)
+            raise HordeException(errmsg)
+
+        if "done" not in js:
+            errmsg = f"Unexpected response received from the KoboldAI Horde: '{js}'"
+            logger.error(errmsg)
+            raise HordeException(errmsg)
+
+        finished = js["done"]
+        koboldai_vars.horde_wait_time = js["wait_time"]
+        koboldai_vars.horde_queue_position = js["queue_position"]
+        koboldai_vars.horde_queue_size = js["waiting"]
+
+        if not finished:
+            logger.debug(js)
+            time.sleep(1)
+
+    logger.debug("Last Horde Status Message: {}".format(js))
+    js = requests.get(koboldai_vars.colaburl[:-8] + "/api/v1/generate/prompt/" + request_id).json()['generations']
+    logger.debug("Horde Result: {}".format(js))
+
+    gen_servers = [(cgen['server_name'],cgen['server_id']) for cgen in js]
+    logger.info(f"Generations by: {gen_servers}")
+
+    # TODO: Fix this, using tpool so it's a context error
+    # Just in case we want to announce it to the user
+    # if len(js) == 1:
+    #     warnmsg = f"Text generated by {js[0]['server_name']}"
+    #     emit('from_server', {'cmd': 'warnmsg', 'data': warnmsg}, broadcast=True)
+
     return np.array([tokenizer.encode(cgen["text"]) for cgen in js])
 
 def colab_raw_generate(
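This hunk also moves Horde submission from the blocking /api/v1/generate/sync endpoint to /api/v1/generate/async, which changes the client contract: the POST now returns a job id immediately, the client polls /api/v1/generate/check/<id> until "done" is true (mirroring wait time and queue position into koboldai_vars along the way), and finally fetches the results from /api/v1/generate/prompt/<id>. A condensed sketch of that flow, with the error handling and UI bookkeeping of the real code elided (base_url stands in for koboldai_vars.colaburl[:-8]):

import time
import requests

def horde_generate(base_url: str, cluster_metadata: dict) -> list:
    # Submit the job; the async endpoint replies with a request id right away.
    request_id = requests.post(base_url + "/api/v1/generate/async", json=cluster_metadata).json()["id"]
    # Poll once per second (matching the time.sleep(1) in the hunk) until a worker finishes.
    while True:
        status = requests.get(base_url + "/api/v1/generate/check/" + request_id).json()
        if status["done"]:
            break
        time.sleep(1)
    # Fetch the finished generations.
    return requests.get(base_url + "/api/v1/generate/prompt/" + request_id).json()["generations"]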
@@ -5512,7 +5561,6 @@ def api_raw_generate(
 #==================================================================#
 
 def generate(txt, minimum, maximum, found_entries=None):
-    print("ring ring", txt, minimum, maximum, found_entries)
     koboldai_vars.generated_tkns = 0
 
     if(found_entries is None):
@@ -5662,323 +5710,6 @@ def pinsequence(n):
 
 
-#==================================================================#
-# BEGIN CONFLICT
-# Send transformers-style request to ngrok/colab host
-#==================================================================#
-def sendtocolab(txt, min, max):
-    # Log request to console
-    if not koboldai_vars.quiet:
-        print("{0}Tokens:{1}, Txt:{2}{3}".format(colors.YELLOW, min-1, txt, colors.END))
-
-    # Store context in memory to use it for comparison with generated content
-    koboldai_vars.lastctx = txt
-
-    # Build request JSON data
-    reqdata = {
-        'text': txt,
-        'min': min,
-        'max': max,
-        'rep_pen': koboldai_vars.rep_pen,
-        'rep_pen_slope': koboldai_vars.rep_pen_slope,
-        'rep_pen_range': koboldai_vars.rep_pen_range,
-        'temperature': koboldai_vars.temp,
-        'top_p': koboldai_vars.top_p,
-        'top_k': koboldai_vars.top_k,
-        'tfs': koboldai_vars.tfs,
-        'typical': koboldai_vars.typical,
-        'topa': koboldai_vars.top_a,
-        'numseqs': koboldai_vars.numseqs,
-        'retfultxt': False
-    }
-
-    # Create request
-    req = requests.post(
-        koboldai_vars.colaburl,
-        json = reqdata
-    )
-
-    # Deal with the response
-    if(req.status_code == 200):
-        js = req.json()["data"]
-
-        # Try to be backwards compatible with outdated colab
-        if("text" in js):
-            genout = [getnewcontent(js["text"])]
-        else:
-            genout = js["seqs"]
-
-        for i in range(koboldai_vars.numseqs):
-            koboldai_vars.lua_koboldbridge.outputs[i+1] = genout[i]
-
-        execute_outmod()
-        if(koboldai_vars.lua_koboldbridge.regeneration_required):
-            koboldai_vars.lua_koboldbridge.regeneration_required = False
-            genout = []
-            for i in range(koboldai_vars.numseqs):
-                genout.append(koboldai_vars.lua_koboldbridge.outputs[i+1])
-                assert type(genout[-1]) is str
-
-        koboldai_vars.actions.clear_unused_options()
-        koboldai_vars.actions.append_options([applyoutputformatting(x["generated_text"]) for x in genout])
-        genout = [{"generated_text": x['text']} for x in koboldai_vars.actions.get_current_options()]
-        if(len(genout) == 1):
-            genresult(genout[0])
-        else:
-            # Convert torch output format to transformers
-            seqs = []
-            for seq in genout:
-                seqs.append({"generated_text": seq})
-            if(koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
-                genresult(genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
-            else:
-                genselect(genout)
-
-        # Format output before continuing
-        #genout = applyoutputformatting(getnewcontent(genout))
-
-        # Add formatted text to Actions array and refresh the game screen
-        #koboldai_vars.actions.append(genout)
-        #refresh_story()
-        #emit('from_server', {'cmd': 'texteffect', 'data': koboldai_vars.actions.get_last_key() + 1 if len(koboldai_vars.actions) else 0})
-
-        set_aibusy(0)
-    else:
-        errmsg = "Colab API Error: Failed to get a reply from the server. Please check the colab console."
-        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True, room="UI_1")
-        set_aibusy(0)
-
-
-#==================================================================#
-# Send transformers-style request to KoboldAI API
-#==================================================================#
-def sendtoapi(txt, min, max):
-    # Log request to console
-    if not koboldai_vars.quiet:
-        print("{0}Tokens:{1}, Txt:{2}{3}".format(colors.YELLOW, min-1, txt, colors.END))
-
-    # Store context in memory to use it for comparison with generated content
-    koboldai_vars.lastctx = txt
-
-    # Build request JSON data
-    reqdata = {
-        'prompt': txt,
-        'max_length': max - min + 1,
-        'max_context_length': koboldai_vars.max_length,
-        'rep_pen': koboldai_vars.rep_pen,
-        'rep_pen_slope': koboldai_vars.rep_pen_slope,
-        'rep_pen_range': koboldai_vars.rep_pen_range,
-        'temperature': koboldai_vars.temp,
-        'top_p': koboldai_vars.top_p,
-        'top_k': koboldai_vars.top_k,
-        'top_a': koboldai_vars.top_a,
-        'tfs': koboldai_vars.tfs,
-        'typical': koboldai_vars.typical,
-        'n': koboldai_vars.numseqs,
-    }
-
-    # Create request
-    while True:
-        req = requests.post(
-            koboldai_vars.colaburl[:-8] + "/api/v1/generate",
-            json=reqdata,
-        )
-        if(req.status_code == 503):  # Server is currently generating something else so poll until it's our turn
-            time.sleep(1)
-            continue
-        js = req.json()
-        if(req.status_code != 200):
-            errmsg = "KoboldAI API Error: Failed to get a reply from the server. Please check the console."
-            print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
-            emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-            emit("error", errmsg, broadcast=True, room="UI_2")
-            set_aibusy(0)
-            return
-
-        genout = [obj["text"] for obj in js["results"]]
-
-        for i in range(koboldai_vars.numseqs):
-            koboldai_vars.lua_koboldbridge.outputs[i+1] = genout[i]
-
-        execute_outmod()
-        if(koboldai_vars.lua_koboldbridge.regeneration_required):
-            koboldai_vars.lua_koboldbridge.regeneration_required = False
-            genout = []
-            for i in range(koboldai_vars.numseqs):
-                genout.append(koboldai_vars.lua_koboldbridge.outputs[i+1])
-                assert type(genout[-1]) is str
-
-        if(len(genout) == 1):
-            genresult(genout[0])
-        else:
-            adjusted_genout = []
-            for item in genout:
-                adjusted_genout.append({"generated_text": item})
-            # Convert torch output format to transformers
-            seqs = []
-            for seq in adjusted_genout:
-                seqs.append({"generated_text": seq})
-            if(koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
-                genresult(adjusted_genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
-            else:
-                genselect(adjusted_genout)
-
-        set_aibusy(0)
-        return
-
-#==================================================================#
-# Send transformers-style request to KoboldAI Cluster
-#==================================================================#
-def sendtocluster(txt, min, max):
-    # Log request to console
-    if not koboldai_vars.quiet:
-        logger.debug(f"Tokens Min:{min-1}")
-        logger.prompt(txt.encode("unicode_escape").decode("utf-8"))
-
-    # Store context in memory to use it for comparison with generated content
-    koboldai_vars.lastctx = txt
-    # Build request JSON data
-    reqdata = {
-        'max_length': max - min + 1,
-        'max_context_length': koboldai_vars.max_length,
-        'rep_pen': koboldai_vars.rep_pen,
-        'rep_pen_slope': koboldai_vars.rep_pen_slope,
-        'rep_pen_range': koboldai_vars.rep_pen_range,
-        'temperature': koboldai_vars.temp,
-        'top_p': koboldai_vars.top_p,
-        'top_k': koboldai_vars.top_k,
-        'top_a': koboldai_vars.top_a,
-        'tfs': koboldai_vars.tfs,
-        'typical': koboldai_vars.typical,
-        'n': koboldai_vars.numseqs,
-    }
-    cluster_metadata = {
-        'prompt': txt,
-        'params': reqdata,
-        'api_key': koboldai_vars.apikey,
-        'models': koboldai_vars.cluster_requested_models,
-    }
-    if cluster_metadata["models"] == [""]:
-        cluster_metadata["models"] = []
-    logger.debug(f"Horde Payload: {cluster_metadata}")
-    try:
-        # Create request
-        req = requests.post(
-            koboldai_vars.colaburl[:-8] + "/api/v1/generate/async",
-            json=cluster_metadata,
-        )
-    except requests.exceptions.ConnectionError:
-        errmsg = f"Horde unavailable. Please try again later"
-        logger.error(errmsg)
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-        set_aibusy(0)
-        return
-    if(req.status_code == 503):
-        errmsg = f"KoboldAI API Error: No available KoboldAI servers found in Horde to fulfil this request using the selected models or other properties."
-        logger.error(req.text)
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-        set_aibusy(0)
-        return
-    if(not req.ok):
-        errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
-        logger.error(req.text)
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-        set_aibusy(0)
-        return
-
-    try:
-        js = req.json()
-    except requests.exceptions.JSONDecodeError:
-        errmsg = f"Unexpected message received from the KoboldAI Horde: '{req.text}'"
-        logger.error(errmsg)
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-        set_aibusy(0)
-        return
-    request_id = js['id']
-    logger.debug("Horde Request ID: {}".format(request_id))
-    # We've sent the request and got the ID back, now we need to watch it to see when it finishes
-    finished = False
-    while not finished:
-        try:
-            req = requests.get(koboldai_vars.colaburl[:-8] + "/api/v1/generate/check/" + request_id)
-        except requests.exceptions.ConnectionError:
-            errmsg = f"Horde unavailable. Please try again later"
-            logger.error(errmsg)
-            emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-            set_aibusy(0)
-            return
-        if(not req.ok):
-            errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
-            logger.error(req.text)
-            emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-            set_aibusy(0)
-            return
-        try:
-            js = req.json()
-        except requests.exceptions.JSONDecodeError:
-            errmsg = f"Unexpected message received from the KoboldAI Horde: '{req.text}'"
-            logger.error(errmsg)
-            emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-            set_aibusy(0)
-            return
-        if not "done" in js:
-            errmsg = f"Unexpected response received from the KoboldAI Horde: '{js}'"
-            logger.error(errmsg)
-            emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-            set_aibusy(0)
-            return
-        finished = js["done"]
-        koboldai_vars.horde_wait_time = js["wait_time"]
-        koboldai_vars.horde_queue_position = js["queue_position"]
-        koboldai_vars.horde_queue_size = js["waiting"]
-        if not finished:
-            logger.debug(js)
-            time.sleep(1)
-
-    logger.debug("Last Horde Status Message: {}".format(js))
-    js = requests.get(koboldai_vars.colaburl[:-8] + "/api/v1/generate/prompt/" + request_id).json()['generations']
-    logger.debug("Horde Result: {}".format(js))
-
-    gen_servers = [(cgen['server_name'],cgen['server_id']) for cgen in js]
-    logger.info(f"Generations by: {gen_servers}")
-    # Just in case we want to announce it to the user
-    if len(js) == 1:
-        warnmsg = f"Text generated by {js[0]['server_name']}"
-        emit('from_server', {'cmd': 'warnmsg', 'data': warnmsg}, broadcast=True)
-    genout = [cgen['text'] for cgen in js]
-
-    for i in range(koboldai_vars.numseqs):
-        koboldai_vars.lua_koboldbridge.outputs[i+1] = genout[i]
-
-    execute_outmod()
-    if(koboldai_vars.lua_koboldbridge.regeneration_required):
-        koboldai_vars.lua_koboldbridge.regeneration_required = False
-        genout = []
-        for i in range(koboldai_vars.numseqs):
-            genout.append(koboldai_vars.lua_koboldbridge.outputs[i+1])
-            assert type(genout[-1]) is str
-
-    if(len(genout) == 1):
-        genresult(genout[0])
-    else:
-        adjusted_genout = []
-        for item in genout:
-            adjusted_genout.append({"generated_text": item})
-        # Convert torch output format to transformers
-        seqs = []
-        for seq in adjusted_genout:
-            seqs.append({"generated_text": seq})
-        if(koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
-            genresult(adjusted_genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
-        else:
-            genselect(adjusted_genout)
-
-    set_aibusy(0)
-    return
-
 #==================================================================#
-# END CONFLICT
 # Send text to TPU mesh transformer backend
 #==================================================================#
 def tpumtjgenerate(txt, minimum, maximum, found_entries=None):
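Note the models-normalization difference between the deleted sendtocluster above and the updated cluster_raw_generate: the old code emptied the list only when it was exactly [""], while the new code filters every falsy entry with a comprehension. A small sketch of why the comprehension is the more robust normalization (the model name below is illustrative, not from the commit):

# Old approach (removed above): only handles the exact value [""].
requested = ["", "KoboldAI/Some-Model", ""]
models_old = [] if requested == [""] else requested

# New approach (first hunk): drops every empty entry, whatever the list looks like.
models_new = [x for x in requested if x]

assert models_new == ["KoboldAI/Some-Model"]
assert models_old == requested  # the old check misses mixed lists entirely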