From 8f795b427002ceb48334be936218649c2a8bc2de Mon Sep 17 00:00:00 2001
From: somebody
Date: Sat, 24 Sep 2022 16:24:31 -0500
Subject: [PATCH] Fix conflict ...and bugs

---
 aiserver.py | 401 +++++++++------------------------------------------
 1 file changed, 66 insertions(+), 335 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 0c6d7114..e2dd6905 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -5156,7 +5156,7 @@ def raw_generate(
             out_batches=batch_encoded, prompt=prompt_tokens, is_whole_generation=True
         )
     elif koboldai_vars.model in model_functions:
-        model_functions[koboldai_vars.model](
+        batch_encoded = model_functions[koboldai_vars.model](
             prompt_tokens=prompt_tokens,
             max_new=max_new,
             batch_count=batch_count,
@@ -5374,40 +5374,89 @@ def cluster_raw_generate(
         'prompt': decoded_prompt,
         'params': reqdata,
         'api_key': koboldai_vars.apikey,
-        'models': koboldai_vars.cluster_requested_models,
+        'models': [x for x in koboldai_vars.cluster_requested_models if x],
     }

     try:
         # Create request
         req = requests.post(
-            koboldai_vars.colaburl[:-8] + "/api/v1/generate/sync",
+            koboldai_vars.colaburl[:-8] + "/api/v1/generate/async",
             json=cluster_metadata,
         )
-        js = req.json()
     except requests.exceptions.ConnectionError:
         errmsg = f"Horde unavailable. Please try again later"
-        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
+        logger.error(errmsg)
         raise HordeException(errmsg)
+
+    if req.status_code == 503:
+        errmsg = f"KoboldAI API Error: No available KoboldAI servers found in Horde to fulfil this request using the selected models or other properties."
+        logger.error(errmsg)
+        raise HordeException(errmsg)
+    elif not req.ok:
+        errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
+        logger.error(errmsg)
+        raise HordeException(errmsg)
+
+    try:
+        js = req.json()
     except requests.exceptions.JSONDecodeError:
         errmsg = f"Unexpected message received from the Horde: '{req.text}'"
-        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
+        logger.error(errmsg)
         raise HordeException(errmsg)
-    if(req.status_code == 503):
-        errmsg = f"KoboldAI API Error: No available KoboldAI servers found in Horde to fulfil this request using the selected models or other properties."
-        print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
-        raise HordeException(errmsg)
-    if(req.status_code != 200):
-        errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
-        print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
-        raise HordeException(errmsg)
-    gen_servers = [(cgen['server_name'],cgen['server_id']) for cgen in js]
-    print(f"{colors.GREEN}Generations by: {gen_servers}{colors.END}")
+
+    request_id = js["id"]
+    logger.debug("Horde Request ID: {}".format(request_id))

-    # TODO: Fix this. Request context issues!!
+    # We've sent the request and got the ID back, now we need to watch it to see when it finishes
+    finished = False
+
+    while not finished:
+        try:
+            req = requests.get(koboldai_vars.colaburl[:-8] + "/api/v1/generate/check/" + request_id)
+        except requests.exceptions.ConnectionError:
+            errmsg = f"Horde unavailable. Please try again later"
+            logger.error(errmsg)
+            raise HordeException(errmsg)
+
+        if not req.ok:
+            errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
+            logger.error(req.text)
+            raise HordeException(errmsg)
+
+        try:
+            js = req.json()
+        except requests.exceptions.JSONDecodeError:
+            errmsg = f"Unexpected message received from the KoboldAI Horde: '{req.text}'"
+            logger.error(errmsg)
+            raise HordeException(errmsg)
+
+        if "done" not in js:
+            errmsg = f"Unexpected response received from the KoboldAI Horde: '{js}'"
+            logger.error(errmsg)
+            raise HordeException(errmsg)
+
+        finished = js["done"]
+        koboldai_vars.horde_wait_time = js["wait_time"]
+        koboldai_vars.horde_queue_position = js["queue_position"]
+        koboldai_vars.horde_queue_size = js["waiting"]
+
+        if not finished:
+            logger.debug(js)
+            time.sleep(1)
+
+    logger.debug("Last Horde Status Message: {}".format(js))
+    js = requests.get(koboldai_vars.colaburl[:-8] + "/api/v1/generate/prompt/" + request_id).json()['generations']
+    logger.debug("Horde Result: {}".format(js))
+
+    gen_servers = [(cgen['server_name'],cgen['server_id']) for cgen in js]
+    logger.info(f"Generations by: {gen_servers}")
+
+    # TODO: Fix this, using tpool so it's a context error
     # Just in case we want to announce it to the user
     # if len(js) == 1:
     #     warnmsg = f"Text generated by {js[0]['server_name']}"
     #     emit('from_server', {'cmd': 'warnmsg', 'data': warnmsg}, broadcast=True)
+
     return np.array([tokenizer.encode(cgen["text"]) for cgen in js])

 def colab_raw_generate(
@@ -5512,7 +5561,6 @@ def api_raw_generate(
 #==================================================================#

 def generate(txt, minimum, maximum, found_entries=None):
-    print("ring ring", txt, minimum, maximum, found_entries)
     koboldai_vars.generated_tkns = 0

     if(found_entries is None):
@@ -5662,323 +5710,6 @@ def pinsequence(n):

 #==================================================================#
-# BEGIN CONFLICT
-# Send transformers-style request to ngrok/colab host
-#==================================================================#
-def sendtocolab(txt, min, max):
-    # Log request to console
-    if not koboldai_vars.quiet:
-        print("{0}Tokens:{1}, Txt:{2}{3}".format(colors.YELLOW, min-1, txt, colors.END))
-
-    # Store context in memory to use it for comparison with generated content
-    koboldai_vars.lastctx = txt
-
-    # Build request JSON data
-    reqdata = {
-        'text': txt,
-        'min': min,
-        'max': max,
-        'rep_pen': koboldai_vars.rep_pen,
-        'rep_pen_slope': koboldai_vars.rep_pen_slope,
-        'rep_pen_range': koboldai_vars.rep_pen_range,
-        'temperature': koboldai_vars.temp,
-        'top_p': koboldai_vars.top_p,
-        'top_k': koboldai_vars.top_k,
-        'tfs': koboldai_vars.tfs,
-        'typical': koboldai_vars.typical,
-        'topa': koboldai_vars.top_a,
-        'numseqs': koboldai_vars.numseqs,
-        'retfultxt': False
-    }
-
-    # Create request
-    req = requests.post(
-        koboldai_vars.colaburl,
-        json = reqdata
-        )
-
-    # Deal with the response
-    if(req.status_code == 200):
-        js = req.json()["data"]
-
-        # Try to be backwards compatible with outdated colab
-        if("text" in js):
-            genout = [getnewcontent(js["text"])]
-        else:
-            genout = js["seqs"]
-
-        for i in range(koboldai_vars.numseqs):
-            koboldai_vars.lua_koboldbridge.outputs[i+1] = genout[i]
-
-        execute_outmod()
-        if(koboldai_vars.lua_koboldbridge.regeneration_required):
-            koboldai_vars.lua_koboldbridge.regeneration_required = False
-            genout = []
-            for i in range(koboldai_vars.numseqs):
-                genout.append(koboldai_vars.lua_koboldbridge.outputs[i+1])
-                assert type(genout[-1]) is str
-
-        koboldai_vars.actions.clear_unused_options()
-        koboldai_vars.actions.append_options([applyoutputformatting(x["generated_text"]) for x in genout])
-        genout = [{"generated_text": x['text']} for x in koboldai_vars.actions.get_current_options()]
-        if(len(genout) == 1):
-
-            genresult(genout[0])
-        else:
-            # Convert torch output format to transformers
-            seqs = []
-            for seq in genout:
-                seqs.append({"generated_text": seq})
-            if(koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
-                genresult(genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
-            else:
-                genselect(genout)
-
-        # Format output before continuing
-        #genout = applyoutputformatting(getnewcontent(genout))
-
-        # Add formatted text to Actions array and refresh the game screen
-        #koboldai_vars.actions.append(genout)
-        #refresh_story()
-        #emit('from_server', {'cmd': 'texteffect', 'data': koboldai_vars.actions.get_last_key() + 1 if len(koboldai_vars.actions) else 0})
-
-        set_aibusy(0)
-    else:
-        errmsg = "Colab API Error: Failed to get a reply from the server. Please check the colab console."
-        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True, room="UI_1")
-        set_aibusy(0)
-
-
-#==================================================================#
-# Send transformers-style request to KoboldAI API
-#==================================================================#
-def sendtoapi(txt, min, max):
-    # Log request to console
-    if not koboldai_vars.quiet:
-        print("{0}Tokens:{1}, Txt:{2}{3}".format(colors.YELLOW, min-1, txt, colors.END))
-
-    # Store context in memory to use it for comparison with generated content
-    koboldai_vars.lastctx = txt
-
-    # Build request JSON data
-    reqdata = {
-        'prompt': txt,
-        'max_length': max - min + 1,
-        'max_context_length': koboldai_vars.max_length,
-        'rep_pen': koboldai_vars.rep_pen,
-        'rep_pen_slope': koboldai_vars.rep_pen_slope,
-        'rep_pen_range': koboldai_vars.rep_pen_range,
-        'temperature': koboldai_vars.temp,
-        'top_p': koboldai_vars.top_p,
-        'top_k': koboldai_vars.top_k,
-        'top_a': koboldai_vars.top_a,
-        'tfs': koboldai_vars.tfs,
-        'typical': koboldai_vars.typical,
-        'n': koboldai_vars.numseqs,
-    }
-
-    # Create request
-    while True:
-        req = requests.post(
-            koboldai_vars.colaburl[:-8] + "/api/v1/generate",
-            json=reqdata,
-        )
-        if(req.status_code == 503):  # Server is currently generating something else so poll until it's our turn
-            time.sleep(1)
-            continue
-        js = req.json()
-        if(req.status_code != 200):
-            errmsg = "KoboldAI API Error: Failed to get a reply from the server. Please check the console."
-            print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
-            emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-            emit("error", errmsg, broadcast=True, room="UI_2")
-            set_aibusy(0)
-            return
-
-        genout = [obj["text"] for obj in js["results"]]
-
-        for i in range(koboldai_vars.numseqs):
-            koboldai_vars.lua_koboldbridge.outputs[i+1] = genout[i]
-
-        execute_outmod()
-        if(koboldai_vars.lua_koboldbridge.regeneration_required):
-            koboldai_vars.lua_koboldbridge.regeneration_required = False
-            genout = []
-            for i in range(koboldai_vars.numseqs):
-                genout.append(koboldai_vars.lua_koboldbridge.outputs[i+1])
-                assert type(genout[-1]) is str
-
-        if(len(genout) == 1):
-            genresult(genout[0])
-        else:
-            adjusted_genout = []
-            for item in genout:
-                adjusted_genout.append({"generated_text": item})
-            # Convert torch output format to transformers
-            seqs = []
-            for seq in adjusted_genout:
-                seqs.append({"generated_text": seq})
-            if(koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
-                genresult(adjusted_genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
-            else:
-                genselect(adjusted_genout)
-
-        set_aibusy(0)
-        return
-
-#==================================================================#
-# Send transformers-style request to KoboldAI Cluster
-#==================================================================#
-def sendtocluster(txt, min, max):
-    # Log request to console
-    if not koboldai_vars.quiet:
-        logger.debug(f"Tokens Min:{min-1}")
-        logger.prompt(txt.encode("unicode_escape").decode("utf-8"))
-
-    # Store context in memory to use it for comparison with generated content
-    koboldai_vars.lastctx = txt
-    # Build request JSON data
-    reqdata = {
-        'max_length': max - min + 1,
-        'max_context_length': koboldai_vars.max_length,
-        'rep_pen': koboldai_vars.rep_pen,
-        'rep_pen_slope': koboldai_vars.rep_pen_slope,
-        'rep_pen_range': koboldai_vars.rep_pen_range,
-        'temperature': koboldai_vars.temp,
-        'top_p': koboldai_vars.top_p,
-        'top_k': koboldai_vars.top_k,
-        'top_a': koboldai_vars.top_a,
-        'tfs': koboldai_vars.tfs,
-        'typical': koboldai_vars.typical,
-        'n': koboldai_vars.numseqs,
-    }
-    cluster_metadata = {
-        'prompt': txt,
-        'params': reqdata,
-        'api_key': koboldai_vars.apikey,
-        'models': koboldai_vars.cluster_requested_models,
-    }
-    if cluster_metadata["models"] == [""]:
-        cluster_metadata["models"] = []
-    logger.debug(f"Horde Payload: {cluster_metadata}")
-    try:
-        # Create request
-        req = requests.post(
-            koboldai_vars.colaburl[:-8] + "/api/v1/generate/async",
-            json=cluster_metadata,
-        )
-    except requests.exceptions.ConnectionError:
-        errmsg = f"Horde unavailable. Please try again later"
-        logger.error(errmsg)
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-        set_aibusy(0)
-        return
-    if(req.status_code == 503):
-        errmsg = f"KoboldAI API Error: No available KoboldAI servers found in Horde to fulfil this request using the selected models or other properties."
-        logger.error(req.text)
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-        set_aibusy(0)
-        return
-    if(not req.ok):
-        errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
-        logger.error(req.text)
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-        set_aibusy(0)
-        return
-
-    try:
-        js = req.json()
-    except requests.exceptions.JSONDecodeError:
-        errmsg = f"Unexpected message received from the KoboldAI Horde: '{req.text}'"
-        logger.error(errmsg)
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-        set_aibusy(0)
-        return
-    request_id = js['id']
-    logger.debug("Horde Request ID: {}".format(request_id))
-    #We've sent the request and got the ID back, now we need to watch it to see when it finishes
-    finished = False
-    while not finished:
-        try:
-            req = requests.get(koboldai_vars.colaburl[:-8] + "/api/v1/generate/check/" + request_id)
-        except requests.exceptions.ConnectionError:
-            errmsg = f"Horde unavailable. Please try again later"
-            logger.error(errmsg)
-            emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-            set_aibusy(0)
-            return
-        if(not req.ok):
-            errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
-            logger.error(req.text)
-            emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-            set_aibusy(0)
-            return
-        try:
-            js = req.json()
-        except requests.exceptions.JSONDecodeError:
-            errmsg = f"Unexpected message received from the KoboldAI Horde: '{req.text}'"
-            logger.error(errmsg)
-            emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-            set_aibusy(0)
-            return
-        if not "done" in js:
-            errmsg = f"Unexpected response received from the KoboldAI Horde: '{js}'"
-            logger.error(errmsg )
-            emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-            set_aibusy(0)
-            return
-        finished = js["done"]
-        koboldai_vars.horde_wait_time = js["wait_time"]
-        koboldai_vars.horde_queue_position = js["queue_position"]
-        koboldai_vars.horde_queue_size = js["waiting"]
-        if not finished:
-            logger.debug(js)
-            time.sleep(1)
-
-    logger.debug("Last Horde Status Message: {}".format(js))
-    js = requests.get(koboldai_vars.colaburl[:-8] + "/api/v1/generate/prompt/" + request_id).json()['generations']
-    logger.debug("Horde Result: {}".format(js))
-
-    gen_servers = [(cgen['server_name'],cgen['server_id']) for cgen in js]
-    logger.info(f"Generations by: {gen_servers}")
-    # Just in case we want to announce it to the user
-    if len(js) == 1:
-        warnmsg = f"Text generated by {js[0]['server_name']}"
-        emit('from_server', {'cmd': 'warnmsg', 'data': warnmsg}, broadcast=True)
-    genout = [cgen['text'] for cgen in js]
-
-    for i in range(koboldai_vars.numseqs):
-        koboldai_vars.lua_koboldbridge.outputs[i+1] = genout[i]
-
-    execute_outmod()
-    if(koboldai_vars.lua_koboldbridge.regeneration_required):
-        koboldai_vars.lua_koboldbridge.regeneration_required = False
-        genout = []
-        for i in range(koboldai_vars.numseqs):
-            genout.append(koboldai_vars.lua_koboldbridge.outputs[i+1])
-            assert type(genout[-1]) is str
-
-    if(len(genout) == 1):
-        genresult(genout[0])
-    else:
-        adjusted_genout = []
-        for item in genout:
-            adjusted_genout.append({"generated_text": item})
-        # Convert torch output format to transformers
-        seqs = []
-        for seq in adjusted_genout:
-            seqs.append({"generated_text": seq})
-        if(koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
-            genresult(adjusted_genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
-        else:
-            genselect(adjusted_genout)
-
-    set_aibusy(0)
-    return
-
-#==================================================================#
-# END CONFLICT
 # Send text to TPU mesh transformer backend
 #==================================================================#
 def tpumtjgenerate(txt, minimum, maximum, found_entries=None):
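
Reviewer note: the diff above replaces the blocking /api/v1/generate/sync call in cluster_raw_generate with the Horde's submit/poll/fetch flow. Below is a minimal standalone sketch of that flow for reference only; base_url stands in for koboldai_vars.colaburl[:-8], poll_interval is a made-up knob, and all error handling is collapsed into RuntimeError, so treat it as an illustration rather than the patched code itself.

import time
import requests

def horde_generate(base_url, prompt, params, api_key, models, poll_interval=1):
    # Submit the job; the async endpoint returns a request ID instead of blocking.
    payload = {
        "prompt": prompt,
        "params": params,
        "api_key": api_key,
        "models": [m for m in models if m],  # drop empty entries, as the patch does
    }
    req = requests.post(base_url + "/api/v1/generate/async", json=payload)
    if not req.ok:
        raise RuntimeError(f"Horde submit failed: {req.text}")
    request_id = req.json()["id"]

    # Poll the check endpoint until the job reports done=True.
    while True:
        status = requests.get(base_url + "/api/v1/generate/check/" + request_id).json()
        if status.get("done"):
            break
        time.sleep(poll_interval)

    # Fetch the finished generations once the job is done.
    return requests.get(base_url + "/api/v1/generate/prompt/" + request_id).json()["generations"]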