Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)
Actual final touches
aiserver.py: 558 changed lines
@@ -4875,7 +4875,7 @@ def core_generate(text: list, min: int, max: int, found_entries: set):
 
     result = raw_generate(
         gen_in[0],
-        max_length=koboldai_vars.genamt,
+        max_new=koboldai_vars.genamt,
        do_streaming=True,
        do_dynamic_wi=True,
        batch_count=numseqs,
@@ -4975,17 +4975,49 @@ class GenerationResult:
 
         self.decoded = [utils.decodenewlines(tokenizer.decode(enc)) for enc in self.encoded]
 
+
+class GenerationSettings:
+    def __init__(self, **overrides) -> None:
+        for setting in [
+            "temp",
+            "top_p",
+            "top_k",
+            "tfs",
+            "typical",
+            "top_a",
+            "rep_pen",
+            "rep_pen_slope",
+            "rep_pen_range",
+            "sampler_order",
+        ]:
+            setattr(
+                self,
+                setting,
+                overrides.get(setting, getattr(koboldai_vars, setting))
+            )
+
+
 def raw_generate(
     # prompt is either a string (text) or a list (token ids)
     prompt: Union[str, list, np.ndarray],
-    max_length: int,
+    max_new: int,
 
     do_streaming: bool = False,
     do_dynamic_wi: bool = False,
     batch_count: int = 1,
     bypass_hf_maxlength: bool = False,
+    generation_settings: Optional[dict] = None
 ) -> GenerationResult:
 
+    gen_settings = GenerationSettings(**(generation_settings or {}))
+
+    model_functions = {
+        "GooseAI": oai_raw_generate,
+        "OAI": oai_raw_generate,
+        "CLUSTER": cluster_raw_generate,
+        "Colab": colab_raw_generate,
+        "API": api_raw_generate,
+    }
+
     if isinstance(prompt, torch.Tensor):
         prompt_tokens = prompt.cpu().numpy()
     elif isinstance(prompt, list):
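
Note: GenerationSettings copies each sampler value from the overrides dict when present and otherwise falls back to the live value on koboldai_vars, so a caller can override a single knob per request. A minimal sketch of the intended behavior (values illustrative, not from this commit):

    gs = GenerationSettings(temp=0.2)  # explicit per-request override
    gs.temp    # -> 0.2
    gs.top_p   # -> koboldai_vars.top_p (global default)

    # Or through raw_generate's new keyword argument:
    result = raw_generate(prompt, max_new=80, generation_settings={"temp": 0.2})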
@@ -4998,31 +5030,25 @@ def raw_generate(
     assert isinstance(prompt_tokens, np.ndarray)
     assert len(prompt_tokens.shape) == 1
 
-    if koboldai_vars.model == "Colab":
-        raise NotImplementedError("Colab API raw_generate unsupported")
-    elif koboldai_vars.model == "API":
-        raise NotImplementedError("API raw_generate unsupported")
-    elif koboldai_vars.model == "CLUSTER":
-        raise NotImplementedError("Cluster raw_generate unsupported")
-    elif koboldai_vars.model == "ReadOnly":
+    if koboldai_vars.model == "ReadOnly":
         raise NotImplementedError("No loaded model")
 
     if koboldai_vars.use_colab_tpu or koboldai_vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"):
         batch_encoded = tpu_raw_generate(
             prompt_tokens=prompt_tokens,
-            max_length=max_length,
-            batch_count=batch_count
+            max_new=max_new,
+            batch_count=batch_count,
+            gen_settings=gen_settings
         )
         return GenerationResult(
             out_batches=batch_encoded, prompt=prompt_tokens, is_whole_generation=True
         )
-    elif koboldai_vars.model in ["GooseAI", "OAI"]:
-        print("kiss")
-        batch_encoded = oai_raw_generate(
+    elif koboldai_vars.model in model_functions:
+        batch_encoded = model_functions[koboldai_vars.model](
             prompt_tokens=prompt_tokens,
-            max_length=max_length,
-            batch_count=batch_count
+            max_new=max_new,
+            batch_count=batch_count,
+            gen_settings=gen_settings
         )
         return GenerationResult(
             out_batches=batch_encoded, prompt=prompt_tokens, is_whole_generation=True
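
Note: every remote backend now shares the (prompt_tokens, max_new, batch_count, gen_settings) keyword signature, which is what lets raw_generate dispatch through the model_functions table instead of a chain of elif branches. A sketch of how another backend would slot in (hypothetical name, not part of this commit):

    # Hypothetical: register an extra sender with the same keyword signature.
    model_functions["MyBackend"] = my_backend_raw_generate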
@@ -5031,10 +5057,11 @@ def raw_generate(
     # Torch HF
     batch_encoded = torch_raw_generate(
         prompt_tokens=prompt_tokens,
-        max_new=max_length if not bypass_hf_maxlength else int(2e9),
+        max_new=max_new if not bypass_hf_maxlength else int(2e9),
         do_streaming=do_streaming,
         do_dynamic_wi=do_dynamic_wi,
         batch_count=batch_count,
+        gen_settings=gen_settings
     )
     return GenerationResult(
         out_batches=batch_encoded,
@@ -5045,8 +5072,9 @@ def raw_generate(
 
 def tpu_raw_generate(
     prompt_tokens: List[int],
-    max_length: int,
+    max_new: int,
     batch_count: int,
+    gen_settings: GenerationSettings
 ):
 
     # Mostly lifted from apiactionsubmit_tpumtjgenerate
@@ -5055,20 +5083,20 @@ def tpu_raw_generate(
     genout = tpool.execute(
         tpu_mtj_backend.infer_static,
         np.uint32(prompt_tokens),
-        gen_len = max_length,
-        temp=koboldai_vars.temp,
-        top_p=koboldai_vars.top_p,
-        top_k=koboldai_vars.top_k,
-        tfs=koboldai_vars.tfs,
-        typical=koboldai_vars.typical,
-        top_a=koboldai_vars.top_a,
+        gen_len = max_new,
+        temp=gen_settings.temp,
+        top_p=gen_settings.top_p,
+        top_k=gen_settings.top_k,
+        tfs=gen_settings.tfs,
+        typical=gen_settings.typical,
+        top_a=gen_settings.top_a,
         numseqs=batch_count,
-        repetition_penalty=koboldai_vars.rep_pen,
-        rpslope=koboldai_vars.rep_pen_slope,
-        rprange=koboldai_vars.rep_pen_range,
+        repetition_penalty=gen_settings.rep_pen,
+        rpslope=gen_settings.rep_pen_slope,
+        rprange=gen_settings.rep_pen_range,
         soft_embeddings=koboldai_vars.sp,
         soft_tokens=soft_tokens,
-        sampler_order=koboldai_vars.sampler_order,
+        sampler_order=gen_settings.sampler_order,
     )
 
     genout = np.array(genout)
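
Note: on the TPU path the sampler values now come from gen_settings while the soft-prompt inputs (soft_embeddings, soft_tokens) stay global. Overriding one setting for a single TPU call then reduces to the overrides dict; a sketch (values illustrative):

    batch = tpu_raw_generate(
        prompt_tokens=prompt_tokens,
        max_new=80,
        batch_count=1,
        gen_settings=GenerationSettings(temp=0.5),
    )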
@@ -5078,10 +5106,11 @@ def tpu_raw_generate(
 def torch_raw_generate(
     prompt_tokens: Union[List[int], torch.Tensor],
     max_new: int,
+    gen_settings: GenerationSettings,
 
     do_streaming: bool = False,
     do_dynamic_wi: bool = False,
-    batch_count: int = 1
+    batch_count: int = 1,
 ):
 
     koboldai_vars.inference_config.do_streaming = do_streaming
@@ -5120,8 +5149,9 @@ def torch_raw_generate(
 
 def oai_raw_generate(
     prompt_tokens: List[int],
-    max_length: int,
+    max_new: int,
     batch_count: int,
+    gen_settings: GenerationSettings,
 ):
     # Taken mainly from oairequest()
 
@@ -5140,27 +5170,27 @@ def oai_raw_generate(
     if 'GooseAI' in koboldai_vars.configname:
         reqdata = {
             'prompt': decoded_prompt,
-            'max_tokens': koboldai_vars.genamt,
-            'temperature': koboldai_vars.temp,
-            'top_a': koboldai_vars.top_a,
-            'top_p': koboldai_vars.top_p,
-            'top_k': koboldai_vars.top_k,
-            'tfs': koboldai_vars.tfs,
-            'typical_p': koboldai_vars.typical,
-            'repetition_penalty': koboldai_vars.rep_pen,
-            'repetition_penalty_slope': koboldai_vars.rep_pen_slope,
-            'repetition_penalty_range': koboldai_vars.rep_pen_range,
-            'n': koboldai_vars.numseqs,
+            'max_tokens': max_new,
+            'temperature': gen_settings.temp,
+            'top_a': gen_settings.top_a,
+            'top_p': gen_settings.top_p,
+            'top_k': gen_settings.top_k,
+            'tfs': gen_settings.tfs,
+            'typical_p': gen_settings.typical,
+            'repetition_penalty': gen_settings.rep_pen,
+            'repetition_penalty_slope': gen_settings.rep_pen_slope,
+            'repetition_penalty_range': gen_settings.rep_pen_range,
+            'n': batch_count,
             # TODO: Implement streaming
             'stream': False
         }
     else:
         reqdata = {
             'prompt': decoded_prompt,
-            'max_tokens': koboldai_vars.genamt,
-            'temperature': koboldai_vars.temp,
-            'top_p': koboldai_vars.top_p,
-            'n': koboldai_vars.numseqs,
+            'max_tokens': max_new,
+            'temperature': gen_settings.temp,
+            'top_p': gen_settings.top_p,
+            'n': batch_count,
             'stream': False
         }
 
@@ -5198,6 +5228,173 @@ def oai_raw_generate(
     set_aibusy(0)
     return []
 
+
+class HordeException(Exception):
+    pass
+
+
+def cluster_raw_generate(
+    prompt_tokens: List[int],
+    max_new: int,
+    batch_count: int,
+    gen_settings: GenerationSettings,
+):
+    decoded_prompt = utils.decodenewlines(tokenizer.decode(prompt_tokens))
+
+    # Store context in memory to use it for comparison with generated content
+    koboldai_vars.lastctx = decoded_prompt
+
+    # Build request JSON data
+    reqdata = {
+        'max_length': max_new,
+        'max_context_length': koboldai_vars.max_length,
+        'rep_pen': gen_settings.rep_pen,
+        'rep_pen_slope': gen_settings.rep_pen_slope,
+        'rep_pen_range': gen_settings.rep_pen_range,
+        'temperature': gen_settings.temp,
+        'top_p': gen_settings.top_p,
+        'top_k': gen_settings.top_k,
+        'top_a': gen_settings.top_a,
+        'tfs': gen_settings.tfs,
+        'typical': gen_settings.typical,
+        'n': batch_count,
+    }
+
+    cluster_metadata = {
+        'prompt': decoded_prompt,
+        'params': reqdata,
+        'api_key': koboldai_vars.apikey,
+        'models': koboldai_vars.cluster_requested_models,
+    }
+
+    try:
+        # Create request
+        req = requests.post(
+            koboldai_vars.colaburl[:-8] + "/api/v1/generate/sync",
+            json=cluster_metadata,
+        )
+        js = req.json()
+    except requests.exceptions.ConnectionError:
+        errmsg = f"Horde unavailable. Please try again later"
+        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
+        raise HordeException(errmsg)
+    except requests.exceptions.JSONDecodeError:
+        errmsg = f"Unexpected message received from the Horde: '{req.text}'"
+        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
+        raise HordeException(errmsg)
+
+    if(req.status_code == 503):
+        errmsg = f"KoboldAI API Error: No available KoboldAI servers found in Horde to fulfil this request using the selected models or other properties."
+        print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
+        raise HordeException(errmsg)
+    if(req.status_code != 200):
+        errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
+        print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
+        raise HordeException(errmsg)
+
+    gen_servers = [(cgen['server_name'], cgen['server_id']) for cgen in js]
+    print(f"{colors.GREEN}Generations by: {gen_servers}{colors.END}")
+
+    # TODO: Fix this. Request context issues!!
+    # Just in case we want to announce it to the user
+    # if len(js) == 1:
+    #     warnmsg = f"Text generated by {js[0]['server_name']}"
+    #     emit('from_server', {'cmd': 'warnmsg', 'data': warnmsg}, broadcast=True)
+
+    return np.array([tokenizer.encode(cgen["text"]) for cgen in js])
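
Note: unlike the removed UI-bound senders, cluster_raw_generate reports failure by raising HordeException and leaves user-facing messaging to its caller. A sketch of the expected call site (prompt_tokens as produced in raw_generate; the error-reporting calls are the ones already used elsewhere in aiserver.py):

    try:
        batch = cluster_raw_generate(
            prompt_tokens=prompt_tokens,
            max_new=80,
            batch_count=1,
            gen_settings=GenerationSettings(),
        )
    except HordeException as e:
        emit('from_server', {'cmd': 'errmsg', 'data': str(e)}, broadcast=True)
        set_aibusy(0)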
+
+
+def colab_raw_generate(
+    prompt_tokens: List[int],
+    max_new: int,
+    batch_count: int,
+    gen_settings: GenerationSettings,
+):
+    decoded_prompt = utils.decodenewlines(tokenizer.decode(prompt_tokens))
+
+    # Store context in memory to use it for comparison with generated content
+    koboldai_vars.lastctx = decoded_prompt
+
+    # Build request JSON data
+    reqdata = {
+        'text': decoded_prompt,
+        'min': 0,
+        'max': max_new,
+        'rep_pen': gen_settings.rep_pen,
+        'rep_pen_slope': gen_settings.rep_pen_slope,
+        'rep_pen_range': gen_settings.rep_pen_range,
+        'temperature': gen_settings.temp,
+        'top_p': gen_settings.top_p,
+        'top_k': gen_settings.top_k,
+        'tfs': gen_settings.tfs,
+        'typical': gen_settings.typical,
+        'topa': gen_settings.top_a,
+        'numseqs': batch_count,
+        'retfultxt': False
+    }
+
+    # Create request
+    req = requests.post(
+        koboldai_vars.colaburl,
+        json = reqdata
+    )
+
+    # Deal with the response
+    if(req.status_code == 200):
+        js = req.json()["data"]
+
+        # Try to be backwards compatible with outdated colab
+        if("text" in js):
+            genout = [getnewcontent(js["text"])]
+        else:
+            genout = js["seqs"]
+
+    return np.array([tokenizer.encode(x) for x in genout])
+
+
+def api_raw_generate(
+    prompt_tokens: List[int],
+    max_new: int,
+    batch_count: int,
+    gen_settings: GenerationSettings,
+):
+    decoded_prompt = utils.decodenewlines(tokenizer.decode(prompt_tokens))
+
+    # Store context in memory to use it for comparison with generated content
+    koboldai_vars.lastctx = decoded_prompt
+
+    # Build request JSON data
+    reqdata = {
+        'prompt': decoded_prompt,
+        'max_length': max_new,
+        'max_context_length': koboldai_vars.max_length,
+        'rep_pen': gen_settings.rep_pen,
+        'rep_pen_slope': gen_settings.rep_pen_slope,
+        'rep_pen_range': gen_settings.rep_pen_range,
+        'temperature': gen_settings.temp,
+        'top_p': gen_settings.top_p,
+        'top_k': gen_settings.top_k,
+        'top_a': gen_settings.top_a,
+        'tfs': gen_settings.tfs,
+        'typical': gen_settings.typical,
+        'n': batch_count,
+    }
+
+    # Create request
+    while True:
+        req = requests.post(
+            koboldai_vars.colaburl[:-8] + "/api/v1/generate",
+            json=reqdata,
+        )
+        if(req.status_code == 503):  # Server is currently generating something else so poll until it's our turn
+            time.sleep(1)
+            continue
+        js = req.json()
+        if(req.status_code != 200):
+            errmsg = "KoboldAI API Error: Failed to get a reply from the server. Please check the console."
+            print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
+            emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
+            emit("error", errmsg, broadcast=True, room="UI_2")
+            set_aibusy(0)
+            return
+
+        genout = [obj["text"] for obj in js["results"]]
+        return np.array([tokenizer.encode(x) for x in genout])
+
 
 #==================================================================#
 # Send text to generator and deal with output
 #==================================================================#
@@ -5351,271 +5548,6 @@ def pinsequence(n):
     send_debug()
 
 
-#==================================================================#
-# Send transformers-style request to ngrok/colab host
-#==================================================================#
-def sendtocolab(txt, min, max):
-    # Log request to console
-    if not koboldai_vars.quiet:
-        print("{0}Tokens:{1}, Txt:{2}{3}".format(colors.YELLOW, min-1, txt, colors.END))
-
-    # Store context in memory to use it for comparison with generated content
-    koboldai_vars.lastctx = txt
-
-    # Build request JSON data
-    reqdata = {
-        'text': txt,
-        'min': min,
-        'max': max,
-        'rep_pen': koboldai_vars.rep_pen,
-        'rep_pen_slope': koboldai_vars.rep_pen_slope,
-        'rep_pen_range': koboldai_vars.rep_pen_range,
-        'temperature': koboldai_vars.temp,
-        'top_p': koboldai_vars.top_p,
-        'top_k': koboldai_vars.top_k,
-        'tfs': koboldai_vars.tfs,
-        'typical': koboldai_vars.typical,
-        'topa': koboldai_vars.top_a,
-        'numseqs': koboldai_vars.numseqs,
-        'retfultxt': False
-    }
-
-    # Create request
-    req = requests.post(
-        koboldai_vars.colaburl,
-        json = reqdata
-    )
-
-    # Deal with the response
-    if(req.status_code == 200):
-        js = req.json()["data"]
-
-        # Try to be backwards compatible with outdated colab
-        if("text" in js):
-            genout = [getnewcontent(js["text"])]
-        else:
-            genout = js["seqs"]
-
-        for i in range(koboldai_vars.numseqs):
-            koboldai_vars.lua_koboldbridge.outputs[i+1] = genout[i]
-
-        execute_outmod()
-        if(koboldai_vars.lua_koboldbridge.regeneration_required):
-            koboldai_vars.lua_koboldbridge.regeneration_required = False
-            genout = []
-            for i in range(koboldai_vars.numseqs):
-                genout.append(koboldai_vars.lua_koboldbridge.outputs[i+1])
-                assert type(genout[-1]) is str
-
-        koboldai_vars.actions.clear_unused_options()
-        koboldai_vars.actions.append_options([applyoutputformatting(x["generated_text"]) for x in genout])
-        genout = [{"generated_text": x['text']} for x in koboldai_vars.actions.get_current_options()]
-        if(len(genout) == 1):
-            genresult(genout[0])
-        else:
-            # Convert torch output format to transformers
-            seqs = []
-            for seq in genout:
-                seqs.append({"generated_text": seq})
-            if(koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
-                genresult(genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
-            else:
-                genselect(genout)
-
-        # Format output before continuing
-        #genout = applyoutputformatting(getnewcontent(genout))
-
-        # Add formatted text to Actions array and refresh the game screen
-        #koboldai_vars.actions.append(genout)
-        #refresh_story()
-        #emit('from_server', {'cmd': 'texteffect', 'data': koboldai_vars.actions.get_last_key() + 1 if len(koboldai_vars.actions) else 0})
-
-        set_aibusy(0)
-    else:
-        errmsg = "Colab API Error: Failed to get a reply from the server. Please check the colab console."
-        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True, room="UI_1")
-        set_aibusy(0)
-
-
-#==================================================================#
-# Send transformers-style request to KoboldAI API
-#==================================================================#
-def sendtoapi(txt, min, max):
-    # Log request to console
-    if not koboldai_vars.quiet:
-        print("{0}Tokens:{1}, Txt:{2}{3}".format(colors.YELLOW, min-1, txt, colors.END))
-
-    # Store context in memory to use it for comparison with generated content
-    koboldai_vars.lastctx = txt
-
-    # Build request JSON data
-    reqdata = {
-        'prompt': txt,
-        'max_length': max - min + 1,
-        'max_context_length': koboldai_vars.max_length,
-        'rep_pen': koboldai_vars.rep_pen,
-        'rep_pen_slope': koboldai_vars.rep_pen_slope,
-        'rep_pen_range': koboldai_vars.rep_pen_range,
-        'temperature': koboldai_vars.temp,
-        'top_p': koboldai_vars.top_p,
-        'top_k': koboldai_vars.top_k,
-        'top_a': koboldai_vars.top_a,
-        'tfs': koboldai_vars.tfs,
-        'typical': koboldai_vars.typical,
-        'n': koboldai_vars.numseqs,
-    }
-
-    # Create request
-    while True:
-        req = requests.post(
-            koboldai_vars.colaburl[:-8] + "/api/v1/generate",
-            json=reqdata,
-        )
-        if(req.status_code == 503):  # Server is currently generating something else so poll until it's our turn
-            time.sleep(1)
-            continue
-        js = req.json()
-        if(req.status_code != 200):
-            errmsg = "KoboldAI API Error: Failed to get a reply from the server. Please check the console."
-            print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
-            emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-            emit("error", errmsg, broadcast=True, room="UI_2")
-            set_aibusy(0)
-            return
-
-        genout = [obj["text"] for obj in js["results"]]
-
-        for i in range(koboldai_vars.numseqs):
-            koboldai_vars.lua_koboldbridge.outputs[i+1] = genout[i]
-
-        execute_outmod()
-        if(koboldai_vars.lua_koboldbridge.regeneration_required):
-            koboldai_vars.lua_koboldbridge.regeneration_required = False
-            genout = []
-            for i in range(koboldai_vars.numseqs):
-                genout.append(koboldai_vars.lua_koboldbridge.outputs[i+1])
-                assert type(genout[-1]) is str
-
-        if(len(genout) == 1):
-            genresult(genout[0])
-        else:
-            adjusted_genout = []
-            for item in genout:
-                adjusted_genout.append({"generated_text": item})
-            # Convert torch output format to transformers
-            seqs = []
-            for seq in adjusted_genout:
-                seqs.append({"generated_text": seq})
-            if(koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
-                genresult(adjusted_genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
-            else:
-                genselect(adjusted_genout)
-
-        set_aibusy(0)
-        return
-
-#==================================================================#
-# Send transformers-style request to KoboldAI Cluster
-#==================================================================#
-def sendtocluster(txt, min, max):
-    # Log request to console
-    if not koboldai_vars.quiet:
-        print("{0}Tokens:{1}, Txt:{2}{3}".format(colors.YELLOW, min-1, txt, colors.END))
-
-    # Store context in memory to use it for comparison with generated content
-    koboldai_vars.lastctx = txt
-
-    # Build request JSON data
-    reqdata = {
-        'max_length': max - min + 1,
-        'max_context_length': koboldai_vars.max_length,
-        'rep_pen': koboldai_vars.rep_pen,
-        'rep_pen_slope': koboldai_vars.rep_pen_slope,
-        'rep_pen_range': koboldai_vars.rep_pen_range,
-        'temperature': koboldai_vars.temp,
-        'top_p': koboldai_vars.top_p,
-        'top_k': koboldai_vars.top_k,
-        'top_a': koboldai_vars.top_a,
-        'tfs': koboldai_vars.tfs,
-        'typical': koboldai_vars.typical,
-        'n': koboldai_vars.numseqs,
-    }
-    cluster_metadata = {
-        'prompt': txt,
-        'params': reqdata,
-        'api_key': koboldai_vars.apikey,
-        'models': koboldai_vars.cluster_requested_models,
-    }
-    try:
-        # Create request
-        req = requests.post(
-            koboldai_vars.colaburl[:-8] + "/api/v1/generate/sync",
-            json=cluster_metadata,
-        )
-        js = req.json()
-    except requests.exceptions.ConnectionError:
-        errmsg = f"Horde unavailable. Please try again later"
-        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-        set_aibusy(0)
-        return
-    except requests.exceptions.JSONDecodeError:
-        errmsg = f"Unexpected message received from the Horde: '{req.text}'"
-        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-        set_aibusy(0)
-        return
-    if(req.status_code == 503):
-        errmsg = f"KoboldAI API Error: No available KoboldAI servers found in Horde to fulfil this request using the selected models or other properties."
-        print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-        set_aibusy(0)
-        return
-    if(req.status_code != 200):
-        errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
-        print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
-        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
-        set_aibusy(0)
-        return
-    gen_servers = [(cgen['server_name'],cgen['server_id']) for cgen in js]
-    print(f"{colors.GREEN}Generations by: {gen_servers}{colors.END}")
-    # Just in case we want to announce it to the user
-    if len(js) == 1:
-        warnmsg = f"Text generated by {js[0]['server_name']}"
-        emit('from_server', {'cmd': 'warnmsg', 'data': warnmsg}, broadcast=True)
-    genout = [cgen['text'] for cgen in js]
-
-    for i in range(koboldai_vars.numseqs):
-        koboldai_vars.lua_koboldbridge.outputs[i+1] = genout[i]
-
-    execute_outmod()
-    if(koboldai_vars.lua_koboldbridge.regeneration_required):
-        koboldai_vars.lua_koboldbridge.regeneration_required = False
-        genout = []
-        for i in range(koboldai_vars.numseqs):
-            genout.append(koboldai_vars.lua_koboldbridge.outputs[i+1])
-            assert type(genout[-1]) is str
-
-    if(len(genout) == 1):
-        genresult(genout[0])
-    else:
-        adjusted_genout = []
-        for item in genout:
-            adjusted_genout.append({"generated_text": item})
-        # Convert torch output format to transformers
-        seqs = []
-        for seq in adjusted_genout:
-            seqs.append({"generated_text": seq})
-        if(koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
-            genresult(adjusted_genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
-        else:
-            genselect(adjusted_genout)
-
-    set_aibusy(0)
-    return
-
 
 #==================================================================#
 # Send text to TPU mesh transformer backend
 #==================================================================#
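
Note: the three removed senders are superseded by the raw generators added above. A rough sketch of the correspondence for a migrating caller (gen_settings constructed as shown earlier; the max/min arithmetic mirrors the removed sendtoapi body):

    # sendtoapi(txt, minimum, maximum) becomes roughly:
    api_raw_generate(
        prompt_tokens=tokenizer.encode(txt),
        max_new=maximum - minimum + 1,
        batch_count=koboldai_vars.numseqs,
        gen_settings=GenerationSettings(),
    )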
@@ -8242,7 +8174,7 @@ def UI_2_generate_raw():
         return Response(json.dumps({"error": "No model"}), status=500)
 
     try:
-        out = raw_generate(prompt, max_length=80)
+        out = raw_generate(prompt, max_new=80)
     except NotImplementedError as e:
         return Response(json.dumps({"error": str(e)}), status=500)