Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)
Finishing up OAI/Goose
aiserver.py (98 changed lines)
@@ -43,6 +43,7 @@ import inspect
 import warnings
 import multiprocessing
 import copy
+import numpy as np
 from collections.abc import Iterable
 from collections import OrderedDict
 from typing import Any, Callable, TypeVar, Tuple, Union, Dict, Set, List, Optional, Type
@@ -1141,10 +1142,6 @@ def spRequest(filename):
         koboldai_vars.sp_changed = True
         return
 
-    global np
-    if 'np' not in globals():
-        import numpy as np
-
     z, version, shape, fortran_order, dtype = fileops.checksp("./softprompts/"+filename, koboldai_vars.modeldim)
     if not isinstance(z, zipfile.ZipFile):
         raise RuntimeError(f"{repr(filename)} is not a valid soft prompt file")
@@ -1342,9 +1339,6 @@ def general_startup(override_args=None):
 def tpumtjgetsofttokens():
     soft_tokens = None
     if(koboldai_vars.sp is None):
-        global np
-        if 'np' not in globals():
-            import numpy as np
         tensor = np.zeros((1, tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"])), dtype=np.float32)
         rows = tensor.shape[0]
         padding_amount = tpu_mtj_backend.params["seq"] - (tpu_mtj_backend.params["seq"] % -tpu_mtj_backend.params["cores_per_replica"]) - rows
@@ -1406,14 +1400,19 @@ def get_model_info(model, directory=""):
     if path.exists("settings/{}.v2_settings".format(model)):
         with open("settings/{}.v2_settings".format(model), "r") as file:
             # Check if API key exists
-            js = json.load(file)
-            if("apikey" in js and js["apikey"] != ""):
-                # API key exists, grab it and close the file
-                key_value = js["apikey"]
-            elif 'oaiapikey' in js and js['oaiapikey'] != "":
-                key_value = js["oaiapikey"]
-            if model in ('GooseAI', 'OAI'):
-                get_oai_models({'model': model, 'key': key_value})
+            try:
+                js = json.load(file)
+
+                if("apikey" in js and js["apikey"] != ""):
+                    # API key exists, grab it and close the file
+                    key_value = js["apikey"]
+                elif 'oaiapikey' in js and js['oaiapikey'] != "":
+                    key_value = js["oaiapikey"]
+                if model in ('GooseAI', 'OAI'):
+                    get_oai_models({'model': model, 'key': key_value})
+            except json.decoder.JSONDecodeError:
+                print(":(")
+                pass
         key = True
     elif model == 'ReadOnly':
         pass
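
Not part of the commit, but for context: a minimal standalone sketch of the failure mode the new try/except in get_model_info guards against. The helper name and settings path here are illustrative only; the point is that json.load raises json.decoder.JSONDecodeError on a corrupt settings file, and the added handler turns that into "no key found" instead of a crash.

import json

def read_api_key(settings_path="settings/OAI.v2_settings"):
    # Hypothetical helper, not from aiserver.py: mirrors the guarded load added above.
    try:
        with open(settings_path, "r") as file:
            js = json.load(file)  # raises json.decoder.JSONDecodeError on malformed JSON
    except (FileNotFoundError, json.decoder.JSONDecodeError):
        return None  # corrupt or missing settings file: report no key rather than crashing
    return js.get("apikey") or js.get("oaiapikey") or None
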
@@ -1500,7 +1499,8 @@ def get_oai_models(data):
         }
     )
     if(req.status_code == 200):
-        engines = req.json()["data"]
+        r = req.json()
+        engines = r["data"]
         try:
             engines = [[en["id"], "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")] for en in engines]
         except:
@@ -1524,7 +1524,9 @@ def get_oai_models(data):
                 if js['apikey'] != key:
                     changed=True
         else:
+            js = {}
             changed=True
+
         if changed:
             with open("settings/{}.v2_settings".format(model), "w") as file:
                 js["apikey"] = key
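
Not part of the commit: a condensed sketch of the settings round-trip this hunk completes. The added js = {} matters because the write branch reuses js; without a default, a missing settings file would leave js unbound when the key is saved. The function and directory names below are this example's assumptions, not the project's.

import json
from os import path

def save_api_key(model, key, settings_dir="settings"):
    # Hypothetical helper mirroring the save logic in get_oai_models.
    settings_path = "{}/{}.v2_settings".format(settings_dir, model)
    changed = False
    if path.exists(settings_path):
        with open(settings_path, "r") as file:
            js = json.load(file)
        if js.get("apikey") != key:
            changed = True
    else:
        js = {}  # mirrors the added line: gives the write below something to update
        changed = True
    if changed:
        js["apikey"] = key
        with open(settings_path, "w") as file:
            json.dump(js, file)
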
@@ -4877,7 +4879,7 @@ def core_generate(text: list, min: int, max: int, found_entries: set):
             # stopping and continuing is this loop.
 
             result = raw_generate(
-                gen_in,
+                gen_in[0],
                 max_length=koboldai_vars.genamt,
                 do_streaming=True,
                 do_dynamic_wi=True,
@@ -4890,7 +4892,7 @@ def core_generate(text: list, min: int, max: int, found_entries: set):
 
             genout = result.encoded
 
-            already_generated += len(genout[0]) - len(gen_in[0])
+            already_generated += len(genout[0]) # - len(gen_in[0])
             assert already_generated <= koboldai_vars.genamt
 
             if result.is_whole_generation:
@@ -4951,12 +4953,13 @@ def core_generate(text: list, min: int, max: int, found_entries: set):
                 )
                 genout = torch.cat((soft_tokens.tile(koboldai_vars.numseqs, 1), genout), dim=-1)
             assert genout.shape[-1] + koboldai_vars.genamt - already_generated <= koboldai_vars.max_length
-            diff = genout.shape[-1] - gen_in.shape[-1]
-            minimum += diff
-            maximum += diff
+            # diff = genout.shape[-1] - gen_in.shape[-1]
+            # minimum += diff
+            # maximum += diff
             gen_in = genout
             numseqs = 1
 
+    __debug("final out", genout, "already_gen", already_generated)
     return genout, already_generated
 
 class GenerationResult:
@@ -4988,7 +4991,7 @@ class GenerationResult:
 
 def raw_generate(
     # prompt is either a string (text) or a list (token ids)
-    prompt: Union[str, list],
+    prompt: Union[str, list, np.ndarray],
     max_length: int,
 
     do_streaming: bool = False,
@@ -4997,7 +5000,18 @@ def raw_generate(
     bypass_hf_maxlength: bool = False,
 ) -> GenerationResult:
 
-    prompt_tokens = tokenizer.encode(prompt) if isinstance(prompt, str) else prompt
+    if isinstance(prompt, torch.Tensor):
+        prompt_tokens = prompt.cpu().numpy()
+    elif isinstance(prompt, list):
+        prompt_tokens = np.array(prompt)
+    elif isinstance(prompt, str):
+        prompt_tokens = tokenizer.encode(prompt)
+    else:
+        raise ValueError(f"Prompt is {type(prompt)}. Not a fan!")
+
+    assert isinstance(prompt_tokens, np.ndarray)
+    assert len(prompt_tokens.shape) == 1
+
 
     if koboldai_vars.model == "Colab":
         raise NotImplementedError("Colab API raw_generate unsupported")
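
Not part of the commit: a standalone sketch of the prompt normalization the new branch performs, so that raw_generate always works on a single 1-D array of token ids. torch and the tokenizer object are assumptions of this example, and the helper name is illustrative.

from typing import Union
import numpy as np
import torch

def normalize_prompt(prompt: Union[str, list, np.ndarray, torch.Tensor], tokenizer) -> np.ndarray:
    # Accept a tensor, a plain list of token ids, an ndarray, or raw text;
    # always hand back an unbatched 1-D numpy array of token ids.
    if isinstance(prompt, torch.Tensor):
        tokens = prompt.cpu().numpy()
    elif isinstance(prompt, (list, np.ndarray)):
        tokens = np.asarray(prompt)
    elif isinstance(prompt, str):
        tokens = np.asarray(tokenizer.encode(prompt))
    else:
        raise ValueError(f"Unsupported prompt type: {type(prompt)}")
    assert tokens.ndim == 1, "expected a single unbatched prompt"
    return tokens
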
@@ -5008,7 +5022,7 @@ def raw_generate(
     elif koboldai_vars.model == "ReadOnly":
         raise NotImplementedError("No loaded model")
 
-    if koboldai_vars.use_colab_tpu or model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"):
+    if koboldai_vars.use_colab_tpu or koboldai_vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"):
         batch_encoded = tpu_raw_generate(
             prompt_tokens=prompt_tokens,
             max_length=max_length,
@@ -5017,7 +5031,8 @@ def raw_generate(
         return GenerationResult(
             out_batches=batch_encoded, prompt=prompt_tokens, is_whole_generation=True
         )
-    elif model == "OAI":
+    elif koboldai_vars.model in ["GooseAI", "OAI"]:
+        print("kiss")
         batch_encoded = oai_raw_generate(
             prompt_tokens=prompt_tokens,
             max_length=max_length,
@@ -5026,6 +5041,8 @@ def raw_generate(
         return GenerationResult(
             out_batches=batch_encoded, prompt=prompt_tokens, is_whole_generation=True
         )
 
+    print("model", model)
+
     # Torch HF
     batch_encoded = torch_raw_generate(
@@ -5048,7 +5065,6 @@ def tpu_raw_generate(
     batch_count: int,
 ):
 
-    prompt_tokens = prompt_tokens[0]
     # Mostly lifted from apiactionsubmit_tpumtjgenerate
     soft_tokens = tpumtjgetsofttokens()
     __debug("we are generating with", prompt_tokens, "batch", batch_count, "soft tokens", soft_tokens)
@@ -5174,26 +5190,28 @@ def oai_raw_generate(
         }
     )
 
+    j = req.json()
     # Deal with the response
-    if(req.status_code == 200):
-        outputs = [out["text"] for out in req.json()["choices"]]
+    if req.ok:
+        outputs = [out["text"] for out in j["choices"]]
 
-        decoded_genout = [{"generated_text": utils.decodenewlines(txt)}
-                          for txt in outputs]
 
         if not koboldai_vars.quiet:
-            print("{0}{1}{2}".format(colors.CYAN, decoded_genout, colors.END))
+            print("{0}{1}{2}".format(colors.CYAN, outputs, colors.END))
 
-        return [tokenizer.encode(x) for x in decoded_genout]
+        return np.array([tokenizer.encode(x) for x in outputs])
     else:
        # Send error message to web client
-        er = req.json()
-        if("error" in er):
-            type = er["error"]["type"]
-            message = er["error"]["message"]
+        if "error" in j:
+            error_type = j["error"]["type"]
+            error_message = j["error"]["message"]
+        else:
+            error_type = "Unknown"
+            error_message = "Unknown"
 
-            errmsg = "OpenAI API Error: {0} - {1}".format(type, message)
-            emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True, room="UI_1")
+        emit('from_server', {
+            'cmd': 'errmsg',
+            'data': f"OpenAI API Error: {error_type} - {error_message}"
+        }, broadcast=True, room="UI_1")
         set_aibusy(0)
         return []
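
Not part of the commit: a compact sketch of the success/error split the rewritten response handling implements. req.ok is requests' shorthand for status_code < 400, so the check is slightly broader than the old == 200 test. The helper name and the equal-length-outputs assumption below are this example's, not the project's.

import numpy as np
import requests

def parse_completion_response(req: requests.Response, tokenizer) -> np.ndarray:
    # Hypothetical helper mirroring oai_raw_generate's response handling.
    j = req.json()
    if req.ok:  # True for any status code below 400
        outputs = [choice["text"] for choice in j["choices"]]
        # np.array over the encoded outputs assumes they tokenize to equal lengths.
        return np.array([tokenizer.encode(text) for text in outputs])
    # Error path: surface what the API reported, or a generic fallback.
    error = j.get("error", {})
    raise RuntimeError("OpenAI API Error: {} - {}".format(
        error.get("type", "Unknown"), error.get("message", "Unknown")))
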