Merge branch 'henk717:united' into Image_Gen

ebolam 2023-11-03 09:13:27 -04:00 committed by GitHub
commit 5d20e33b68
9 changed files with 2037 additions and 864 deletions

View File

@@ -2654,9 +2654,8 @@ def get_message(msg):
if(koboldai_vars.mode == "play"):
if(koboldai_vars.aibusy):
if(msg.get('allowabort', False)):
koboldai_vars.abort = True
model.abort_generation()
return
koboldai_vars.abort = False
koboldai_vars.lua_koboldbridge.feedback = None
if(koboldai_vars.chatmode):
if(type(msg['chatname']) is not str):
@@ -2676,9 +2675,8 @@ def get_message(msg):
elif(msg['cmd'] == 'retry'):
if(koboldai_vars.aibusy):
if(msg.get('allowabort', False)):
koboldai_vars.abort = True
model.abort_generation()
return
koboldai_vars.abort = False
if(koboldai_vars.chatmode):
if(type(msg['chatname']) is not str):
raise ValueError("Chatname must be a string")
@@ -3344,7 +3342,7 @@ def actionsubmit(
# Clear the startup text from game screen
emit('from_server', {'cmd': 'updatescreen', 'gamestarted': False, 'data': 'Please wait, generating story...'}, broadcast=True, room="UI_1")
calcsubmit("", gen_mode=gen_mode) # Run the first action through the generator
if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0):
if(not model.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0):
data = ""
force_submit = True
disable_recentrng = True
@@ -3370,13 +3368,13 @@ def actionsubmit(
refresh_story()
if(len(koboldai_vars.actions) > 0):
emit('from_server', {'cmd': 'texteffect', 'data': koboldai_vars.actions.get_last_key() + 1}, broadcast=True, room="UI_1")
if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None):
if(not model.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None):
data = ""
force_submit = True
disable_recentrng = True
continue
else:
if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
if(not model.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
genresult(genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"], flash=False)
refresh_story()
data = ""
@@ -3410,7 +3408,7 @@ def actionsubmit(
if(not no_generate and not koboldai_vars.noai and koboldai_vars.lua_koboldbridge.generating):
# Off to the tokenizer!
calcsubmit("", gen_mode=gen_mode)
if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0):
if(not model.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0):
data = ""
force_submit = True
disable_recentrng = True
@@ -3431,13 +3429,13 @@ def actionsubmit(
genout = [{"generated_text": x['text']} for x in koboldai_vars.actions.get_current_options()]
if(len(genout) == 1):
genresult(genout[0]["generated_text"])
if(not no_generate and not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None):
if(not no_generate and not model.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None):
data = ""
force_submit = True
disable_recentrng = True
continue
else:
if(not no_generate and not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
if(not no_generate and not model.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
genresult(genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
data = ""
force_submit = True
@@ -6204,7 +6202,7 @@ def UI_2_submit(data):
def UI_2_abort(data):
if koboldai_vars.debug:
print("got abort")
koboldai_vars.abort = True
model.abort_generation()
#==================================================================#
@@ -8267,7 +8265,7 @@ class GenerationOutputSchema(KoboldSchema):
results: List[GenerationResultSchema] = fields.List(fields.Nested(GenerationResultSchema), required=True, metadata={"description": "Array of generated outputs."})
class StoryNumsChunkSchema(KoboldSchema):
num: int = fields.Integer(required=True, metadata={"description": "Guaranteed to not equal the `num` of any other active story chunk. Equals 0 iff this is the first action of the story (the prompt)."})
num: int = fields.Integer(required=True, metadata={"description": "Guaranteed to not equal the `num` of any other active story chunk. Equals 0 if this is the first action of the story (the prompt)."})
class StoryChunkSchema(StoryNumsChunkSchema, KoboldSchema):
text: str = fields.String(required=True, metadata={"description": "The text inside this story chunk."})

View File

@@ -80,7 +80,7 @@
"#@title <b><-- Select your model below and then click this to start KoboldAI</b>\n",
"#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
"\n",
"Model = \"Nerys V2 6B\" #@param [\"HoloMax 13B (United)\", \"MythoMax 13B (United)\", \"Huginn 13B (United)\", \"Chronos 13B (United)\", \"Airoboros M2.0 13B (United)\", \"Holodeck 13B (United)\", \"Spring Dragon 13B (United)\", \"Nerys V2 6B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Nerys 2.7B\", \"AID 2.7B\", \"Janeway 2.7B\", \"Picard 2.7B\", \"OPT 2.7B\", \"Fairseq Dense 2.7B\", \"Neo 2.7B\"] {allow-input: true}\n",
"Model = \"Nerys V2 6B\" #@param [\"HoloMax 13B (United)\", \"Emerhyst 13B (United)\", \"MythoMax 13B (United)\", \"Huginn 13B (United)\", \"Chronos 13B (United)\", \"Airoboros M2.0 13B (United)\", \"Holodeck 13B (United)\", \"Spring Dragon 13B (United)\", \"Nerys V2 6B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Nerys 2.7B\", \"AID 2.7B\", \"Janeway 2.7B\", \"Picard 2.7B\", \"OPT 2.7B\", \"Fairseq Dense 2.7B\", \"Neo 2.7B\"] {allow-input: true}\n",
"Revision = \"\" #@param [\"\"]{allow-input: true}\n",
"Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
"Provider = \"Cloudflare\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
@@ -160,9 +160,14 @@
" path = \"\"\n",
" download = \"\"\n",
" Version = \"United\"\n",
"elif Model == \"Emerhyst 13B (United)\":\n",
" Model = \"Undi95/Emerhyst-13B\"\n",
" path = \"\"\n",
" download = \"\"\n",
" Version = \"United\"\n",
"elif Model == \"MythoMax 13B (United)\":\n",
" Model = \"TheBloke/MythoMax-L2-13B-GPTQ\"\n",
" Revision = \"gptq-4bit-128g-actorder_True\"\n",
" Model = \"Gryphe/MythoMax-L2-13b\"\n",
" Revision = \"\"\n",
" path = \"\"\n",
" download = \"\"\n",
" Version = \"United\"\n",
@@ -177,7 +182,7 @@
" download = \"\"\n",
" Version = \"United\"\n",
"elif Model == \"HoloMax 13B (United)\":\n",
" Model = \"KoboldAI/LLaMA2-13B-Holomax-GPTQ\"\n",
" Model = \"KoboldAI/LLaMA2-13B-Holomax\"\n",
" path = \"\"\n",
" download = \"\"\n",
" Version = \"United\"\n",

View File

@@ -171,6 +171,7 @@ class InferenceModel:
"""Root class for all models."""
def __init__(self) -> None:
self.abort = False
self.gen_state = {}
self.post_token_hooks = []
self.stopper_hooks = []
@@ -669,6 +670,9 @@ class InferenceModel:
for hook in self.post_token_hooks:
hook(self, input_ids)
def abort_generation(self, abort=True):
self.abort=abort
def get_supported_gen_modes(self) -> List[GenerationMode]:
"""Returns a list of compatible `GenerationMode`s for the current model.

View File

@@ -29,19 +29,20 @@ class model_backend(InferenceModel):
super().__init__()
self.url = "https://horde.koboldai.net"
self.key = "0000000000"
self.models = self.get_cluster_models()
self.models = []
self.model_name = "Horde"
self.model = []
self.request_id = None
# Do not allow API to be served over the API
self.capabilties = ModelCapabilities(api_host=False)
def is_valid(self, model_name, model_path, menu_path):
logger.debug("Horde Models: {}".format(self.models))
return model_name == "CLUSTER" or model_name in [x['value'] for x in self.models]
return model_name == "CLUSTER"
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
self.models = self.get_cluster_models()
if os.path.exists("settings/horde.model_backend.settings") and 'base_url' not in vars(self):
with open("settings/horde.model_backend.settings", "r") as f:
temp = json.load(f)
@@ -105,13 +106,11 @@ class model_backend(InferenceModel):
except:
logger.init_err("KAI Horde Models", status="Failed")
logger.error("Provided KoboldAI Horde URL unreachable")
emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"})
return
if not req.ok:
# Something went wrong, print the message and quit since we can't initialize an engine
logger.init_err("KAI Horde Models", status="Failed")
logger.error(req.json())
emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1")
return
engines = req.json()
@@ -222,18 +221,18 @@ class model_backend(InferenceModel):
logger.error(errmsg)
raise HordeException(errmsg)
request_id = req_status["id"]
logger.debug("Horde Request ID: {}".format(request_id))
self.request_id = req_status["id"]
logger.debug("Horde Request ID: {}".format(self.request_id))
# We've sent the request and got the ID back, now we need to watch it to see when it finishes
finished = False
self.finished = False
cluster_agent_headers = {"Client-Agent": client_agent}
while not finished:
while not self.finished:
try:
req = requests.get(
f"{self.url}/api/v2/generate/text/status/{request_id}",
f"{self.url}/api/v2/generate/text/status/{self.request_id}",
headers=cluster_agent_headers,
)
except requests.exceptions.ConnectionError:
@@ -260,15 +259,16 @@ class model_backend(InferenceModel):
logger.error(errmsg)
raise HordeException(errmsg)
finished = req_status["done"]
self.finished = req_status["done"]
utils.koboldai_vars.horde_wait_time = req_status["wait_time"]
utils.koboldai_vars.horde_queue_position = req_status["queue_position"]
utils.koboldai_vars.horde_queue_size = req_status["waiting"]
if not finished:
if not self.finished:
logger.debug(req_status)
time.sleep(1)
self.request_id = None
logger.debug("Last Horde Status Message: {}".format(req_status))
if req_status["faulted"]:
@@ -287,3 +287,33 @@ class model_backend(InferenceModel):
is_whole_generation=True,
single_line=single_line,
)
def abort_generation(self, abort=True):
logger.info("Attempting to stop horde gen")
self.finished = True
try:
# Create request
client_agent = "KoboldAI:2.0.0:koboldai.org"
cluster_headers = {
"apikey": self.key,
"Client-Agent": client_agent,
}
req = requests.delete(
f"{self.url}/v2/generate/text/status/{self.request_id}",
headers=cluster_headers,
)
except requests.exceptions.ConnectionError:
errmsg = f"Horde unavailable. Please try again later"
logger.error(errmsg)
raise HordeException(errmsg)
if req.status_code == 503:
errmsg = f"KoboldAI API Error: No available KoboldAI servers found in Horde to fulfil this request using the selected models or other properties."
logger.error(errmsg)
raise HordeException(errmsg)
elif not req.ok:
errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
logger.error(req.url)
logger.error(errmsg)
logger.error(req.text)
raise HordeException(errmsg)

View File

@@ -26,11 +26,11 @@ class Stoppers:
f"Inconsistency detected between KoboldAI Python and Lua backends ({utils.koboldai_vars.generated_tkns} != {utils.koboldai_vars.lua_koboldbridge.generated_cols})"
)
if utils.koboldai_vars.abort or (
if model.abort or (
utils.koboldai_vars.inference_config.stop_at_genamt
and utils.koboldai_vars.generated_tkns >= utils.koboldai_vars.genamt
):
utils.koboldai_vars.abort = False
model.abort = False
model.gen_state["regeneration_required"] = False
model.gen_state["halt"] = False
return True

View File

@@ -48,6 +48,7 @@ https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' and python_version == '3.8'
windows-curses; sys_platform == 'win32'
pynvml
flash_attn==2.3.0
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10'
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu118torch2.0cxx11abiFALSE-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.8'
xformers==0.0.21
omegaconf

File diff suppressed because one or more lines are too long

View File

@@ -100,7 +100,7 @@
</div>
</div><br>
<div class="statusbar_outer_horde var_sync_alt_system_aibusy var_sync_alt_model_horde_wait_time" id="status_bar_horde">
<div class="statusbar_inner_horde" style="width:100%">
<div class="statusbar_inner_horde" style="width:100%" onclick="socket.emit('abort','');">
<div>&nbsp;</div>
<div>Queue <span class="var_sync_model_horde_queue_position"></span> of <span class="var_sync_model_horde_queue_size"></span></div>
<div><span class="var_sync_model_horde_wait_time"></span> sec left</div>

View File

@@ -98,9 +98,9 @@ def replaceblanklines(txt):
#==================================================================#
def removespecialchars(txt, koboldai_vars=None):
if koboldai_vars is None or koboldai_vars.actionmode == 0:
txt = re.sub(r"[#/@%<>{}+=~|\^]", "", txt)
txt = re.sub(r"[/@%<>{}+=~|\^]", "", txt)
else:
txt = re.sub(r"[#/@%{}+=~|\^]", "", txt)
txt = re.sub(r"[/@%{}+=~|\^]", "", txt)
return txt
#==================================================================#