Merge branch 'henk717:united' into Image_Gen
Commit 5d20e33b68

aiserver.py (22 changes)
@@ -2654,9 +2654,8 @@ def get_message(msg):
     if(koboldai_vars.mode == "play"):
         if(koboldai_vars.aibusy):
             if(msg.get('allowabort', False)):
-                koboldai_vars.abort = True
+                model.abort_generation()
             return
-        koboldai_vars.abort = False
         koboldai_vars.lua_koboldbridge.feedback = None
         if(koboldai_vars.chatmode):
             if(type(msg['chatname']) is not str):
@@ -2676,9 +2675,8 @@ def get_message(msg):
     elif(msg['cmd'] == 'retry'):
         if(koboldai_vars.aibusy):
             if(msg.get('allowabort', False)):
-                koboldai_vars.abort = True
+                model.abort_generation()
             return
-        koboldai_vars.abort = False
         if(koboldai_vars.chatmode):
             if(type(msg['chatname']) is not str):
                 raise ValueError("Chatname must be a string")
@@ -3344,7 +3342,7 @@ def actionsubmit(
             # Clear the startup text from game screen
             emit('from_server', {'cmd': 'updatescreen', 'gamestarted': False, 'data': 'Please wait, generating story...'}, broadcast=True, room="UI_1")
             calcsubmit("", gen_mode=gen_mode) # Run the first action through the generator
-            if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0):
+            if(not model.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0):
                 data = ""
                 force_submit = True
                 disable_recentrng = True
@@ -3370,13 +3368,13 @@ def actionsubmit(
                 refresh_story()
                 if(len(koboldai_vars.actions) > 0):
                     emit('from_server', {'cmd': 'texteffect', 'data': koboldai_vars.actions.get_last_key() + 1}, broadcast=True, room="UI_1")
-                if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None):
+                if(not model.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None):
                     data = ""
                     force_submit = True
                     disable_recentrng = True
                     continue
             else:
-                if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
+                if(not model.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
                     genresult(genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"], flash=False)
                     refresh_story()
                     data = ""
@@ -3410,7 +3408,7 @@ def actionsubmit(
         if(not no_generate and not koboldai_vars.noai and koboldai_vars.lua_koboldbridge.generating):
             # Off to the tokenizer!
             calcsubmit("", gen_mode=gen_mode)
-            if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0):
+            if(not model.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0):
                 data = ""
                 force_submit = True
                 disable_recentrng = True
@@ -3431,13 +3429,13 @@ def actionsubmit(
             genout = [{"generated_text": x['text']} for x in koboldai_vars.actions.get_current_options()]
             if(len(genout) == 1):
                 genresult(genout[0]["generated_text"])
-                if(not no_generate and not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None):
+                if(not no_generate and not model.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None):
                     data = ""
                     force_submit = True
                     disable_recentrng = True
                     continue
             else:
-                if(not no_generate and not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
+                if(not no_generate and not model.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and koboldai_vars.lua_koboldbridge.restart_sequence > 0):
                     genresult(genout[koboldai_vars.lua_koboldbridge.restart_sequence-1]["generated_text"])
                     data = ""
                     force_submit = True
@@ -6204,7 +6202,7 @@ def UI_2_submit(data):
 def UI_2_abort(data):
     if koboldai_vars.debug:
         print("got abort")
-    koboldai_vars.abort = True
+    model.abort_generation()


 #==================================================================#
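
Taken together with the get_message() changes above, every abort now routes through the model object instead of the shared koboldai_vars.abort flag. A minimal sketch of the new round trip, with the Socket.IO plumbing stubbed out (StubModel is illustrative, not the real backend class):

# Sketch of the abort path after this commit; the stub stands in for the live model.
class StubModel:
    def __init__(self):
        self.abort = False              # polled by the stoppers during generation

    def abort_generation(self, abort=True):
        self.abort = abort              # replaces the old koboldai_vars.abort = True

model = StubModel()

def UI_2_abort(data):
    # Both UIs land here via socket.emit('abort', ''); see the status bar hookup below.
    model.abort_generation()

UI_2_abort("")
assert model.abort                      # the next stopper check will halt generation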
@@ -8267,7 +8265,7 @@ class GenerationOutputSchema(KoboldSchema):
     results: List[GenerationResultSchema] = fields.List(fields.Nested(GenerationResultSchema), required=True, metadata={"description": "Array of generated outputs."})

 class StoryNumsChunkSchema(KoboldSchema):
-    num: int = fields.Integer(required=True, metadata={"description": "Guaranteed to not equal the `num` of any other active story chunk. Equals 0 iff this is the first action of the story (the prompt)."})
+    num: int = fields.Integer(required=True, metadata={"description": "Guaranteed to not equal the `num` of any other active story chunk. Equals 0 if this is the first action of the story (the prompt)."})

 class StoryChunkSchema(StoryNumsChunkSchema, KoboldSchema):
     text: str = fields.String(required=True, metadata={"description": "The text inside this story chunk."})

@@ -80,7 +80,7 @@
     "#@title <b><-- Select your model below and then click this to start KoboldAI</b>\n",
     "#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
     "\n",
-    "Model = \"Nerys V2 6B\" #@param [\"HoloMax 13B (United)\", \"MythoMax 13B (United)\", \"Huginn 13B (United)\", \"Chronos 13B (United)\", \"Airoboros M2.0 13B (United)\", \"Holodeck 13B (United)\", \"Spring Dragon 13B (United)\", \"Nerys V2 6B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Nerys 2.7B\", \"AID 2.7B\", \"Janeway 2.7B\", \"Picard 2.7B\", \"OPT 2.7B\", \"Fairseq Dense 2.7B\", \"Neo 2.7B\"] {allow-input: true}\n",
+    "Model = \"Nerys V2 6B\" #@param [\"HoloMax 13B (United)\", \"Emerhyst 13B (United)\", \"MythoMax 13B (United)\", \"Huginn 13B (United)\", \"Chronos 13B (United)\", \"Airoboros M2.0 13B (United)\", \"Holodeck 13B (United)\", \"Spring Dragon 13B (United)\", \"Nerys V2 6B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Nerys 2.7B\", \"AID 2.7B\", \"Janeway 2.7B\", \"Picard 2.7B\", \"OPT 2.7B\", \"Fairseq Dense 2.7B\", \"Neo 2.7B\"] {allow-input: true}\n",
     "Revision = \"\" #@param [\"\"]{allow-input: true}\n",
     "Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
     "Provider = \"Cloudflare\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
@@ -160,9 +160,14 @@
     "  path = \"\"\n",
     "  download = \"\"\n",
     "  Version = \"United\"\n",
+    "elif Model == \"Emerhyst 13B (United)\":\n",
+    "  Model = \"Undi95/Emerhyst-13B\"\n",
+    "  path = \"\"\n",
+    "  download = \"\"\n",
+    "  Version = \"United\"\n",
     "elif Model == \"MythoMax 13B (United)\":\n",
-    "  Model = \"TheBloke/MythoMax-L2-13B-GPTQ\"\n",
-    "  Revision = \"gptq-4bit-128g-actorder_True\"\n",
+    "  Model = \"Gryphe/MythoMax-L2-13b\"\n",
+    "  Revision = \"\"\n",
     "  path = \"\"\n",
     "  download = \"\"\n",
     "  Version = \"United\"\n",
@@ -177,7 +182,7 @@
     "  download = \"\"\n",
     "  Version = \"United\"\n",
     "elif Model == \"HoloMax 13B (United)\":\n",
-    "  Model = \"KoboldAI/LLaMA2-13B-Holomax-GPTQ\"\n",
+    "  Model = \"KoboldAI/LLaMA2-13B-Holomax\"\n",
     "  path = \"\"\n",
     "  download = \"\"\n",
     "  Version = \"United\"\n",

@@ -171,6 +171,7 @@ class InferenceModel:
     """Root class for all models."""

     def __init__(self) -> None:
+        self.abort = False
         self.gen_state = {}
         self.post_token_hooks = []
         self.stopper_hooks = []
@@ -669,6 +670,9 @@ class InferenceModel:
         for hook in self.post_token_hooks:
             hook(self, input_ids)

+    def abort_generation(self, abort=True):
+        self.abort=abort
+
     def get_supported_gen_modes(self) -> List[GenerationMode]:
         """Returns a list of compatible `GenerationMode`s for the current model.
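
With the base class owning the flag, setting self.abort is enough for local backends, and a remote backend can override abort_generation() to also cancel the job server-side. A sketch of that override pattern under the new default (RemoteBackend and cancel_remote_job are illustrative names, not part of the commit):

class InferenceModel:
    def __init__(self) -> None:
        self.abort = False

    def abort_generation(self, abort=True):
        self.abort = abort

class RemoteBackend(InferenceModel):
    def abort_generation(self, abort=True):
        super().abort_generation(abort)   # keep the local flag in sync for the stoppers
        if abort:
            self.cancel_remote_job()      # hypothetical hook; the Horde backend below uses an HTTP DELETE

    def cancel_remote_job(self):
        print("remote job cancelled")     # stand-in for the real API call

RemoteBackend().abort_generation()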

@@ -29,19 +29,20 @@ class model_backend(InferenceModel):
         super().__init__()
         self.url = "https://horde.koboldai.net"
         self.key = "0000000000"
-        self.models = self.get_cluster_models()
+        self.models = []
         self.model_name = "Horde"
         self.model = []
+        self.request_id = None


         # Do not allow API to be served over the API
         self.capabilties = ModelCapabilities(api_host=False)

     def is_valid(self, model_name, model_path, menu_path):
-        logger.debug("Horde Models: {}".format(self.models))
-        return model_name == "CLUSTER" or model_name in [x['value'] for x in self.models]
+        return model_name == "CLUSTER"

     def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
+        self.models = self.get_cluster_models()
         if os.path.exists("settings/horde.model_backend.settings") and 'base_url' not in vars(self):
             with open("settings/horde.model_backend.settings", "r") as f:
                 temp = json.load(f)
@@ -105,13 +106,11 @@ class model_backend(InferenceModel):
         except:
             logger.init_err("KAI Horde Models", status="Failed")
             logger.error("Provided KoboldAI Horde URL unreachable")
-            emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"})
             return
         if not req.ok:
             # Something went wrong, print the message and quit since we can't initialize an engine
             logger.init_err("KAI Horde Models", status="Failed")
             logger.error(req.json())
-            emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1")
             return

         engines = req.json()
@@ -222,18 +221,18 @@ class model_backend(InferenceModel):
             logger.error(errmsg)
             raise HordeException(errmsg)

-        request_id = req_status["id"]
-        logger.debug("Horde Request ID: {}".format(request_id))
+        self.request_id = req_status["id"]
+        logger.debug("Horde Request ID: {}".format(self.request_id))

         # We've sent the request and got the ID back, now we need to watch it to see when it finishes
-        finished = False
+        self.finished = False

         cluster_agent_headers = {"Client-Agent": client_agent}

-        while not finished:
+        while not self.finished:
             try:
                 req = requests.get(
-                    f"{self.url}/api/v2/generate/text/status/{request_id}",
+                    f"{self.url}/api/v2/generate/text/status/{self.request_id}",
                     headers=cluster_agent_headers,
                 )
             except requests.exceptions.ConnectionError:
@@ -260,15 +259,16 @@ class model_backend(InferenceModel):
                 logger.error(errmsg)
                 raise HordeException(errmsg)

-            finished = req_status["done"]
+            self.finished = req_status["done"]
             utils.koboldai_vars.horde_wait_time = req_status["wait_time"]
             utils.koboldai_vars.horde_queue_position = req_status["queue_position"]
             utils.koboldai_vars.horde_queue_size = req_status["waiting"]

-            if not finished:
+            if not self.finished:
                 logger.debug(req_status)
                 time.sleep(1)

+        self.request_id = None
         logger.debug("Last Horde Status Message: {}".format(req_status))

         if req_status["faulted"]:
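
Promoting finished and request_id to instance attributes is what lets abort_generation() (added below) break this polling loop from outside. A reduced sketch of that interaction, independent of the Horde specifics:

import threading
import time

class Poller:
    def __init__(self):
        self.finished = False

    def poll(self):
        while not self.finished:   # abort_generation() flips this from another thread
            time.sleep(0.1)        # stands in for the status GET and sleep above

    def abort_generation(self):
        self.finished = True

p = Poller()
t = threading.Thread(target=p.poll)
t.start()
p.abort_generation()
t.join()                           # returns promptly once finished is True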
@@ -287,3 +287,33 @@ class model_backend(InferenceModel):
             is_whole_generation=True,
             single_line=single_line,
         )
+
+    def abort_generation(self, abort=True):
+        logger.info("Attempting to stop horde gen")
+        self.finished = True
+        try:
+            # Create request
+            client_agent = "KoboldAI:2.0.0:koboldai.org"
+            cluster_headers = {
+                "apikey": self.key,
+                "Client-Agent": client_agent,
+            }
+            req = requests.delete(
+                f"{self.url}/v2/generate/text/status/{self.request_id}",
+                headers=cluster_headers,
+            )
+        except requests.exceptions.ConnectionError:
+            errmsg = f"Horde unavailable. Please try again later"
+            logger.error(errmsg)
+            raise HordeException(errmsg)
+
+        if req.status_code == 503:
+            errmsg = f"KoboldAI API Error: No available KoboldAI servers found in Horde to fulfil this request using the selected models or other properties."
+            logger.error(errmsg)
+            raise HordeException(errmsg)
+        elif not req.ok:
+            errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
+            logger.error(req.url)
+            logger.error(errmsg)
+            logger.error(req.text)
+            raise HordeException(errmsg)
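
Note that this DELETE targets {self.url}/v2/generate/text/status/... while the status GET above uses the /api/v2/... prefix; if the Horde only serves its API under /api, the cancel request will 404 (generation still stops locally, since self.finished is set first). A standalone sketch of the cancel call, assuming the /api/v2 prefix and a placeholder request id:

import requests

HORDE_URL = "https://horde.koboldai.net"              # matches self.url in this backend
request_id = "00000000-0000-0000-0000-000000000000"   # placeholder, not a real job

resp = requests.delete(
    f"{HORDE_URL}/api/v2/generate/text/status/{request_id}",  # /api prefix assumed
    headers={"apikey": "0000000000", "Client-Agent": "KoboldAI:2.0.0:koboldai.org"},
)
print(resp.status_code)   # 404 for the placeholder id, since no such job exists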

@@ -26,11 +26,11 @@ class Stoppers:
                 f"Inconsistency detected between KoboldAI Python and Lua backends ({utils.koboldai_vars.generated_tkns} != {utils.koboldai_vars.lua_koboldbridge.generated_cols})"
             )

-        if utils.koboldai_vars.abort or (
+        if model.abort or (
             utils.koboldai_vars.inference_config.stop_at_genamt
             and utils.koboldai_vars.generated_tkns >= utils.koboldai_vars.genamt
         ):
-            utils.koboldai_vars.abort = False
+            model.abort = False
             model.gen_state["regeneration_required"] = False
             model.gen_state["halt"] = False
             return True
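
With the stopper reading and resetting model.abort, the flag is one-shot: honoring an abort clears it so the next generation starts clean. A reduced, runnable sketch of the condition:

# Reduced sketch of the new stop condition; stoppers return True to halt generation.
class Model:
    def __init__(self):
        self.abort = False
        self.gen_state = {}

def core_stopper(model, generated_tkns, genamt, stop_at_genamt=True):
    if model.abort or (stop_at_genamt and generated_tkns >= genamt):
        model.abort = False                              # one-shot reset, as above
        model.gen_state["regeneration_required"] = False
        model.gen_state["halt"] = False
        return True
    return False

m = Model()
m.abort = True
assert core_stopper(m, generated_tkns=3, genamt=80)      # abort wins even mid-budget
assert not m.abort                                       # cleared after being honored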

@@ -48,6 +48,7 @@ https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu
 https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' and python_version == '3.8'
 windows-curses; sys_platform == 'win32'
 pynvml
-flash_attn==2.3.0
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10'
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu118torch2.0cxx11abiFALSE-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.8'
 xformers==0.0.21
 omegaconf
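
The source pin is swapped for prebuilt wheels selected by PEP 508 environment markers, so each platform/Python combination resolves to at most one of these lines and nothing is compiled at install time. To check which marker matches the current interpreter (the packaging module ships with pip, so it is normally importable):

# Evaluate a PEP 508 environment marker the way pip does for requirement lines.
from packaging.markers import Marker

marker = Marker("sys_platform == 'linux' and python_version == '3.10'")
print(marker.evaluate())   # True only on Linux under Python 3.10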

static/klite.html (2795 changes)
File diff suppressed because one or more lines are too long

@@ -100,7 +100,7 @@
     </div>
 </div><br>
 <div class="statusbar_outer_horde var_sync_alt_system_aibusy var_sync_alt_model_horde_wait_time" id="status_bar_horde">
-    <div class="statusbar_inner_horde" style="width:100%">
+    <div class="statusbar_inner_horde" style="width:100%" onclick="socket.emit('abort','');">
         <div> </div>
         <div>Queue <span class="var_sync_model_horde_queue_position"></span> of <span class="var_sync_model_horde_queue_size"></span></div>
         <div><span class="var_sync_model_horde_wait_time"></span> sec left</div>

utils.py (4 changes)

@@ -98,9 +98,9 @@ def replaceblanklines(txt):
 #==================================================================#
 def removespecialchars(txt, koboldai_vars=None):
     if koboldai_vars is None or koboldai_vars.actionmode == 0:
-        txt = re.sub(r"[#/@%<>{}+=~|\^]", "", txt)
+        txt = re.sub(r"[/@%<>{}+=~|\^]", "", txt)
     else:
-        txt = re.sub(r"[#/@%{}+=~|\^]", "", txt)
+        txt = re.sub(r"[/@%{}+=~|\^]", "", txt)
     return txt

 #==================================================================#
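
Dropping # from both character classes means hash marks now survive input filtering in every action mode. A quick standalone check of the new behavior, with the koboldai_vars argument reduced to an actionmode flag for illustration:

import re

def removespecialchars(txt, actionmode=0):
    if actionmode == 0:
        return re.sub(r"[/@%<>{}+=~|\^]", "", txt)   # '#' no longer stripped
    return re.sub(r"[/@%{}+=~|\^]", "", txt)         # adventure mode also keeps '<' and '>'

print(removespecialchars("Chapter #1 <test> @here"))  # -> Chapter #1 test here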