diff --git a/aiserver.py b/aiserver.py
index 10f327c7..de013a73 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -908,7 +908,7 @@ tags = [
 
 api_version = None # This gets set automatically so don't change this value
 api_v1 = KoboldAPISpec(
-    version="1.2.2",
+    version="1.2.3",
     prefixes=["/api/v1", "/api/latest"],
     tags=tags,
 )
@@ -1695,9 +1695,6 @@ def load_model(model_backend, initial_load=False):
     koboldai_vars.aibusy = True
     koboldai_vars.horde_share = False
 
-    if initial_load:
-        use_breakmodel_args = True
-
     koboldai_vars.reset_model()
     koboldai_vars.noai = False
 
@@ -3227,7 +3224,7 @@ def actionsubmit(
     gen_mode=GenerationMode.STANDARD
 ):
     # Ignore new submissions if the AI is currently busy
-    if(koboldai_vars.aibusy):
+    if koboldai_vars.aibusy and not ignore_aibusy:
         return
 
     while(True):
@@ -5105,9 +5102,13 @@ def load_story_v1(js, from_file=None):
 def load_story_v2(js, from_file=None):
     logger.debug("Loading V2 Story")
     logger.debug("Called from {}".format(inspect.stack()[1].function))
-    leave_room(session['story'])
-    session['story'] = js['story_name']
-    join_room(session['story'])
+
+    new_story = js["story_name"]
+    # In socket context
+    if hasattr(request, "sid"):
+        leave_room(session['story'])
+        join_room(new_story)
+    session['story'] = new_story
 
     koboldai_vars.load_story(session['story'], js)
 
@@ -8231,6 +8232,7 @@ class WorldInfoUIDsSchema(WorldInfoEntriesUIDsSchema):
 
 class ModelSelectionSchema(KoboldSchema):
     model: str = fields.String(required=True, validate=validate.Regexp(r"^(?!\s*NeoCustom)(?!\s*GPT2Custom)(?!\s*TPUMeshTransformerGPTJ)(?!\s*TPUMeshTransformerGPTNeoX)(?!\s*GooseAI)(?!\s*OAI)(?!\s*InferKit)(?!\s*Colab)(?!\s*API).*$"), metadata={"description": 'Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model'})
+    backend: Optional[str] = fields.String(required=False, validate=validate.OneOf(model_backends.keys()))
 
 def _generate_text(body: GenerationInputSchema):
     if koboldai_vars.aibusy or koboldai_vars.genseqs:
@@ -8488,6 +8490,7 @@ def put_model(body: ModelSelectionSchema):
       summary: Load a model
       description: |-2
        Loads a model given its Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model.
+       Optionally, a backend parameter can be passed in to dictate which backend loads the model.
      tags:
        - model
      requestBody:
@@ -8497,6 +8500,7 @@ def put_model(body: ModelSelectionSchema):
            schema: ModelSelectionSchema
            example:
              model: ReadOnly
+             backend: Read Only
      responses:
        200:
          description: Successful request
@@ -8514,8 +8518,18 @@ def put_model(body: ModelSelectionSchema):
     set_aibusy(1)
     old_model = koboldai_vars.model
     koboldai_vars.model = body.model.strip()
+
+    backend = getattr(body, "backend", None)
+    if not backend:
+        # Backend is optional for backwards compatibility; it should probably be
+        # required on the next major API version.
+        if body.model == "ReadOnly":
+            backend = "Read Only"
+        else:
+            backend = "Huggingface"
+
     try:
-        load_model(use_breakmodel_args=True, breakmodel_args_default_to_cpu=True)
+        load_model(backend)
     except Exception as e:
         koboldai_vars.model = old_model
         raise e
@@ -8803,8 +8817,14 @@ def get_story():
     chunks = []
     if koboldai_vars.gamestarted:
         chunks.append({"num": 0, "text": koboldai_vars.prompt})
-    for num, action in koboldai_vars.actions.items():
-        chunks.append({"num": num + 1, "text": action})
+
+    last_action_num = list(koboldai_vars.actions.actions.keys())[-1]
+    for num, action in koboldai_vars.actions.actions.items():
+        text = action["Selected Text"]
+        # The last action seems to always be empty
+        if not text and num == last_action_num:
+            continue
+        chunks.append({"num": num + 1, "text": text})
     return {"results": chunks}
 
 
@@ -8828,7 +8848,7 @@ def get_story_nums():
     chunks = []
     if koboldai_vars.gamestarted:
         chunks.append(0)
-    for num in koboldai_vars.actions.keys():
+    for num in koboldai_vars.actions.actions.keys():
         chunks.append(num + 1)
     return {"results": chunks}
 
@@ -9189,7 +9209,7 @@ def get_world_info():
         if wi["folder"] != last_folder:
             folder = []
             if wi["folder"] is not None:
-                folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[wi["folder"]]["name"], "entries": folder})
+                folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[str(wi["folder"])]["name"], "entries": folder})
             last_folder = wi["folder"]
         (folder if wi["folder"] is not None else entries).append({k: v for k, v in wi.items() if k not in ("init", "folder", "num") and (wi["selective"] or k != "keysecondary")})
     return {"folders": folders, "entries": entries}
diff --git a/koboldai_settings.py b/koboldai_settings.py
index 976b6bcd..62e4918d 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -6,7 +6,7 @@ import os, re, time, threading, json, pickle, base64, copy, tqdm, datetime, sys
 import shutil
 from typing import List, Union
 from io import BytesIO
-from flask import has_request_context, session
+from flask import has_request_context, session, request
 from flask_socketio import join_room, leave_room
 from collections import OrderedDict
 import multiprocessing
@@ -130,11 +130,14 @@ class koboldai_vars(object):
         original_story_name = story_name
         if not multi_story:
             story_name = 'default'
-        #Leave the old room and join the new one
-        logger.debug("Leaving room {}".format(session['story']))
-        leave_room(session['story'])
-        logger.debug("Joining room {}".format(story_name))
-        join_room(story_name)
+
+        # Leave the old room and join the new one if in socket context
+        if hasattr(request, "sid"):
+            logger.debug("Leaving room {}".format(session['story']))
+            leave_room(session['story'])
+            logger.debug("Joining room {}".format(story_name))
+            join_room(story_name)
+
         session['story'] = story_name
         logger.debug("Sending story reset")
         self._story_settings[story_name]._socketio.emit("reset_story", {}, broadcast=True, room=story_name)
diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py
index 83a5a318..a059ebb0 100644
--- a/modeling/inference_models/generic_hf_torch/class.py
+++ b/modeling/inference_models/generic_hf_torch/class.py
@@ -27,6 +27,10 @@ model_backend_name = "Huggingface"
 model_backend_type = "Huggingface" #This should be a generic name in case multiple model backends are compatible (think Hugging Face Custom and Basic Hugging Face)
 
 class model_backend(HFTorchInferenceModel):
+    def __init__(self) -> None:
+        super().__init__()
+        self.use_4_bit = False
+
     def is_valid(self, model_name, model_path, menu_path):
         base_is_valid = super().is_valid(model_name, model_path, menu_path)
         path = False
diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py
index 60b69476..167716d4 100644
--- a/modeling/inference_models/hf.py
+++ b/modeling/inference_models/hf.py
@@ -19,8 +19,12 @@ class HFInferenceModel(InferenceModel):
     def __init__(self) -> None:
         super().__init__()
         self.model_config = None
-        #self.model_name = model_name
+        # TODO: model_name should probably be an instantiation parameter all the
+        # way down the inheritance chain.
+        self.model_name = None
+
+        self.path = None
         self.hf_torch = False
         self.model = None
         self.tokenizer = None
 
@@ -217,6 +221,11 @@ class HFInferenceModel(InferenceModel):
                 torch.cuda.empty_cache()
             except:
                 pass
+
+    def _pre_load(self) -> None:
+        # HACK: Make model instantiation work without UI parameters
+        self.model_name = self.model_name or utils.koboldai_vars.model
+        return super()._pre_load()
 
     def _post_load(self) -> None:
         self.badwordsids = koboldai_settings.badwordsids_default
diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index 5a6b18c1..82e60304 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -93,7 +93,11 @@ class HFTorchInferenceModel(HFInferenceModel):
         self.hf_torch = True
         self.lazy_load = True
         self.low_mem = False
+
+        # `nobreakmodel` indicates that breakmodel cannot be used, while `breakmodel`
+        # indicates whether breakmodel is currently being used
         self.nobreakmodel = False
+        self.breakmodel = False
 
         self.post_token_hooks = [
             PostTokenHooks.stream_tokens,
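
For reference, a minimal sketch of how a client might exercise the new optional "backend" field added to ModelSelectionSchema above. The host/port and the exact "/api/v1/model" path are assumptions inferred from the KoboldAPISpec prefixes and the put_model handler, not something this diff specifies.

# Hypothetical client call (not part of this diff): load a model via the API,
# explicitly naming the backend. Omitting "backend" falls back to "Read Only"
# for the ReadOnly model or "Huggingface" otherwise, per put_model above.
import requests

resp = requests.put(
    "http://localhost:5000/api/v1/model",  # assumed local KoboldAI instance
    json={"model": "ReadOnly", "backend": "Read Only"},
)
resp.raise_for_status()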