From fef42a6273064a1103715458505fe41f5e747b32 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 11:52:39 -0500 Subject: [PATCH 1/8] API: Fix loading --- aiserver.py | 18 ++++++++++++++---- .../inference_models/generic_hf_torch/class.py | 5 ++++- modeling/inference_models/hf.py | 11 ++++++++++- modeling/inference_models/hf_torch.py | 3 +++ 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/aiserver.py b/aiserver.py index 0aa9bd4c..e76bf2c7 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1747,9 +1747,6 @@ def load_model(model_backend, initial_load=False): koboldai_vars.aibusy = True koboldai_vars.horde_share = False - if initial_load: - use_breakmodel_args = True - koboldai_vars.reset_model() koboldai_vars.noai = False @@ -8235,6 +8232,7 @@ class WorldInfoUIDsSchema(WorldInfoEntriesUIDsSchema): class ModelSelectionSchema(KoboldSchema): model: str = fields.String(required=True, validate=validate.Regexp(r"^(?!\s*NeoCustom)(?!\s*GPT2Custom)(?!\s*TPUMeshTransformerGPTJ)(?!\s*TPUMeshTransformerGPTNeoX)(?!\s*GooseAI)(?!\s*OAI)(?!\s*InferKit)(?!\s*Colab)(?!\s*API).*$"), metadata={"description": 'Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model'}) + backend: Optional[str] = fields.String(required=False, validate=validate.OneOf(model_backends.keys())) def _generate_text(body: GenerationInputSchema): if koboldai_vars.aibusy or koboldai_vars.genseqs: @@ -8492,6 +8490,7 @@ def put_model(body: ModelSelectionSchema): summary: Load a model description: |-2 Loads a model given its Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model. + Optionally, a backend parameter can be passed in to dictate which backend loads the model. tags: - model requestBody: @@ -8501,6 +8500,7 @@ def put_model(body: ModelSelectionSchema): schema: ModelSelectionSchema example: model: ReadOnly + backend: Read Only responses: 200: description: Successful request @@ -8518,8 +8518,18 @@ def put_model(body: ModelSelectionSchema): set_aibusy(1) old_model = koboldai_vars.model koboldai_vars.model = body.model.strip() + + backend = getattr(body, "backend", None) + if not backend: + # Backend is optional for backwards compatibility; it should probably be + # required on the next major API version. 
+ if body.model == "ReadOnly": + backend = "Read Only" + else: + backend = "Huggingface" + try: - load_model(use_breakmodel_args=True, breakmodel_args_default_to_cpu=True) + load_model(backend) except Exception as e: koboldai_vars.model = old_model raise e diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py index 93def5a6..541c3891 100644 --- a/modeling/inference_models/generic_hf_torch/class.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -21,7 +21,10 @@ model_backend_name = "Huggingface" model_backend_type = "Huggingface" #This should be a generic name in case multiple model backends are compatible (think Hugging Face Custom and Basic Hugging Face) class model_backend(HFTorchInferenceModel): - + def __init__(self) -> None: + super().__init__() + self.use_4_bit = False + def _initialize_model(self): return diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index e407f5b4..27425a46 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -15,8 +15,12 @@ class HFInferenceModel(InferenceModel): def __init__(self) -> None: super().__init__() self.model_config = None - #self.model_name = model_name + # TODO: model_name should probably be an instantiation parameter all the + # way down the inheritance chain. + self.model_name = None + + self.path = None self.hf_torch = False self.model = None self.tokenizer = None @@ -213,6 +217,11 @@ class HFInferenceModel(InferenceModel): torch.cuda.empty_cache() except: pass + + def _pre_load(self) -> None: + # HACK: Make model instantiation work without UI parameters + self.model_name = self.model_name or utils.koboldai_vars.model + return super()._pre_load() def _post_load(self) -> None: self.badwordsids = koboldai_settings.badwordsids_default diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 1b411c95..140acedc 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -89,7 +89,10 @@ class HFTorchInferenceModel(HFInferenceModel): self.hf_torch = True self.lazy_load = True self.low_mem = False + + # TODO: Mayyyybe only keep one of these variables self.nobreakmodel = False + self.breakmodel = False self.post_token_hooks = [ PostTokenHooks.stream_tokens, From 813e210127c2d990d43a8ae7cd90f7124ea64d61 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 11:52:49 -0500 Subject: [PATCH 2/8] Bump tiny API version As we're adding a new (though optional) parameter to load endpoint --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index e76bf2c7..95234644 100644 --- a/aiserver.py +++ b/aiserver.py @@ -892,7 +892,7 @@ tags = [ api_version = None # This gets set automatically so don't change this value api_v1 = KoboldAPISpec( - version="1.2.2", + version="1.2.3", prefixes=["/api/v1", "/api/latest"], tags=tags, ) From b9b3cd3aba4d076cf9c703b5cb29a2ecc4b6e431 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 12:02:53 -0500 Subject: [PATCH 3/8] API: Fix /story --- aiserver.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index 95234644..2278015c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -8817,8 +8817,14 @@ def get_story(): chunks = [] if koboldai_vars.gamestarted: chunks.append({"num": 0, "text": koboldai_vars.prompt}) - for num, action in koboldai_vars.actions.items(): - chunks.append({"num": num + 1, "text": 
action}) + + last_action_num = list(koboldai_vars.actions.actions.keys())[-1] + for num, action in koboldai_vars.actions.actions.items(): + text = action["Selected Text"] + # The last action seems to always be empty + if not text and num == last_action_num: + continue + chunks.append({"num": num + 1, "text": text}) return {"results": chunks} From 6da7a9629ad9c5ae2b25415e12174addc6b3b545 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 13:01:07 -0500 Subject: [PATCH 4/8] API: Fix /story/load --- aiserver.py | 10 +++++++--- koboldai_settings.py | 15 +++++++++------ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/aiserver.py b/aiserver.py index 2278015c..153e6d07 100644 --- a/aiserver.py +++ b/aiserver.py @@ -5130,9 +5130,13 @@ def load_story_v1(js, from_file=None): def load_story_v2(js, from_file=None): logger.debug("Loading V2 Story") logger.debug("Called from {}".format(inspect.stack()[1].function)) - leave_room(session['story']) - session['story'] = js['story_name'] - join_room(session['story']) + + new_story = js["story_name"] + # In socket context + if hasattr(request, "sid"): + leave_room(session['story']) + join_room(new_story) + session['story'] = new_story koboldai_vars.load_story(session['story'], js) diff --git a/koboldai_settings.py b/koboldai_settings.py index ebd8c019..3bc0eb86 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -6,7 +6,7 @@ import os, re, time, threading, json, pickle, base64, copy, tqdm, datetime, sys import shutil from typing import List, Union from io import BytesIO -from flask import has_request_context, session +from flask import has_request_context, session, request from flask_socketio import join_room, leave_room from collections import OrderedDict import multiprocessing @@ -130,11 +130,14 @@ class koboldai_vars(object): original_story_name = story_name if not multi_story: story_name = 'default' - #Leave the old room and join the new one - logger.debug("Leaving room {}".format(session['story'])) - leave_room(session['story']) - logger.debug("Joining room {}".format(story_name)) - join_room(story_name) + + # Leave the old room and join the new one if in socket context + if hasattr(request, "sid"): + logger.debug("Leaving room {}".format(session['story'])) + leave_room(session['story']) + logger.debug("Joining room {}".format(story_name)) + join_room(story_name) + session['story'] = story_name logger.debug("Sending story reset") self._story_settings[story_name]._socketio.emit("reset_story", {}, broadcast=True, room=story_name) From 9726d12ede27bd3b214c412f48cc8dc9c4f66f1c Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 13:05:35 -0500 Subject: [PATCH 5/8] API: Fix /story/end (POST) --- aiserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index 153e6d07..6e55b943 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3265,9 +3265,9 @@ def check_for_backend_compilation(): def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, disable_recentrng=False, no_generate=False, ignore_aibusy=False): # Ignore new submissions if the AI is currently busy - if(koboldai_vars.aibusy): + if koboldai_vars.aibusy and not ignore_aibusy: return - + while(True): set_aibusy(1) koboldai_vars.actions.clear_unused_options() From 2d80f2ebb5902f4a2400227ab390f68f5b837e94 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 13:08:57 -0500 Subject: [PATCH 6/8] API: Fix getstorynums --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/aiserver.py b/aiserver.py index 6e55b943..c90d862c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -8852,7 +8852,7 @@ def get_story_nums(): chunks = [] if koboldai_vars.gamestarted: chunks.append(0) - for num in koboldai_vars.actions.keys(): + for num in koboldai_vars.actions.actions.keys(): chunks.append(num + 1) return {"results": chunks} From 4335d1f46a9968dc6589daf4c147408835a6c5e9 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 13:18:45 -0500 Subject: [PATCH 7/8] API: Fix /world_info --- aiserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index c90d862c..604dba5f 100644 --- a/aiserver.py +++ b/aiserver.py @@ -4424,7 +4424,7 @@ def requestwi(): # and items in different folders are sorted based on the order of the folders #==================================================================# def stablesortwi(): - mapping = {uid: index for index, uid in enumerate(koboldai_vars.wifolders_l)} + mapping = {str(uid): index for index, uid in enumerate(koboldai_vars.wifolders_l)} koboldai_vars.worldinfo.sort(key=lambda x: mapping[str(x["folder"])] if x["folder"] is not None else float("inf")) last_folder = ... last_wi = None @@ -9213,7 +9213,7 @@ def get_world_info(): if wi["folder"] != last_folder: folder = [] if wi["folder"] is not None: - folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[wi["folder"]]["name"], "entries": folder}) + folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[str(wi["folder"])]["name"], "entries": folder}) last_folder = wi["folder"] (folder if wi["folder"] is not None else entries).append({k: v for k, v in wi.items() if k not in ("init", "folder", "num") and (wi["selective"] or k != "keysecondary")}) return {"folders": folders, "entries": entries} From fa0a09994386f704e99ef7bebeb6903469aea044 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 10:38:17 -0500 Subject: [PATCH 8/8] Update comment --- modeling/inference_models/hf_torch.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 140acedc..c4e82e6f 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -90,7 +90,8 @@ class HFTorchInferenceModel(HFInferenceModel): self.lazy_load = True self.low_mem = False - # TODO: Mayyyybe only keep one of these variables + # `nobreakmodel` indicates that breakmodel cannot be used, while `breakmodel` + # indicates whether breakmodel is currently being used self.nobreakmodel = False self.breakmodel = False
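
For reference, a rough client-side sketch of the updated load endpoint and the repaired /story route introduced in this series. The base URL, port 5000, and the exact /api/v1/model and /api/v1/story paths are assumptions about a default local KoboldAI setup (they are not shown in these patches); adjust them for your instance.

import requests

BASE = "http://localhost:5000/api/v1"  # assumed default host/port

# Load a model, naming the backend explicitly (optional as of API 1.2.3).
# "backend" must be one of the registered model_backends keys; if omitted,
# put_model falls back to "Read Only" for the ReadOnly model and
# "Huggingface" for everything else.
resp = requests.put(
    f"{BASE}/model",
    json={"model": "ReadOnly", "backend": "Read Only"},
)
resp.raise_for_status()

# Fetch the story; each chunk is {"num": <int>, "text": <str>}, and the
# trailing empty action is now skipped by the /story fix in this series.
story = requests.get(f"{BASE}/story").json()
for chunk in story["results"]:
    print(chunk["num"], chunk["text"])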