aiserver.py
@@ -908,7 +908,7 @@ tags = [
 api_version = None # This gets set automatically so don't change this value
 
 api_v1 = KoboldAPISpec(
-    version="1.2.2",
+    version="1.2.3",
     prefixes=["/api/v1", "/api/latest"],
     tags=tags,
 )
@@ -1695,9 +1695,6 @@ def load_model(model_backend, initial_load=False):
     koboldai_vars.aibusy = True
     koboldai_vars.horde_share = False
 
-    if initial_load:
-        use_breakmodel_args = True
-
     koboldai_vars.reset_model()
 
     koboldai_vars.noai = False
@@ -3227,7 +3224,7 @@ def actionsubmit(
     gen_mode=GenerationMode.STANDARD
 ):
     # Ignore new submissions if the AI is currently busy
-    if(koboldai_vars.aibusy):
+    if koboldai_vars.aibusy and not ignore_aibusy:
         return
 
     while(True):
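The new `ignore_aibusy` flag lets internal callers push a submission through even while generation is marked busy, instead of silently dropping it. A minimal sketch of the guard pattern, using stand-in names rather than the real KoboldAI objects:

class Vars:
    aibusy = False

koboldai_vars = Vars()

def submit(text, ignore_aibusy=False):
    # Drop the submission unless the caller explicitly opted out of the busy check.
    if koboldai_vars.aibusy and not ignore_aibusy:
        return None
    koboldai_vars.aibusy = True
    try:
        return f"generated for: {text}"  # stand-in for the real generation call
    finally:
        koboldai_vars.aibusy = False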
@@ -5105,9 +5102,13 @@ def load_story_v1(js, from_file=None):
 def load_story_v2(js, from_file=None):
     logger.debug("Loading V2 Story")
     logger.debug("Called from {}".format(inspect.stack()[1].function))
-    leave_room(session['story'])
-    session['story'] = js['story_name']
-    join_room(session['story'])
+
+    new_story = js["story_name"]
+    # In socket context
+    if hasattr(request, "sid"):
+        leave_room(session['story'])
+        join_room(new_story)
+    session['story'] = new_story
 
     koboldai_vars.load_story(session['story'], js)
 
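`leave_room` and `join_room` from Flask-SocketIO are only valid while handling a Socket.IO event; in that context Flask-SocketIO attaches an `sid` attribute to `request`, which is what the `hasattr` check detects. A rough sketch of the same guard in isolation (the app setup and helper name are placeholders):

from flask import Flask, request, session
from flask_socketio import SocketIO, join_room, leave_room

app = Flask(__name__)
app.secret_key = "dev"  # placeholder; required for session access
socketio = SocketIO(app)

def switch_story_room(new_story):
    # Only touch Socket.IO rooms when the request is a Socket.IO event;
    # plain HTTP requests have no `sid` attribute on `request`.
    if hasattr(request, "sid"):
        if session.get("story"):
            leave_room(session["story"])
        join_room(new_story)
    session["story"] = new_story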
@@ -8231,6 +8232,7 @@ class WorldInfoUIDsSchema(WorldInfoEntriesUIDsSchema):
 
 class ModelSelectionSchema(KoboldSchema):
     model: str = fields.String(required=True, validate=validate.Regexp(r"^(?!\s*NeoCustom)(?!\s*GPT2Custom)(?!\s*TPUMeshTransformerGPTJ)(?!\s*TPUMeshTransformerGPTNeoX)(?!\s*GooseAI)(?!\s*OAI)(?!\s*InferKit)(?!\s*Colab)(?!\s*API).*$"), metadata={"description": 'Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model'})
+    backend: Optional[str] = fields.String(required=False, validate=validate.OneOf(model_backends.keys()))
 
 def _generate_text(body: GenerationInputSchema):
     if koboldai_vars.aibusy or koboldai_vars.genseqs:
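The `model` field's regular expression rejects the legacy backend selectors (NeoCustom, GPT2Custom, the TPU mesh backends, GooseAI, OAI, InferKit, Colab, API) while still accepting Hugging Face IDs, local model folders, and `ReadOnly`. The same pattern can be exercised with plain `re`; the sample inputs below are for illustration only:

import re

MODEL_RE = re.compile(
    r"^(?!\s*NeoCustom)(?!\s*GPT2Custom)(?!\s*TPUMeshTransformerGPTJ)"
    r"(?!\s*TPUMeshTransformerGPTNeoX)(?!\s*GooseAI)(?!\s*OAI)"
    r"(?!\s*InferKit)(?!\s*Colab)(?!\s*API).*$"
)

for candidate in ("KoboldAI/OPT-2.7B-Nerys-v2", "ReadOnly", "NeoCustom", "OAI"):
    print(candidate, "->", "accepted" if MODEL_RE.match(candidate) else "rejected")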
@@ -8488,6 +8490,7 @@ def put_model(body: ModelSelectionSchema):
       summary: Load a model
       description: |-2
         Loads a model given its Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model.
+        Optionally, a backend parameter can be passed in to dictate which backend loads the model.
       tags:
         - model
       requestBody:
@@ -8497,6 +8500,7 @@ def put_model(body: ModelSelectionSchema):
             schema: ModelSelectionSchema
             example:
               model: ReadOnly
+              backend: Read Only
       responses:
         200:
           description: Successful request
@@ -8514,8 +8518,18 @@ def put_model(body: ModelSelectionSchema):
     set_aibusy(1)
     old_model = koboldai_vars.model
     koboldai_vars.model = body.model.strip()
+
+    backend = getattr(body, "backend", None)
+    if not backend:
+        # Backend is optional for backwards compatibility; it should probably be
+        # required on the next major API version.
+        if body.model == "ReadOnly":
+            backend = "Read Only"
+        else:
+            backend = "Huggingface"
+
     try:
-        load_model(use_breakmodel_args=True, breakmodel_args_default_to_cpu=True)
+        load_model(backend)
     except Exception as e:
         koboldai_vars.model = old_model
         raise e
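With the fallback above, older clients that send only `model` keep working, while newer clients can name a backend explicitly. A hedged usage sketch with `requests`, assuming a local KoboldAI instance listening on the default port 5000:

import requests

resp = requests.put(
    "http://localhost:5000/api/v1/model",
    json={"model": "ReadOnly", "backend": "Read Only"},  # "backend" may be omitted
    timeout=600,  # model loads can take a while
)
resp.raise_for_status()
print(resp.json())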
@@ -8803,8 +8817,14 @@ def get_story():
     chunks = []
     if koboldai_vars.gamestarted:
         chunks.append({"num": 0, "text": koboldai_vars.prompt})
-    for num, action in koboldai_vars.actions.items():
-        chunks.append({"num": num + 1, "text": action})
+
+    last_action_num = list(koboldai_vars.actions.actions.keys())[-1]
+    for num, action in koboldai_vars.actions.actions.items():
+        text = action["Selected Text"]
+        # The last action seems to always be empty
+        if not text and num == last_action_num:
+            continue
+        chunks.append({"num": num + 1, "text": text})
     return {"results": chunks}
 
 
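The rewritten loop pulls each action's "Selected Text" and skips a trailing empty action so the API does not report a phantom final chunk. The same filtering, reduced to a standalone function over a plain dict standing in for `koboldai_vars.actions.actions`:

def story_chunks(prompt, actions):
    # actions: ordered mapping of {num: {"Selected Text": str}}, newest last.
    chunks = [{"num": 0, "text": prompt}]
    last_num = list(actions.keys())[-1] if actions else None
    for num, action in actions.items():
        text = action["Selected Text"]
        if not text and num == last_num:  # trailing empty action
            continue
        chunks.append({"num": num + 1, "text": text})
    return chunks

print(story_chunks("Once upon a time", {0: {"Selected Text": " there was a knight."}, 1: {"Selected Text": ""}}))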
@@ -8828,7 +8848,7 @@ def get_story_nums():
     chunks = []
     if koboldai_vars.gamestarted:
         chunks.append(0)
-    for num in koboldai_vars.actions.keys():
+    for num in koboldai_vars.actions.actions.keys():
         chunks.append(num + 1)
     return {"results": chunks}
 
@@ -9189,7 +9209,7 @@ def get_world_info():
         if wi["folder"] != last_folder:
             folder = []
             if wi["folder"] is not None:
-                folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[wi["folder"]]["name"], "entries": folder})
+                folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[str(wi["folder"])]["name"], "entries": folder})
             last_folder = wi["folder"]
         (folder if wi["folder"] is not None else entries).append({k: v for k, v in wi.items() if k not in ("init", "folder", "num") and (wi["selective"] or k != "keysecondary")})
     return {"folders": folders, "entries": entries}
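The one-character fix implies that `wifolders_d` is keyed by stringified UIDs while each entry's `folder` field holds an integer, so the lookup has to convert. A tiny illustration with stand-in data:

# Stand-in for koboldai_vars.wifolders_d: keys are strings, not ints.
wifolders_d = {"2": {"name": "Characters"}}
folder_uid = 2  # what wi["folder"] holds

# wifolders_d[folder_uid] would raise KeyError; the str() conversion is what makes it work.
print(wifolders_d[str(folder_uid)]["name"])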
@@ -6,7 +6,7 @@ import os, re, time, threading, json, pickle, base64, copy, tqdm, datetime, sys
 import shutil
 from typing import List, Union
 from io import BytesIO
-from flask import has_request_context, session
+from flask import has_request_context, session, request
 from flask_socketio import join_room, leave_room
 from collections import OrderedDict
 import multiprocessing
@@ -130,11 +130,14 @@ class koboldai_vars(object):
         original_story_name = story_name
         if not multi_story:
             story_name = 'default'
-        #Leave the old room and join the new one
-        logger.debug("Leaving room {}".format(session['story']))
-        leave_room(session['story'])
-        logger.debug("Joining room {}".format(story_name))
-        join_room(story_name)
+
+        # Leave the old room and join the new one if in socket context
+        if hasattr(request, "sid"):
+            logger.debug("Leaving room {}".format(session['story']))
+            leave_room(session['story'])
+            logger.debug("Joining room {}".format(story_name))
+            join_room(story_name)
+
         session['story'] = story_name
         logger.debug("Sending story reset")
         self._story_settings[story_name]._socketio.emit("reset_story", {}, broadcast=True, room=story_name)
@@ -27,6 +27,10 @@ model_backend_name = "Huggingface"
 model_backend_type = "Huggingface" #This should be a generic name in case multiple model backends are compatible (think Hugging Face Custom and Basic Hugging Face)
 
 class model_backend(HFTorchInferenceModel):
+    def __init__(self) -> None:
+        super().__init__()
+        self.use_4_bit = False
+
     def is_valid(self, model_name, model_path, menu_path):
         base_is_valid = super().is_valid(model_name, model_path, menu_path)
         path = False
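The new `use_4_bit` flag defaults to off; presumably it is surfaced as a loader setting and, when enabled, ends up in the quantization options handed to `transformers`. A sketch of what such wiring could look like — the `BitsAndBytesConfig` mapping and the helper name are assumptions, not code from this diff:

from transformers import AutoModelForCausalLM, BitsAndBytesConfig

def load_quantized(model_name, use_4_bit=False):
    # Hypothetical helper: only build a quantization config when 4-bit is requested.
    quant_config = BitsAndBytesConfig(load_in_4bit=True) if use_4_bit else None
    return AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quant_config,
        device_map="auto",
    )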
@@ -19,8 +19,12 @@ class HFInferenceModel(InferenceModel):
     def __init__(self) -> None:
         super().__init__()
         self.model_config = None
-        #self.model_name = model_name
+
+        # TODO: model_name should probably be an instantiation parameter all the
+        # way down the inheritance chain.
+        self.model_name = None
+
         self.path = None
         self.hf_torch = False
         self.model = None
         self.tokenizer = None
@@ -217,6 +221,11 @@ class HFInferenceModel(InferenceModel):
                 torch.cuda.empty_cache()
             except:
                 pass
 
+    def _pre_load(self) -> None:
+        # HACK: Make model instantiation work without UI parameters
+        self.model_name = self.model_name or utils.koboldai_vars.model
+        return super()._pre_load()
+
     def _post_load(self) -> None:
         self.badwordsids = koboldai_settings.badwordsids_default
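`_pre_load` papers over the fact that `model_name` is not yet a constructor argument: if nothing set it beforehand, it falls back to the globally selected model. The `or`-fallback idiom in isolation, with placeholder names:

class Backend:
    def __init__(self):
        self.model_name = None  # not known at construction time yet

    def _pre_load(self, globally_selected_model):
        # Keep an explicit name if one was set; otherwise fall back to the global selection.
        self.model_name = self.model_name or globally_selected_model

backend = Backend()
backend._pre_load("KoboldAI/fairseq-dense-13B")
print(backend.model_name)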
@@ -93,7 +93,11 @@ class HFTorchInferenceModel(HFInferenceModel):
         self.hf_torch = True
         self.lazy_load = True
         self.low_mem = False
+
+        # `nobreakmodel` indicates that breakmodel cannot be used, while `breakmodel`
+        # indicates whether breakmodel is currently being used
         self.nobreakmodel = False
+        self.breakmodel = False
 
         self.post_token_hooks = [
             PostTokenHooks.stream_tokens,
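The added comment separates capability from state: `nobreakmodel` means layer splitting (breakmodel) can never be used with this backend, while `breakmodel` records whether it is in use right now. A minimal sketch of how the two flags might gate a split decision (illustrative only; the real gating lives elsewhere in the backend):

class TorchBackend:
    def __init__(self):
        self.nobreakmodel = False  # capability: breakmodel unsupported when True
        self.breakmodel = False    # state: currently splitting layers across devices

    def enable_breakmodel(self):
        if self.nobreakmodel:
            raise RuntimeError("This backend does not support breakmodel")
        self.breakmodel = True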