Merge commit 'refs/pull/123/head' of https://github.com/ebolam/KoboldAI into UI2
aiserver.py (61 lines changed)
@@ -17,6 +17,7 @@ os.environ['TOKENIZERS_PARALLELISM'] = 'false'
 from eventlet import tpool
 
 import logging
+logging.basicConfig(format='%(levelname)s - %(module)s:%(lineno)d - %(message)s',level=logging.WARNING)
 logging.getLogger("urllib3").setLevel(logging.ERROR)
 
 from os import path, getcwd
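Note: the logging setup above tags each record with its level, module, and line number, and raises urllib3's logger to ERROR so per-request chatter stays out of the console. A minimal sketch of the effect (the script name, and therefore the module field, is illustrative):

import logging

logging.basicConfig(format='%(levelname)s - %(module)s:%(lineno)d - %(message)s', level=logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.ERROR)

logging.warning("no model selected")
# Prints something like: WARNING - example:7 - no model selected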
@@ -2194,19 +2195,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
 
 
 def get_hidden_size_from_model(model):
-    try:
-        return int(model.model.decoder.project_in.in_features)
-    except:
-        try:
-            return int(model.model.decoder.embed_tokens.out_features)
-        except:
-            try:
-                return int(model.transformer.hidden_size)
-            except:
-                try:
-                    return int(model.transformer.embed_dim)
-                except:
-                    return int(model.lm_head.in_features)
+    return model.get_input_embeddings().embedding_dim
 
 def maybe_low_cpu_mem_usage() -> Dict[str, Any]:
     if(packaging.version.parse(transformers_version) < packaging.version.parse("4.11.0")):
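Note: the removed ladder probed architecture-specific attribute paths (OPT's decoder.project_in, GPT-style transformer.embed_dim, and so on), each of which breaks on models that lack that attribute. get_input_embeddings() is part of the transformers PreTrainedModel interface and returns the input embedding matrix, whose width is the hidden size, so one call covers every architecture. A quick illustration (model names are examples, not taken from this commit):

from transformers import AutoModelForCausalLM

# get_input_embeddings() exists on every PreTrainedModel; the returned
# nn.Embedding exposes the hidden size as .embedding_dim.
for name in ("gpt2", "facebook/opt-125m", "EleutherAI/gpt-neo-125M"):
    model = AutoModelForCausalLM.from_pretrained(name)
    print(name, model.get_input_embeddings().embedding_dim)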
@@ -5013,29 +5002,47 @@ def sendtocluster(txt, min, max):
     cluster_metadata = {
         'prompt': txt,
         'params': reqdata,
         'username': koboldai_vars.apikey,
         'api_key': koboldai_vars.apikey,
         'models': koboldai_vars.cluster_requested_models,
     }
 
-    # Create request
-    req = requests.post(
-        koboldai_vars.colaburl[:-8] + "/generate/sync",
-        json=cluster_metadata,
-    )
-    js = req.json()
+    try:
+        # Create request
+        req = requests.post(
+            koboldai_vars.colaburl[:-8] + "/api/v1/generate/sync",
+            json=cluster_metadata,
+        )
+        js = req.json()
+    except requests.exceptions.ConnectionError:
+        errmsg = f"Horde unavailable. Please try again later"
+        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
+        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
+        set_aibusy(0)
+        return
+    except requests.exceptions.JSONDecodeError:
+        errmsg = f"Unexpected message received from the Horde: '{req.text}'"
+        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
+        emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
+        set_aibusy(0)
+        return
     if(req.status_code == 503):
-        errmsg = "KoboldAI API Error: No available KoboldAI servers found in cluster to fulfil this request using the selected models and requested lengths."
+        errmsg = f"KoboldAI API Error: No available KoboldAI servers found in Horde to fulfil this request using the selected models or other properties."
         print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
         emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
         set_aibusy(0)
         return
     if(req.status_code != 200):
-        errmsg = "KoboldAI API Error: Failed to get a reply from the server. Please check the console."
+        errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console."
         print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END))
         emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True)
         set_aibusy(0)
         return
-    genout = js
+    gen_servers = [(cgen['server_name'],cgen['server_id']) for cgen in js]
+    print(f"{colors.GREEN}Generations by: {gen_servers}{colors.END}")
+    # Just in case we want to announce it to the user
+    if len(js) == 1:
+        warnmsg = f"Text generated by {js[0]['server_name']}"
+        emit('from_server', {'cmd': 'warnmsg', 'data': warnmsg}, broadcast=True)
+    genout = [cgen['text'] for cgen in js]
 
     for i in range(koboldai_vars.numseqs):
        koboldai_vars.lua_koboldbridge.outputs[i+1] = genout[i]
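Note: the rewritten request path distinguishes three failure modes before using the body: the connection failing outright (ConnectionError), the reply not being JSON (JSONDecodeError), and a JSON reply carrying an error status code. A standalone sketch of the same pattern, assuming the /api/v1/generate/sync endpoint and the list-of-generations response shape seen in the diff (the base URL is a placeholder):

import requests

def horde_generate(base_url: str, payload: dict):
    # Report transport failures, malformed bodies, and HTTP errors separately.
    try:
        req = requests.post(base_url + "/api/v1/generate/sync", json=payload)
        js = req.json()
    except requests.exceptions.ConnectionError:
        return None, "Horde unavailable. Please try again later"
    except requests.exceptions.JSONDecodeError:
        return None, f"Unexpected message received from the Horde: '{req.text}'"
    if req.status_code != 200:
        return None, f"Horde returned status {req.status_code}"
    # Each generation pairs the text with the server that produced it.
    return [cgen["text"] for cgen in js], None

# Usage (placeholder URL): texts, err = horde_generate("https://horde.example", {"prompt": "..."})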
@@ -7992,6 +7999,7 @@ class GenerationInputSchema(SamplerSettingsSchema):
     singleline: Optional[bool] = fields.Boolean(metadata={"description": "Output formatting option. When enabled, removes everything after the first line of the output, including the newline.\n\nIf `disable_output_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
     disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all input formatting options default to `false` instead of the value in the KoboldAI GUI"})
     frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action.\n\nIf `disable_input_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
+    quiet: Optional[bool] = fields.Boolean(metadata={"description": "When enabled, Generated output will not be displayed in the console."})
 
 class GenerationResultSchema(KoboldSchema):
     text: str = fields.String(required=True, metadata={"description": "Generated output as plain text."})
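Note: quiet is an Optional field of GenerationInputSchema, so clients opt in per request. A hypothetical call against a local instance (the endpoint path and results shape follow the usual KoboldAI API convention rather than anything shown in this diff):

import requests

# 'quiet': True asks the server not to echo the generated text to its console.
resp = requests.post(
    "http://localhost:5000/api/v1/generate",  # assumed local instance
    json={"prompt": "Once upon a time", "max_length": 80, "quiet": True},
)
print(resp.json()["results"][0]["text"])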
@@ -8082,6 +8090,10 @@ def _generate_text(body: GenerationInputSchema):
             "msg": "Server is busy; please try again later.",
             "type": "service_unavailable",
         }}), mimetype="application/json", status=503))
+    # This maps each property of the setting to use when sending the generate idempotently
+    # To the object which typically contains it's value
+    # This allows to set the property only for the API generation, and then revert the setting
+    # To what it was before.
     mapping = {
         "disable_input_formatting": ("koboldai_vars", "disable_input_formatting", None),
         "disable_output_formatting": ("koboldai_vars", "disable_output_formatting", None),
@@ -8102,6 +8114,7 @@ def _generate_text(body: GenerationInputSchema):
         "max_length": ("koboldai_vars", "genamt", None),
         "max_context_length": ("koboldai_vars", "max_length", None),
         "n": ("koboldai_vars", "numseqs", None),
+        "quiet": ("koboldai_vars", "quiet", None),
     }
     saved_settings = {}
     set_aibusy(1)
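Note: the new comments describe a save/override/restore cycle: each schema field names the object and attribute that normally hold its value, the current value is stashed in saved_settings, the request's value is applied for this generation only, and everything is reverted afterwards. A simplified, self-contained sketch of that mechanic (stand-in objects; the real code resolves the string "koboldai_vars" to the live settings object):

from types import SimpleNamespace

# Stand-ins for the live settings object and the parsed request body.
koboldai_vars = SimpleNamespace(genamt=80, numseqs=1, quiet=False)
body = SimpleNamespace(max_length=200, n=3, quiet=True)

# Same shape as the mapping above: schema field -> (object, attribute, transform).
mapping = {
    "max_length": (koboldai_vars, "genamt", None),
    "n": (koboldai_vars, "numseqs", None),
    "quiet": (koboldai_vars, "quiet", None),
}

# Apply the request's values, remembering what they replaced...
saved_settings = {}
for field, (obj, attr, transform) in mapping.items():
    value = getattr(body, field, None)
    if value is not None:
        saved_settings[attr] = getattr(obj, attr)
        setattr(obj, attr, transform(value) if transform else value)

# ...generation would run here...

# ...then revert, so the API call leaves the GUI settings untouched.
for field, (obj, attr, _) in mapping.items():
    if attr in saved_settings:
        setattr(obj, attr, saved_settings[attr])

print(koboldai_vars.genamt, koboldai_vars.numseqs, koboldai_vars.quiet)  # -> 80 1 False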
colab/GPU.ipynb

@@ -66,7 +66,7 @@
     "#@title <b><-- Select your model below and then click this to start KoboldAI</b>\n",
     "#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
     "\n",
-    "Model = \"Nerys 13B V2\" #@param [\"Nerys 13B V2\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 20B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit 6B\", \"NeoX 20B\", \"OPT 13B\", \"Fairseq Dense 13B\", \"GPT-J-6B\"] {allow-input: true}\n",
+    "Model = \"Nerys 13B V2\" #@param [\"Nerys 13B V2\", \"Erebus 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 20B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit 6B\", \"NeoX 20B\", \"OPT 13B\", \"Fairseq Dense 13B\", \"GPT-J-6B\"] {allow-input: true}\n",
     "Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
     "Provider = \"Cloudflare\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
     "\n",
@@ -89,6 +89,10 @@
     " Model = \"KoboldAI/fairseq-dense-13B-Nerys-v2\"\n",
     " path = \"\"\n",
     " download = \"\"\n",
+    "elif Model == \"Erebus 13B\":\n",
+    " Model = \"KoboldAI/OPT-13B-Erebus\"\n",
+    " path = \"\"\n",
+    " download = \"\"\n",
     "elif Model == \"Shinen 13B\":\n",
     " Model = \"KoboldAI/fairseq-dense-13B-Shinen\"\n",
     " path = \"\"\n",
@@ -229,4 +233,4 @@
   },
   "nbformat": 4,
   "nbformat_minor": 0
-}
+}
README.md

@@ -216,4 +216,4 @@ Did we miss your contribution? Feel free to issue a commit adding your name to t
 
 KoboldAI is licensed with a AGPL license, in short this means that it can be used by anyone for any purpose. However, if you decide to make a publicly available instance your users are entitled to a copy of the source code including all modifications that you have made (which needs to be available trough an interface such as a button on your website), you may also not distribute this project in a form that does not contain the source code (Such as compiling / encrypting the code and distributing this version without also distributing the source code that includes the changes that you made. You are allowed to distribute this in a closed form if you also provide a separate archive with the source code.).
 
-umamba.exe is bundled for convenience because we observed that many of our users had trouble with command line download methods, it is not part of our project and does not fall under the AGPL license. It is licensed under the BSD-3-Clause license. Other files with differing licenses will have a reference or embedded version of this license within the file.
+umamba.exe is bundled for convenience because we observed that many of our users had trouble with command line download methods, it is not part of our project and does not fall under the AGPL license. It is licensed under the BSD-3-Clause license. Other files with differing licenses will have a reference or embedded version of this license within the file. It has been sourced from https://anaconda.org/conda-forge/micromamba/files and its source code can be found here : https://github.com/mamba-org/mamba/tree/master/micromamba
requirements_mtj.txt

@@ -17,6 +17,7 @@ eventlet
 lupa==1.10
 markdown
 bleach==4.1.0
+chex==0.1.4
 flask-session
 marshmallow>=3.13
 apispec-webframeworks