From f7b720b127444f7124988757e6dfa9f8966e9c9f Mon Sep 17 00:00:00 2001
From: henk717
Date: Wed, 31 Aug 2022 02:16:35 +0200
Subject: [PATCH 01/13] Clarify umamba

People wanted the source code of umamba.exe, so we clarify where it was taken from and where its source code can be found.
---
 readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readme.md b/readme.md
index 968aa995..0d69934b 100644
--- a/readme.md
+++ b/readme.md
@@ -216,4 +216,4 @@ Did we miss your contribution? Feel free to issue a commit adding your name to t
 
 KoboldAI is licensed with a AGPL license, in short this means that it can be used by anyone for any purpose. However, if you decide to make a publicly available instance your users are entitled to a copy of the source code including all modifications that you have made (which needs to be available trough an interface such as a button on your website), you may also not distribute this project in a form that does not contain the source code (Such as compiling / encrypting the code and distributing this version without also distributing the source code that includes the changes that you made. You are allowed to distribute this in a closed form if you also provide a separate archive with the source code.).
 
-umamba.exe is bundled for convenience because we observed that many of our users had trouble with command line download methods, it is not part of our project and does not fall under the AGPL license. It is licensed under the BSD-3-Clause license. Other files with differing licenses will have a reference or embedded version of this license within the file.
+umamba.exe is bundled for convenience because we observed that many of our users had trouble with command line download methods, it is not part of our project and does not fall under the AGPL license. It is licensed under the BSD-3-Clause license. Other files with differing licenses will have a reference or embedded version of this license within the file. It has been sourced from https://anaconda.org/conda-forge/micromamba/files and its source code can be found here: https://github.com/mamba-org/mamba/tree/master/micromamba
From 16fae3c6df8cb0419deb121fb505f8532b57fcc8 Mon Sep 17 00:00:00 2001
From: Divided by Zer0
Date: Wed, 7 Sep 2022 01:36:59 +0200
Subject: [PATCH 02/13] username > api_key

---
 aiserver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aiserver.py b/aiserver.py
index cdd50ed5..f808d0cd 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -5184,7 +5184,7 @@ def sendtocluster(txt, min, max):
     cluster_metadata = {
         'prompt': txt,
         'params': reqdata,
-        'username': vars.apikey,
+        'api_key': vars.apikey,
         'models': vars.cluster_requested_models,
     }

From 153f6b6c92bc077f1750714d452172df506941fa Mon Sep 17 00:00:00 2001
From: vfbd
Date: Wed, 7 Sep 2022 13:21:49 -0400
Subject: [PATCH 03/13] Fix hidden size calculation for GPT-NeoX models

---
 aiserver.py | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index f808d0cd..76f9b366 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -2370,19 +2370,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
 
     def get_hidden_size_from_model(model):
-        try:
-            return int(model.model.decoder.project_in.in_features)
-        except:
-            try:
-                return int(model.model.decoder.embed_tokens.out_features)
-            except:
-                try:
-                    return int(model.transformer.hidden_size)
-                except:
-                    try:
-                        return int(model.transformer.embed_dim)
-                    except:
-                        return int(model.lm_head.in_features)
+        return model.get_input_embeddings().embedding_dim
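The one-liner in PATCH 03 works because transformers models expose get_input_embeddings(), which returns the input token embedding module; its embedding_dim is the same value the removed try/except chain probed for one attribute path per architecture, and it also covers GPT-NeoX, which the chain missed. A minimal sketch of the idea, with the model name chosen purely for illustration:

    # Minimal sketch: the input embedding width is the value the old
    # attribute-probing chain was after, available uniformly through the
    # transformers base class. The model name is only an example.
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-125M")
    print(model.get_input_embeddings().embedding_dim)  # 768 for this model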
"Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n", "Provider = \"Cloudflare\" #@param [\"Localtunnel\", \"Cloudflare\"]\n", "\n", From fdd222e68f1472b206ad5c772577e9f728e67e53 Mon Sep 17 00:00:00 2001 From: henk717 Date: Fri, 9 Sep 2022 16:43:19 +0200 Subject: [PATCH 06/13] Erebus link update It changed on Huggingface, so lets update it here to --- colab/TPU.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/colab/TPU.ipynb b/colab/TPU.ipynb index 40a4517a..d4719169 100644 --- a/colab/TPU.ipynb +++ b/colab/TPU.ipynb @@ -90,7 +90,7 @@ " path = \"\"\n", " download = \"\"\n", "elif Model == \"Erebus 13B\":\n", - " Model = \"KoboldAI/OPT-13B-erebus\"\n", + " Model = \"KoboldAI/OPT-13B-Erebus\"\n", " path = \"\"\n", " download = \"\"\n", "elif Model == \"Shinen 13B\":\n", @@ -233,4 +233,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From 13ca46598095c9ac6476885af29c0a335fed03f5 Mon Sep 17 00:00:00 2001 From: Divided by Zer0 Date: Sat, 10 Sep 2022 10:03:46 +0200 Subject: [PATCH 07/13] horde error handling --- aiserver.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/aiserver.py b/aiserver.py index 76f9b366..7e1f909c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -5175,21 +5175,33 @@ def sendtocluster(txt, min, max): 'api_key': vars.apikey, 'models': vars.cluster_requested_models, } - - # Create request - req = requests.post( - vars.colaburl[:-8] + "/generate/sync", - json=cluster_metadata, - ) - js = req.json() + try: + # Create request + req = requests.post( + vars.colaburl[:-8] + "/generate/sync", + json=cluster_metadata, + ) + js = req.json() + except requests.exceptions.ConnectionError: + errmsg ="Horde unavailable. Please try again later" + print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END)) + emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) + set_aibusy(0) + return + except requests.exceptions.JSONDecodeError: + errmsg ="Unexpected message received from the Horde: '{req.text}'" + print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END)) + emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) + set_aibusy(0) + return if(req.status_code == 503): - errmsg = "KoboldAI API Error: No available KoboldAI servers found in cluster to fulfil this request using the selected models and requested lengths." + errmsg = "KoboldAI API Error: No available KoboldAI servers found in Horde to fulfil this request using the selected models or other properties." print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END)) emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) set_aibusy(0) return if(req.status_code != 200): - errmsg = "KoboldAI API Error: Failed to get a reply from the server. Please check the console." + errmsg = "KoboldAI API Error: Failed to get a standard from the Horde. Please check the console." 
print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END)) emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) set_aibusy(0) From 684399cdd66a85fe6efd74bebb9962cf6e09532a Mon Sep 17 00:00:00 2001 From: Divided by Zer0 Date: Sat, 10 Sep 2022 14:51:31 +0200 Subject: [PATCH 08/13] fix the fix --- aiserver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aiserver.py b/aiserver.py index 7e1f909c..a751a85c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -5184,13 +5184,13 @@ def sendtocluster(txt, min, max): js = req.json() except requests.exceptions.ConnectionError: errmsg ="Horde unavailable. Please try again later" - print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END)) + print("{0}{1}{2}".format(colors.RED, json.dumps(errmsg, indent=2), colors.END)) emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) set_aibusy(0) return except requests.exceptions.JSONDecodeError: errmsg ="Unexpected message received from the Horde: '{req.text}'" - print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END)) + print("{0}{1}{2}".format(colors.RED, json.dumps(errmsg, indent=2), colors.END)) emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) set_aibusy(0) return @@ -5201,7 +5201,7 @@ def sendtocluster(txt, min, max): set_aibusy(0) return if(req.status_code != 200): - errmsg = "KoboldAI API Error: Failed to get a standard from the Horde. Please check the console." + errmsg = "KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console." print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END)) emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) set_aibusy(0) From d6fc61739f3552e2ea957ff25f5086529abcc6ab Mon Sep 17 00:00:00 2001 From: Divided by Zer0 Date: Sat, 10 Sep 2022 14:52:24 +0200 Subject: [PATCH 09/13] no need to dmp json --- aiserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index a751a85c..ab2f92a1 100644 --- a/aiserver.py +++ b/aiserver.py @@ -5184,13 +5184,13 @@ def sendtocluster(txt, min, max): js = req.json() except requests.exceptions.ConnectionError: errmsg ="Horde unavailable. Please try again later" - print("{0}{1}{2}".format(colors.RED, json.dumps(errmsg, indent=2), colors.END)) + print("{0}{1}{2}".format(colors.RED, errmsg, colors.END)) emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) set_aibusy(0) return except requests.exceptions.JSONDecodeError: errmsg ="Unexpected message received from the Horde: '{req.text}'" - print("{0}{1}{2}".format(colors.RED, json.dumps(errmsg, indent=2), colors.END)) + print("{0}{1}{2}".format(colors.RED, errmsg, colors.END)) emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) set_aibusy(0) return From 2eefb488d5aef36f1f7149d4db3c688ebdf378ee Mon Sep 17 00:00:00 2001 From: Divided by Zer0 Date: Sat, 10 Sep 2022 15:17:13 +0200 Subject: [PATCH 10/13] f-string --- aiserver.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index ab2f92a1..fc308f19 100644 --- a/aiserver.py +++ b/aiserver.py @@ -5183,25 +5183,25 @@ def sendtocluster(txt, min, max): ) js = req.json() except requests.exceptions.ConnectionError: - errmsg ="Horde unavailable. Please try again later" + errmsg = f"Horde unavailable. 
Please try again later" print("{0}{1}{2}".format(colors.RED, errmsg, colors.END)) emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) set_aibusy(0) return except requests.exceptions.JSONDecodeError: - errmsg ="Unexpected message received from the Horde: '{req.text}'" + errmsg = f"Unexpected message received from the Horde: '{req.text}'" print("{0}{1}{2}".format(colors.RED, errmsg, colors.END)) emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) set_aibusy(0) return if(req.status_code == 503): - errmsg = "KoboldAI API Error: No available KoboldAI servers found in Horde to fulfil this request using the selected models or other properties." + errmsg = f"KoboldAI API Error: No available KoboldAI servers found in Horde to fulfil this request using the selected models or other properties." print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END)) emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) set_aibusy(0) return if(req.status_code != 200): - errmsg = "KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console." + errmsg = f"KoboldAI API Error: Failed to get a standard reply from the Horde. Please check the console." print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END)) emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) set_aibusy(0) From 888e33a63eb0271ab88aa21430ea445bfd7b247f Mon Sep 17 00:00:00 2001 From: Divided by Zer0 Date: Sat, 10 Sep 2022 19:02:55 +0200 Subject: [PATCH 11/13] switch to new API --- aiserver.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index fc308f19..daeae33c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -5178,7 +5178,7 @@ def sendtocluster(txt, min, max): try: # Create request req = requests.post( - vars.colaburl[:-8] + "/generate/sync", + vars.colaburl[:-8] + "/api/v1/generate/sync", json=cluster_metadata, ) js = req.json() @@ -5206,7 +5206,13 @@ def sendtocluster(txt, min, max): emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) set_aibusy(0) return - genout = js + gen_servers = [(cgen['server_name'],cgen['server_id']) for cgen in js] + print(f"Generations by: {gen_servers}") + # Just in case we want to announce it to the user + if len(js) == 1: + warnmsg = f"Text generated by {js[0]['server_name']}" + emit('from_server', {'cmd': 'warnmsg', 'data': warnmsg}, broadcast=True) + genout = [cgen['text'] for cgen in js] for i in range(vars.numseqs): vars.lua_koboldbridge.outputs[i+1] = genout[i] From f97d285f9f7e7ff4ffe1d0cd7105cc19292924ab Mon Sep 17 00:00:00 2001 From: Divided by Zer0 Date: Sun, 11 Sep 2022 19:23:28 +0200 Subject: [PATCH 12/13] Allows to specify to the API to go quiet --- aiserver.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index daeae33c..1802a502 100644 --- a/aiserver.py +++ b/aiserver.py @@ -17,6 +17,7 @@ os.environ['TOKENIZERS_PARALLELISM'] = 'false' from eventlet import tpool import logging +logging.basicConfig(format='%(levelname)s - %(module)s:%(lineno)d - %(message)s',level=logging.WARNING) logging.getLogger("urllib3").setLevel(logging.ERROR) from os import path, getcwd @@ -5207,7 +5208,7 @@ def sendtocluster(txt, min, max): set_aibusy(0) return gen_servers = [(cgen['server_name'],cgen['server_id']) for cgen in js] - print(f"Generations by: {gen_servers}") + print(f"{colors.GREEN}Generations by: {gen_servers}{colors.END}") # Just in case we want to announce 
From f97d285f9f7e7ff4ffe1d0cd7105cc19292924ab Mon Sep 17 00:00:00 2001
From: Divided by Zer0
Date: Sun, 11 Sep 2022 19:23:28 +0200
Subject: [PATCH 12/13] Allow API requests to specify quiet mode

---
 aiserver.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/aiserver.py b/aiserver.py
index daeae33c..1802a502 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -17,6 +17,7 @@ os.environ['TOKENIZERS_PARALLELISM'] = 'false'
 from eventlet import tpool
 
 import logging
+logging.basicConfig(format='%(levelname)s - %(module)s:%(lineno)d - %(message)s',level=logging.WARNING)
 logging.getLogger("urllib3").setLevel(logging.ERROR)
 
 from os import path, getcwd
@@ -5207,7 +5208,7 @@ def sendtocluster(txt, min, max):
         set_aibusy(0)
         return
     gen_servers = [(cgen['server_name'],cgen['server_id']) for cgen in js]
-    print(f"Generations by: {gen_servers}")
+    print(f"{colors.GREEN}Generations by: {gen_servers}{colors.END}")
     # Just in case we want to announce it to the user
     if len(js) == 1:
         warnmsg = f"Text generated by {js[0]['server_name']}"
@@ -7408,6 +7409,7 @@ class GenerationInputSchema(SamplerSettingsSchema):
     singleline: Optional[bool] = fields.Boolean(metadata={"description": "Output formatting option. When enabled, removes everything after the first line of the output, including the newline.\n\nIf `disable_output_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
     disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all input formatting options default to `false` instead of the value in the KoboldAI GUI"})
     frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action.\n\nIf `disable_input_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
+    quiet: Optional[bool] = fields.Boolean(metadata={"description": "When enabled, generated output will not be displayed in the console."})
 
 class GenerationResultSchema(KoboldSchema):
     text: str = fields.String(required=True, metadata={"description": "Generated output as plain text."})
@@ -7498,6 +7500,10 @@ def _generate_text(body: GenerationInputSchema):
             "msg": "Server is busy; please try again later.",
             "type": "service_unavailable",
         }}), mimetype="application/json", status=503))
+    # This maps each property of the API generate request to the object
+    # which typically contains its value.
+    # This allows setting each property only for the API generation, and then
+    # reverting it to what it was before.
     mapping = {
         "disable_input_formatting": ("vars", "disable_input_formatting", None),
         "disable_output_formatting": ("vars", "disable_output_formatting", None),
@@ -7518,6 +7524,7 @@ def _generate_text(body: GenerationInputSchema):
         "max_length": ("vars", "genamt", None),
         "max_context_length": ("vars", "max_length", None),
         "n": ("vars", "numseqs", None),
+        "quiet": ("vars", "quiet", None),
     }
     saved_settings = {}
     set_aibusy(1)

From 359f95aff1c5bc5c8f2a89a93f421c1114ac78e4 Mon Sep 17 00:00:00 2001
From: henk717
Date: Tue, 13 Sep 2022 14:59:23 +0200
Subject: [PATCH 13/13] Downgrade Chex

Chex got a breaking update; while it is not a direct dependency, we pin it so that optax and jax don't break.
---
 requirements_mtj.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements_mtj.txt b/requirements_mtj.txt
index c5c17a19..b44e5524 100644
--- a/requirements_mtj.txt
+++ b/requirements_mtj.txt
@@ -17,3 +17,4 @@ eventlet
 lupa==1.10
 markdown
 bleach==4.1.0
+chex==0.1.4
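The mapping extended in PATCH 12 exists so settings supplied through the API can temporarily overwrite the GUI values and be restored once the request finishes, which is why quiet only needs one new entry there. A stripped-down sketch of that override-and-restore idea; treating body as a plain dict and the run_generation helper are simplifications, not the exact _generate_text code:

    # Stripped-down sketch of the override/restore pattern behind the mapping
    # in PATCH 12. `body` is treated as a plain dict and run_generation() is
    # a hypothetical stand-in for the actual generation call.
    mapping = {
        "quiet": ("vars", "quiet", None),
        "n": ("vars", "numseqs", None),
    }
    saved_settings = {}
    for field, (_obj, attr, _default) in mapping.items():
        if field in body:
            saved_settings[attr] = getattr(vars, attr)  # remember the GUI value
            setattr(vars, attr, body[field])            # apply the API value
    try:
        run_generation()
    finally:
        for attr, value in saved_settings.items():
            setattr(vars, attr, value)                  # revert to the GUI value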