From 1628b789d16ce8a463c44376197f566b7e395cae Mon Sep 17 00:00:00 2001 From: henk717 Date: Mon, 9 Jan 2023 23:36:43 +0100 Subject: [PATCH 01/13] Add Pygmalion --- colab/TPU.ipynb | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/colab/TPU.ipynb b/colab/TPU.ipynb index efdfa5c4..f40dad68 100644 --- a/colab/TPU.ipynb +++ b/colab/TPU.ipynb @@ -66,10 +66,10 @@ "#@title <-- Select your model below and then click this to start KoboldAI\n", "#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n", "\n", - "Model = \"Nerys 13B V2\" #@param [\"Nerys 13B V2\", \"Erebus 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 20B\", \"Erebus 20B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit V2 6B\", \"Lit 6B\", \"NeoX 20B\", \"OPT 13B\", \"Fairseq Dense 13B\", \"GPT-J-6B\"] {allow-input: true}\n", + "Model = \"Nerys 13B V2\" #@param [\"Nerys 13B V2\", \"Erebus 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 20B\", \"Erebus 20B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Pygmalion 6B\", \"Lit V2 6B\", \"Lit 6B\", \"NeoX 20B\", \"OPT 13B\", \"Fairseq Dense 13B\", \"GPT-J-6B\"] {allow-input: true}\n", "Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n", "Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n", - "use_google_drive = True #@param {type:\"boolean\"}\n", + "use_google_drive = True #@param {type:\"boolean\"}\n", "\n", "import os\n", "try:\n", @@ -81,13 +81,13 @@ "print('Now we will need your Google Drive to store settings and saves, you must login with the same account you used for Colab.')\n", "from google.colab import drive\n", "if use_google_drive:\n", - " drive.mount('/content/drive/')\n", - "else:\n", - " import os\n", - " if not os.path.exists(\"/content/drive\"):\n", - " os.mkdir(\"/content/drive\")\n", - " if not os.path.exists(\"/content/drive/MyDrive/\"):\n", - " os.mkdir(\"/content/drive/MyDrive/\")\n", + " drive.mount('/content/drive/')\n", + "else:\n", + " import os\n", + " if not os.path.exists(\"/content/drive\"):\n", + " os.mkdir(\"/content/drive\")\n", + " if not os.path.exists(\"/content/drive/MyDrive/\"):\n", + " os.mkdir(\"/content/drive/MyDrive/\")\n", "\n", "if Model == \"Janeway 13B\":\n", " Model = \"KoboldAI/fairseq-dense-13B-Janeway\"\n", @@ -129,6 +129,10 @@ " Model = \"KoboldAI/GPT-J-6B-Adventure\"\n", " path = \"\"\n", " download = \"\"\n", + "elif Model == \"Pygmalion 6B\":\n", + " Model = \"PygmalionAI/pygmalion-6b\"\n", + " path = \"\"\n", + " download = \"\"\n", "elif Model == \"Lit V2 6B\":\n", " Model = \"hakurei/litv2-6B-rev3\"\n", " path = \"\"\n", @@ -178,6 +182,7 @@ "| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | NSFW | Shinen is an NSFW model trained on a variety of stories from the website Sexstories it contains many different kinks. It has been merged into the larger (and better) Erebus model. |\n", "| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. 
|\n", "| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n", + "| [Pygmalion](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by PygmalionAI | Chatbot | Pygmalion is a chat model that has been based on a few models that came before it. First the model originates from LitV2, it was then trained by Haru on a chat dataset to create ConvoGPT. ConvoGPT was then trained by PygmalionAI on chat data that contains longer responses and emotions. Making for a higher quality chat experience than you can get from other models such as Erebus that are not directly trained on chatting. |\n", "| [Lit](https://huggingface.co/hakurei/lit-6B) ([V2](https://huggingface.co/hakurei/litv2-6B-rev3)) by Haru | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n", "| [OPT](https://huggingface.co/facebook/opt-13b) by Metaseq | Generic | OPT is considered one of the best base models as far as content goes, its behavior has the strengths of both GPT-Neo and Fairseq Dense. Compared to Neo duplicate and unnecessary content has been left out, while additional literature was added in similar to the Fairseq Dense model. The Fairseq Dense model however lacks the broader data that OPT does have. The biggest downfall of OPT is its license, which prohibits any commercial usage, or usage beyond research purposes. |\n", "| [Neo(X)](https://huggingface.co/EleutherAI/gpt-neox-20b) by EleutherAI | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. 
|\n", @@ -240,7 +245,6 @@ "name": "ColabKobold TPU", "provenance": [], "private_outputs": true, - "collapsed_sections": [], "include_colab_link": true }, "kernelspec": { From 4a88e41d14073e84a9714f0d880d4d4e7255edcc Mon Sep 17 00:00:00 2001 From: henk717 Date: Tue, 10 Jan 2023 17:22:03 +0100 Subject: [PATCH 02/13] Pygmalion 6B --- colab/GPU.ipynb | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/colab/GPU.ipynb b/colab/GPU.ipynb index 203f53ba..927f5015 100644 --- a/colab/GPU.ipynb +++ b/colab/GPU.ipynb @@ -5,8 +5,7 @@ "colab": { "name": "ColabKobold GPU", "private_outputs": true, - "provenance": [], - "include_colab_link": true + "provenance": [] }, "kernelspec": { "display_name": "Python 3", @@ -18,16 +17,6 @@ "accelerator": "GPU" }, "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, { "cell_type": "markdown", "metadata": { @@ -82,7 +71,7 @@ "#@title <-- Select your model below and then click this to start KoboldAI\n", "#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n", "\n", - "Model = \"Nerys V2 6B\" #@param [\"Nerys V2 6B\", \"Erebus 6B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Lit V2 6B\", \"Lit 6B\", \"Shinen 6B\", \"Nerys 2.7B\", \"AID 2.7B\", \"Erebus 2.7B\", \"Janeway 2.7B\", \"Picard 2.7B\", \"Horni LN 2.7B\", \"Horni 2.7B\", \"Shinen 2.7B\", \"OPT 2.7B\", \"Fairseq Dense 2.7B\", \"Neo 2.7B\"] {allow-input: true}\n", + "Model = \"Nerys V2 6B\" #@param [\"Nerys V2 6B\", \"Erebus 6B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Pygmalion 6B\", \"Lit V2 6B\", \"Lit 6B\", \"Shinen 6B\", \"Nerys 2.7B\", \"AID 2.7B\", \"Erebus 2.7B\", \"Janeway 2.7B\", \"Picard 2.7B\", \"Horni LN 2.7B\", \"Horni 2.7B\", \"Shinen 2.7B\", \"OPT 2.7B\", \"Fairseq Dense 2.7B\", \"Neo 2.7B\"] {allow-input: true}\n", "Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n", "Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n", "use_google_drive = True #@param {type:\"boolean\"}\n", @@ -118,6 +107,10 @@ " Model = \"KoboldAI/GPT-J-6B-Adventure\"\n", " path = \"\"\n", " download = \"\"\n", + "elif Model == \"Pygmalion 6B\":\n", + " Model = \"PygmalionAI/pygmalion-6b\"\n", + " path = \"\"\n", + " download = \"\"\n", "elif Model == \"Lit V2 6B\":\n", " Model = \"hakurei/litv2-6B-rev3\"\n", " path = \"\"\n", @@ -196,6 +189,7 @@ "| [Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", "| [Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | Novel | Picard is a model trained for SFW Novels based on Neo 2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n", "| [AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. 
|\n", + "| [Pygmalion](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by PygmalionAI | Chatbot | Pygmalion is a chat model that has been based on a few models that came before it. First the model originates from LitV2, it was then trained by Haru on a chat dataset to create ConvoGPT. ConvoGPT was then trained by PygmalionAI on chat data that contains longer responses and emotions. Making for a higher quality chat experience than you can get from other models such as Erebus that are not directly trained on chatting. |\n", "| [Horni LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | Novel | This model is based on Horni 2.7B and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n", "| [Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n", "| [Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you Shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", @@ -213,6 +207,7 @@ "| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | NSFW | Shinen is an NSFW model trained on a variety of stories from the website Sexstories it contains many different kinks. It has been merged into the larger (and better) Erebus model. |\n", "| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n", "| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n", + "| [Pygmalion](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by PygmalionAI | Chatbot | Pygmalion is a chat model that has been based on a few models that came before it. First the model originates from LitV2, it was then trained by Haru on a chat dataset to create ConvoGPT. ConvoGPT was then trained by PygmalionAI on chat data that contains longer responses and emotions. Making for a higher quality chat experience than you can get from other models such as Erebus that are not directly trained on chatting. 
|\n", "| [Lit](https://huggingface.co/hakurei/lit-6B) ([V2](https://huggingface.co/hakurei/litv2-6B-rev3)) by Haru | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n", "| [OPT](https://huggingface.co/facebook/opt-13b) by Metaseq | Generic | OPT is considered one of the best base models as far as content goes, its behavior has the strengths of both GPT-Neo and Fairseq Dense. Compared to Neo duplicate and unnecessary content has been left out, while additional literature was added in similar to the Fairseq Dense model. The Fairseq Dense model however lacks the broader data that OPT does have. The biggest downfall of OPT is its license, which prohibits any commercial usage, or usage beyond research purposes. |\n", "| [Neo(X)](https://huggingface.co/EleutherAI/gpt-neox-20b) by EleutherAI | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n", From ddaf224f83ab395e97e647b753fae4db8f173ad6 Mon Sep 17 00:00:00 2001 From: somebody Date: Tue, 10 Jan 2023 18:46:52 -0600 Subject: [PATCH 03/13] Rich text STOP NOW!!!! --- static/koboldai.js | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/static/koboldai.js b/static/koboldai.js index f367d684..8fdc980e 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -7273,4 +7273,15 @@ async function screenshot_selection(summonEvent) { let endDebt = totalText.indexOf(selectionContent) + selectionContent.length; await showScreenshotWizard(actionComposition, startDebt=startDebt, endDebt=endDebt, totalText); -} \ No newline at end of file +} + +$el("#gamescreen").addEventListener("paste", function(event) { + // Get rid of rich text, it messes with actions. Not a great fix since it + // relies on execCommand but it'll have to do + event.preventDefault(); + document.execCommand( + "insertHTML", + false, + event.clipboardData.getData("text/plain") + ); +}); \ No newline at end of file From 271e4ed06bf6b807d3394a5ccf7bbca8e515e343 Mon Sep 17 00:00:00 2001 From: Henk Date: Wed, 11 Jan 2023 21:33:25 +0100 Subject: [PATCH 04/13] Chat Mode Improvements This commit decouples single line mode, well behaved models no longer need this since we stop at the You:. There are scenario's however where this potentially breaks chatmode completely or makes models more frustrating to use. Users who experience this can enable the Single Line mode in the formatting menu to restore the old behavior. I have also allowed token streaming again, since the issues with it have already been resolved. 
---
 aiserver.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 6f30c1f0..99696559 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -2474,9 +2474,6 @@ def patch_transformers():
 
             if not koboldai_vars.output_streaming:
                 return False
-
-            if koboldai_vars.chatmode:
-                return False
 
             data = [applyoutputformatting(utils.decodenewlines(tokenizer.decode(x[-1])), no_sentence_trimming=True, no_single_line=True) for x in input_ids]
             koboldai_vars.actions.stream_tokens(data)
@@ -6507,7 +6504,7 @@ def applyoutputformatting(txt, no_sentence_trimming=False, no_single_line=False)
     if(koboldai_vars.frmtrmspch):
         txt = utils.removespecialchars(txt, koboldai_vars)
     # Single Line Mode
-    if((koboldai_vars.singleline or koboldai_vars.chatmode) and not no_single_line):
+    if(koboldai_vars.singleline and not no_single_line):
         txt = utils.singlelineprocessing(txt, koboldai_vars)
 
     for sub in koboldai_vars.substitutions:
         if not sub["enabled"]:
             continue

From d6a941de61e26604ea279774b4323f1735dd79f8 Mon Sep 17 00:00:00 2001
From: Henk
Date: Wed, 11 Jan 2023 22:24:12 +0100
Subject: [PATCH 05/13] Restore Chat Models Menu + Safetensors Workaround

This commit restores the chat models menu now that we finally have good chat models available again. Unfortunately, huggingface reports back pytorch_model.bin even when the model's weights file is named model.safetensors. I don't have a good way to combat this at the moment, so instead we now use a workaround: if the model copy fails, it manually tries model.safetensors in the hope that it will work. This fixes Pygmalion for now; if new issues arise from other models in the future, we will have to implement a cleaner method.

---
 aiserver.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 99696559..5415e06d 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -168,6 +168,7 @@ model_menu = {
         ["Load custom model from Hugging Face", "customhuggingface", "", True],
         ["Adventure Models", "adventurelist", "", True],
         ["Novel Models", "novellist", "", True],
+        ["Chat Models", "chatlist", "", True],
         ["NSFW Models", "nsfwlist", "", True],
         ["Untuned OPT", "optlist", "", True],
         ["Untuned GPT-Neo/J", "gptneolist", "", True],
@@ -225,9 +226,10 @@ model_menu = {
         ["Return to Main Menu", "mainmenu", "", True],
     ],
     'chatlist': [
-        ["Convo 6B (Chatbot)", "hitomi-team/convo-6B", "16GB", False],
-        ["C1 6B (Chatbot)", "hakurei/c1-6B", "16GB", False],
-        ["C1 1.3B (Chatbot)", "iokru/c1-1.3B", "6GB", False],
+        ["Pygmalion 6B", "PygmalionAI/pygmalion-6b", "16GB", False],
+        ["Pygmalion 2.7B", "PygmalionAI/pygmalion-2.7b", "8GB", False],
+        ["Pygmalion 1.3B", "PygmalionAI/pygmalion-1.3b", "6GB", False],
+        ["Pygmalion 350M", "PygmalionAI/pygmalion-350m", "2GB", False],
         ["Return to Main Menu", "mainmenu", "", True],
     ],
     'gptneolist': [
@@ -3207,7 +3209,10 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
             shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, transformers.configuration_utils.CONFIG_NAME, revision=koboldai_vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.configuration_utils.CONFIG_NAME))
             if(utils.num_shards is None):
                 # Save the pytorch_model.bin of an unsharded model
-                shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, transformers.modeling_utils.WEIGHTS_NAME, revision=koboldai_vars.revision, cache_dir="cache", local_files_only=True,
legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_NAME))
+                try:
+                    shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, transformers.modeling_utils.WEIGHTS_NAME, revision=koboldai_vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_NAME))
+                except:
+                    shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, "model.safetensors", revision=koboldai_vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), "model.safetensors"))
             else:
                 with open(utils.from_pretrained_index_filename) as f:
                     map_data = json.load(f)

From f1739dd1846cc3cd8b2766e9de8c177158f128eb Mon Sep 17 00:00:00 2001
From: Henk
Date: Fri, 13 Jan 2023 13:04:54 +0100
Subject: [PATCH 06/13] Chatmode Regex

---
 aiserver.py |  6 ++++--
 utils.py    | 18 ++++++++++++++++++
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 5415e06d..cab01077 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -6475,7 +6475,7 @@ def getnewcontent(txt):
 #==================================================================#
 def applyinputformatting(txt):
     # Add sentence spacing
-    if(koboldai_vars.frmtadsnsp):
+    if(koboldai_vars.frmtadsnsp and not koboldai_vars.chatmode):
         txt = utils.addsentencespacing(txt, koboldai_vars)
 
     return txt
@@ -6511,7 +6511,9 @@ def applyoutputformatting(txt, no_sentence_trimming=False, no_single_line=False)
     # Single Line Mode
     if(koboldai_vars.singleline and not no_single_line):
         txt = utils.singlelineprocessing(txt, koboldai_vars)
-
+    # Chat Mode Trimming
+    if(koboldai_vars.chatmode):
+        txt = utils.chatmodeprocessing(txt, koboldai_vars)
     for sub in koboldai_vars.substitutions:
         if not sub["enabled"]:
             continue
diff --git a/utils.py b/utils.py
index b8baa786..22fc05a4 100644
--- a/utils.py
+++ b/utils.py
@@ -147,6 +147,24 @@ def singlelineprocessing(txt, koboldai_vars):
         txt = txt + "\n"
     return txt
 
+def chatmodeprocessing(txt, koboldai_vars):
+    chatregex = re.compile(r'%s:[.|\n|\W|\w]*'%koboldai_vars.chatname)
+    txt = chatregex.sub('', txt)
+    if(len(koboldai_vars.actions) > 0):
+        if(len(koboldai_vars.actions[-1]) > 0):
+            action = koboldai_vars.actions[-1]
+            lastchar = action[-1] if len(action) else ""
+        else:
+            # Last action is blank, this should never happen, but
+            # since it did let's bail out.
+            return txt
+    else:
+        action = koboldai_vars.prompt
+        lastchar = action[-1] if len(action) else ""
+    if(lastchar != "\n"):
+        txt = txt + "\n"
+    return txt
+
 #==================================================================#
 #  Cleans string for use in file name
 #==================================================================#

From 469fb8a5feaa6a2e86ee72eacc7cb863051c30dd Mon Sep 17 00:00:00 2001
From: Henk
Date: Fri, 13 Jan 2023 19:11:21 +0100
Subject: [PATCH 07/13] Transformers 4.25.1

This is a breaking change that allows 4.25.1 to work, because transformers itself has made breaking changes of its own. If you do not make use of our automatic updater, please update the dependencies when updating to this build.
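For reference, the incompatibility is mostly a module reshuffle on the transformers side: the generation utilities moved under transformers.generation, and GenerationMixin is re-exported at the top level. The diff below swaps the old dotted paths for the new ones, which boils down to:

# transformers <= 4.24:
#   import transformers.generation_utils
#   import transformers.generation_logits_process
# transformers 4.25.1, as used in the diff below:
from transformers import GenerationMixin
import transformers.generation.logits_process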
--- aiserver.py | 33 +++++++++++++++++---------------- environments/huggingface.yml | 2 +- environments/rocm.yml | 2 +- requirements.txt | 2 +- requirements_mtj.txt | 2 +- 5 files changed, 21 insertions(+), 20 deletions(-) diff --git a/aiserver.py b/aiserver.py index cab01077..716ac56c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -77,7 +77,8 @@ try: from transformers.models.opt.modeling_opt import OPTDecoder except: pass -import transformers.generation_utils + +from transformers import GenerationMixin # Text2img import base64 @@ -127,20 +128,20 @@ class use_core_manipulations: old_get_stopping_criteria: callable def __enter__(self): - use_core_manipulations.old_get_logits_processor = transformers.generation_utils.GenerationMixin._get_logits_processor - transformers.generation_utils.GenerationMixin._get_logits_processor = use_core_manipulations.get_logits_processor + use_core_manipulations.old_get_logits_processor = transformers.GenerationMixin._get_logits_processor + transformers.GenerationMixin._get_logits_processor = use_core_manipulations.get_logits_processor - use_core_manipulations.old_sample = transformers.generation_utils.GenerationMixin.sample - transformers.generation_utils.GenerationMixin.sample = use_core_manipulations.sample + use_core_manipulations.old_sample = transformers.GenerationMixin.sample + transformers.GenerationMixin.sample = use_core_manipulations.sample - use_core_manipulations.old_get_stopping_criteria = transformers.generation_utils.GenerationMixin._get_stopping_criteria - transformers.generation_utils.GenerationMixin._get_stopping_criteria = use_core_manipulations.get_stopping_criteria + use_core_manipulations.old_get_stopping_criteria = transformers.GenerationMixin._get_stopping_criteria + transformers.GenerationMixin._get_stopping_criteria = use_core_manipulations.get_stopping_criteria return self def __exit__(self, exc_type, exc_value, exc_traceback): - transformers.generation_utils.GenerationMixin._get_logits_processor = use_core_manipulations.old_get_logits_processor - transformers.generation_utils.GenerationMixin.sample = use_core_manipulations.old_sample - transformers.generation_utils.GenerationMixin._get_stopping_criteria = use_core_manipulations.old_get_stopping_criteria + transformers.GenerationMixin._get_logits_processor = use_core_manipulations.old_get_logits_processor + transformers.GenerationMixin.sample = use_core_manipulations.old_sample + transformers.GenerationMixin._get_stopping_criteria = use_core_manipulations.old_get_stopping_criteria #==================================================================# # Variables & Storage @@ -2414,7 +2415,7 @@ def patch_transformers(): processors.append(PhraseBiasLogitsProcessor()) return processors use_core_manipulations.get_logits_processor = new_get_logits_processor - new_get_logits_processor.old_get_logits_processor = transformers.generation_utils.GenerationMixin._get_logits_processor + new_get_logits_processor.old_get_logits_processor = transformers.GenerationMixin._get_logits_processor class KoboldLogitsWarperList(LogitsProcessorList): def __init__(self, beams: int = 1, **kwargs): @@ -2449,15 +2450,15 @@ def patch_transformers(): kwargs.setdefault("pad_token_id", 2) return new_sample.old_sample(self, *args, **kwargs) - new_sample.old_sample = transformers.generation_utils.GenerationMixin.sample + new_sample.old_sample = transformers.GenerationMixin.sample use_core_manipulations.sample = new_sample # Allow bad words filter to ban <|endoftext|> token - import transformers.generation_logits_process + 
import transformers.generation.logits_process def new_init(self, bad_words_ids: List[List[int]], eos_token_id: int): return new_init.old_init(self, bad_words_ids, -1) - new_init.old_init = transformers.generation_logits_process.NoBadWordsLogitsProcessor.__init__ - transformers.generation_logits_process.NoBadWordsLogitsProcessor.__init__ = new_init + new_init.old_init = transformers.generation.logits_process.NoBadWordsLogitsProcessor.__init__ + transformers.generation.logits_process.NoBadWordsLogitsProcessor.__init__ = new_init class TokenStreamer(StoppingCriteria): # A StoppingCriteria is used here because it seems to run after @@ -2615,7 +2616,7 @@ def patch_transformers(): return False - old_get_stopping_criteria = transformers.generation_utils.GenerationMixin._get_stopping_criteria + old_get_stopping_criteria = transformers.GenerationMixin._get_stopping_criteria def new_get_stopping_criteria(self, *args, **kwargs): global tokenizer diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 485ac338..791228aa 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -27,7 +27,7 @@ dependencies: - flask-cloudflared - flask-ngrok - lupa==1.10 - - transformers==4.24.0 + - transformers==4.25.1 - huggingface_hub>=0.10.1 - safetensors - accelerate diff --git a/environments/rocm.yml b/environments/rocm.yml index a0e23177..36ba966b 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -26,7 +26,7 @@ dependencies: - flask-cloudflared - flask-ngrok - lupa==1.10 - - transformers==4.24.0 + - transformers==4.25.1 - huggingface_hub>=0.10.1 - safetensors - accelerate diff --git a/requirements.txt b/requirements.txt index 53e450c9..9e1a1248 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -transformers==4.24.0 +transformers==4.25.1 huggingface_hub>=0.10.1 Flask Flask-SocketIO diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 39c8451e..72dc31bb 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -5,7 +5,7 @@ requests dm-haiku == 0.0.5 jax == 0.2.21 jaxlib >= 0.1.69, <= 0.3.7 -transformers == 4.24.0 +transformers == 4.25.1 huggingface_hub >= 0.10.1 progressbar2 git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck From 307fc97b9d9533c684f571ed6f1613c10543ea57 Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 13 Jan 2023 22:49:32 +0100 Subject: [PATCH 08/13] ROCm Dependency Bump/Fix --- environments/rocm.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/rocm.yml b/environments/rocm.yml index b79615c2..dcd76b05 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -21,7 +21,7 @@ dependencies: - psutil - pip: - --extra-index-url https://download.pytorch.org/whl/rocm5.1.1 - - torch==1.11.* + - torch==1.12.1+rocm5.1.1 - flask-cloudflared - flask-ngrok - lupa==1.10 From ed62d104eecf51bba4f950759ca7990fe3f2775f Mon Sep 17 00:00:00 2001 From: Henk Date: Sat, 14 Jan 2023 21:14:39 +0100 Subject: [PATCH 09/13] --cacheonly --- aiserver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 716ac56c..e65e9cbf 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1496,6 +1496,7 @@ def general_startup(override_args=None): parser.add_argument("--no_aria2", action='store_true', default=False, help="Prevents KoboldAI from using aria2 to download huggingface models more efficiently, in case aria2 is causing you issues") parser.add_argument("--lowmem", action='store_true', help="Extra Low Memory loading for the GPU, slower but memory does 
not peak to twice the usage") parser.add_argument("--savemodel", action='store_true', help="Saves the model to the models folder even if --colab is used (Allows you to save models to Google Drive)") + parser.add_argument("--cacheonly", action='store_true', help="Does not save the model to the models folder when it has been downloaded in the cache") parser.add_argument("--customsettings", help="Preloads arguements from json file. You only need to provide the location of the json file. Use customsettings.json template file. It can be renamed if you wish so that you can store multiple configurations. Leave any settings you want as default as null. Any values you wish to set need to be in double quotation marks") parser.add_argument("--no_ui", action='store_true', default=False, help="Disables the GUI and Socket.IO server while leaving the API server running.") parser.add_argument("--summarizer_model", action='store', default="philschmid/bart-large-cnn-samsum", help="Huggingface model to use for summarization. Defaults to sshleifer/distilbart-cnn-12-6") @@ -3194,7 +3195,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal torch._utils._rebuild_tensor = old_rebuild_tensor - if not args.colab or args.savemodel: + if not (args.colab or args.cacheonly) or args.savemodel: import shutil tokenizer.save_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_'))) if(koboldai_vars.fp32_model and ("breakmodel" not in globals() or not breakmodel.disk_blocks)): # Use save_pretrained to convert fp32 models to fp16, unless we are using disk cache because save_pretrained is not supported in that case From 2e93b12affa8033e29da8b7da2b058bc60e59cd1 Mon Sep 17 00:00:00 2001 From: SammCheese Date: Sun, 15 Jan 2023 20:50:00 +0100 Subject: [PATCH 10/13] add a info reference for the model selection --- aiserver.py | 10 +++++----- static/koboldai.css | 16 +++++++++++++--- static/koboldai.js | 10 ++++++++++ templates/popups.html | 6 ++++++ 4 files changed, 34 insertions(+), 8 deletions(-) diff --git a/aiserver.py b/aiserver.py index e65e9cbf..9d0daabc 100644 --- a/aiserver.py +++ b/aiserver.py @@ -317,12 +317,12 @@ model_menu = { ["Return to Main Menu", "mainmenu", "", True], ], 'apilist': [ - ["GooseAI API (requires API key)", "GooseAI", "", False], - ["OpenAI API (requires API key)", "OAI", "", False], - ["InferKit API (requires API key)", "InferKit", "", False], + ["GooseAI API (requires API key)", "GooseAI", "None", False], + ["OpenAI API (requires API key)", "OAI", "None", False], + ["InferKit API (requires API key)", "InferKit", "None", False], # ["KoboldAI Server API (Old Google Colab)", "Colab", "", False], - ["KoboldAI API", "API", "", False], - ["KoboldAI Horde", "CLUSTER", "", False], + ["KoboldAI API", "API", "None", False], + ["KoboldAI Horde", "CLUSTER", "None", False], ["Return to Main Menu", "mainmenu", "", True], ] } diff --git a/static/koboldai.css b/static/koboldai.css index 64827944..516172a9 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -1941,7 +1941,7 @@ body { } -.popup .model_item { +#modelspecifier, .popup .model_item { width: 98%; background-color: var(--popup_item_color); color: var(--popup_item_color_text); @@ -2089,14 +2089,24 @@ body { } /*----------------------------- Model Load Popup ------------------------------------------*/ -.popup_list_area .model_item .model { +#specspan, .popup_list_area .model_item .model { grid-area: file; display: grid; grid-template-areas: "item gpu_size"; - grid-template-columns: auto 40px; + 
grid-template-columns: auto 95px; cursor: pointer; } +#specspan { + grid-template-columns: auto 100px !important; + cursor: auto !important; +} + +#model-spec-usage { + position: relative; + left: -20px; +} + .popup .model_item:hover { background-color: var(--popup_hover_color); color: var(--popup_hover_color_text); diff --git a/static/koboldai.js b/static/koboldai.js index 17bd5449..cce66f80 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1607,6 +1607,16 @@ function show_model_menu(data) { } else { document.getElementById("custommodelname").classList.add("hidden"); } + + + // detect if we are in a model selection screen and show the reference + var refelement = document.getElementById("modelspecifier"); + var check = document.getElementById("mainmenu"); + if (check) { + refelement.classList.remove("hidden"); + } else { + refelement.classList.add("hidden"); + } openPopup("load-model"); } diff --git a/templates/popups.html b/templates/popups.html index 9c209717..44cf7cb6 100644 --- a/templates/popups.html +++ b/templates/popups.html @@ -40,6 +40,12 @@
 						Select A Model To Load
 					</div>
+					<div id="modelspecifier">
+						<span id="specspan">
+							<div>Name</div>
+							<div id="model-spec-usage">Usage (VRAM)</div>
+						</span>
+					</div>
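The aiserver.py half of this patch only swaps the API entries' empty size field for "None" so that the new Name / Usage (VRAM) header always has a value to display. As the menu entries throughout the series suggest, each model_menu row appears to be a four-element list; a quick sketch of how one unpacks:

# Apparent shape of a model_menu row: label, target model or submenu,
# VRAM string shown in the new Usage (VRAM) column, and whether the
# row is itself a submenu link.
label, target, vram, is_menu = ["Pygmalion 6B", "PygmalionAI/pygmalion-6b", "16GB", False]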
From e9859cf17def1cd2d8d99f6899c5aeeedd2f7c49 Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 16 Jan 2023 16:32:17 +0100 Subject: [PATCH 11/13] DNSPython workaround DNSPython had an update eventlet is not ready for. We now manually cap DNSPython to ensure the installations still happen correctly. --- environments/huggingface.yml | 1 + environments/rocm.yml | 1 + requirements.txt | 1 + requirements_mtj.txt | 1 + 4 files changed, 4 insertions(+) diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 3f25a7fd..341a8e87 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -11,6 +11,7 @@ dependencies: - python=3.8.* - cudatoolkit=11.1 - eventlet + - dnspython=2.2.1 - markdown - bleach=4.1.0 - pip diff --git a/environments/rocm.yml b/environments/rocm.yml index dcd76b05..3e50c565 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -8,6 +8,7 @@ dependencies: - flask-session - python=3.8.* - eventlet + - dnspython=2.2.1 - markdown - bleach=4.1.0 - pip diff --git a/requirements.txt b/requirements.txt index 44f50bdd..a2854835 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ torch >= 1.9, < 1.13 flask-cloudflared flask-ngrok eventlet +dnspython==2.2.1 lupa==1.10 markdown bleach==4.1.0 diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 5d84dd43..f3dfe339 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -14,6 +14,7 @@ Flask-SocketIO flask-cloudflared >= 0.0.5 flask-ngrok eventlet +dnspython==2.2.1 lupa==1.10 markdown bleach==4.1.0 From 22acde1ab7da6b75d24015a3617303ef45cc91a8 Mon Sep 17 00:00:00 2001 From: henk717 Date: Wed, 18 Jan 2023 02:04:14 +0100 Subject: [PATCH 12/13] Download Manager Support docker-cuda --- docker-cuda/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-cuda/Dockerfile b/docker-cuda/Dockerfile index 49ec3c8a..bad861bf 100644 --- a/docker-cuda/Dockerfile +++ b/docker-cuda/Dockerfile @@ -6,4 +6,4 @@ WORKDIR /content/ COPY env.yml /home/micromamba/env.yml RUN micromamba install -y -n base -f /home/micromamba/env.yml USER root -RUN apt update && apt install xorg -y \ No newline at end of file +RUN apt update && apt install xorg aria2 -y From 24f50d6fb78e87a444e61afabce59f4d8b93a36e Mon Sep 17 00:00:00 2001 From: henk717 Date: Wed, 18 Jan 2023 02:04:45 +0100 Subject: [PATCH 13/13] Download Manager Support docker-rocm --- docker-rocm/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-rocm/Dockerfile b/docker-rocm/Dockerfile index 54f741b9..439e3a28 100644 --- a/docker-rocm/Dockerfile +++ b/docker-rocm/Dockerfile @@ -3,4 +3,4 @@ WORKDIR /content/ COPY env.yml /home/micromamba/env.yml RUN micromamba install -y -n base -f /home/micromamba/env.yml USER root -RUN apt update && apt install xorg libsqlite3-0 -y +RUN apt update && apt install xorg libsqlite3-0 aria2 -y
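The two Dockerfile changes simply add aria2, which the --no_aria2 help text earlier in the series describes as the tool KoboldAI uses to download huggingface models more efficiently. A generic sketch of that download-manager pattern, with illustrative names rather than the repository's actual helper:

import shutil
import subprocess
import urllib.request

def fetch(url: str, out_name: str) -> None:
    # Prefer aria2c's multi-connection downloads when it is on PATH,
    # as it now is inside both Docker images; otherwise fall back to
    # a plain single-connection fetch.
    if shutil.which("aria2c"):
        subprocess.run(["aria2c", "-x", "16", "-o", out_name, url], check=True)
    else:
        urllib.request.urlretrieve(url, out_name)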