From 860b697a705ad28982b20aac1135e4ff35e50647 Mon Sep 17 00:00:00 2001 From: Syler Clayton <2224238+Relys@users.noreply.github.com> Date: Sat, 15 Apr 2023 09:51:45 -0700 Subject: [PATCH 01/28] Update install_requirements.sh Made parameter case insensitive. --- install_requirements.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/install_requirements.sh b/install_requirements.sh index 7d2c0c2f..e131ad37 100755 --- a/install_requirements.sh +++ b/install_requirements.sh @@ -1,12 +1,12 @@ #!/bin/bash -if [[ $1 = "cuda" ]]; then +if [[ $1 = "cuda" ]||[ $1 = "CUDA" ]]; then wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y # Weird micromamba bug causes it to fail the first time, running it twice just to be safe, the second time is much faster bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y exit fi -if [[ $1 = "rocm" ]]; then +if [[ $1 = "rocm" ]||[ $1 = "ROCM" ]]; then wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba bin/micromamba create -f environments/rocm.yml -r runtime -n koboldai-rocm -y # Weird micromamba bug causes it to fail the first time, running it twice just to be safe, the second time is much faster From b1d3a37fb082197115765cc16e8faa074f1920b4 Mon Sep 17 00:00:00 2001 From: Henk Date: Sun, 16 Apr 2023 19:31:17 +0200 Subject: [PATCH 02/28] <|comments|> now are [<|comments|>] to support OA --- koboldai_settings.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/koboldai_settings.py b/koboldai_settings.py index be86e302..fdd89ba9 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1204,13 +1204,13 @@ class undefined_settings(settings): class system_settings(settings): local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', - 'comregex_ui', 'sp', '_horde_pid', 'inference_config', 'image_pipeline', + 'sp', '_horde_pid', 'inference_config', 'image_pipeline', 'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states'] no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy', 'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', 'summary_tokenizer', 'use_colab_tpu', 'noai', 'disable_set_aibusy', 'cloudflare_link', 'tts_model', - 'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states'] + 'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states', 'comregex_ai', 'comregex_ui'] settings_name = "system" def __init__(self, socketio, koboldai_var): self._socketio = socketio @@ -1252,8 +1252,8 @@ class system_settings(settings): self.regex_sl = re.compile(r'\n*(?<=.) 
*\n(.|\n)*') # Pattern for limiting the output to a single line self.acregex_ai = re.compile(r'\n* *>(.|\n)*') # Pattern for matching adventure actions from the AI so we can remove them self.acregex_ui = re.compile(r'^ *(>.*)$', re.MULTILINE) # Pattern for matching actions in the HTML-escaped story so we can apply colouring, etc (make sure to encase part to format in parentheses) - self.comregex_ai = re.compile(r'(?:\n<\|(?:.|\n)*?\|>(?=\n|$))|(?:<\|(?:.|\n)*?\|>\n?)') # Pattern for matching comments to remove them before sending them to the AI - self.comregex_ui = re.compile(r'(<\|(?:.|\n)*?\|>)') # Pattern for matching comments in the editor + self.comregex_ai = re.compile(r'(?:\n\[<\|(?:.|\n)*?\|>\](?=\n|$))|(?:\[<\|(?:.|\n)*?\|>\]\n?)') # Pattern for matching comments to remove them before sending them to the AI + self.comregex_ui = re.compile(r'(\[<\|(?:.|\n)*?\|>\])') # Pattern for matching comments in the editor self.host = False self.flaskwebgui = False self.quiet = False # If set will suppress any story text from being printed to the console (will only be seen on the client web page) From ded5542d3a78be4d9c0e79486cd387f285acce42 Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Sun, 16 Apr 2023 21:11:35 +0200 Subject: [PATCH 03/28] Fix error in 4bit offloading initialization code when running with --nobreakmodel --- aiserver.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index a7583d2c..913bea5c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3171,7 +3171,10 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal lowmem = {} koboldai_vars.lazy_load = False # Also, lazy loader doesn't support GPT-2 models - gpu_layers_list = [int(l) for l in gpu_layers.split(",")] + try: + gpu_layers_list = [int(l) for l in gpu_layers.split(",")] + except ValueError: + gpu_layers_list = [utils.num_layers(model_config)] offload_4bit = use_4_bit and sum(gpu_layers_list) < utils.num_layers(model_config) if offload_4bit: From 2926dac72b4c8cd5a2c93eaad4d901cc109e66d4 Mon Sep 17 00:00:00 2001 From: Henk Date: Sun, 16 Apr 2023 23:59:13 +0200 Subject: [PATCH 04/28] Don't print allowed_ips if unused --- aiserver.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index 886a802e..b8ff4f3e 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3490,7 +3490,8 @@ def is_allowed_ip(): client_ip = request.remote_addr if request.path != '/genre_data.json': print("Connection Attempt: " + request.remote_addr) - print("Allowed?: ", request.remote_addr in allowed_ips) + if allowed_ips: + print("Allowed?: ", request.remote_addr in allowed_ips) return client_ip in allowed_ips @@ -4189,7 +4190,8 @@ def execute_outmod(): @socketio.on('connect') def do_connect(): print("Connection Attempt: " + request.remote_addr) - print("Allowed?: ", request.remote_addr in allowed_ips) + if allowed_ips: + print("Allowed?: ", request.remote_addr in allowed_ips) if request.args.get("rely") == "true": return logger.info("Client connected! 
UI_{}".format(request.args.get('ui'))) From fcba26f631e7efcd4a92de206d2bd6da40ed65c3 Mon Sep 17 00:00:00 2001 From: ebolam Date: Sun, 16 Apr 2023 20:32:51 -0400 Subject: [PATCH 05/28] Fix for uploading files in UI2 --- koboldai_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/koboldai_settings.py b/koboldai_settings.py index fdd89ba9..06a15e4e 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1205,7 +1205,7 @@ class system_settings(settings): local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'sp', '_horde_pid', 'inference_config', 'image_pipeline', - 'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states'] + 'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui'] no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy', 'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', From a34ce85d21ac750b6a0b69a3e33ff24e9179a224 Mon Sep 17 00:00:00 2001 From: ebolam Date: Sun, 16 Apr 2023 20:43:03 -0400 Subject: [PATCH 06/28] Fix for UI1 remote mode file loading from browser --- aiserver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aiserver.py b/aiserver.py index b8ff4f3e..9dd621ef 100644 --- a/aiserver.py +++ b/aiserver.py @@ -7454,13 +7454,13 @@ def loadRequest(loadpath, filename=None): if not loadpath: return - + #Original UI only sends the story name and assumes it's always a .json file... here we check to see if it's a directory to load that way - if not os.path.exists(loadpath): + if not isinstance(loadpath, dict) and not os.path.exists(loadpath): if os.path.exists(loadpath.replace(".json", "")): loadpath = loadpath.replace(".json", "") - if os.path.isdir(loadpath): + if not isinstance(loadpath, dict) and os.path.isdir(loadpath): if not valid_v3_story(loadpath): raise RuntimeError(f"Tried to load {loadpath}, a non-save directory.") koboldai_vars.update_story_path_structure(loadpath) From d16a3f4dc3746e65bc16e92d55b91dc15d7b3641 Mon Sep 17 00:00:00 2001 From: ebolam Date: Sun, 16 Apr 2023 21:02:42 -0400 Subject: [PATCH 07/28] Added comregex_ui and comregex_ai to the UI under other settings. Needs proper titles and descriptions still. 
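As a quick reference for this change, the two patterns involved can be exercised standalone. The sketch below is illustrative only: the story string and variable layout are invented for the demo, while the pattern strings are the ones used in the diff that follows.

```python
import re

# Pattern strings as used in koboldai_settings.py after this change
comregex_ai_string = r'(?:\n\[<\|(?:.|\n)*?\|>\](?=\n|$))|(?:\[<\|(?:.|\n)*?\|>\]\n?)'
comregex_ui_string = r'(\[<\|(?:.|\n)*?\|>\])'

comregex_ai = re.compile(comregex_ai_string)  # strips comments before text is sent to the AI
comregex_ui = re.compile(comregex_ui_string)  # locates comments so the editor can mark them

story = "The knight rode on.\n[<|remember: she lost her sword|>]\nDawn broke over the hills."

print(comregex_ai.sub("", story))  # comment line removed from the AI-bound text
print(comregex_ui.findall(story))  # ['[<|remember: she lost her sword|>]']
```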
--- gensettings.py | 32 ++++++++++++++++++++++++++++++++ koboldai_settings.py | 16 ++++++++++++---- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/gensettings.py b/gensettings.py index 8d68b4b5..a0a20908 100644 --- a/gensettings.py +++ b/gensettings.py @@ -881,6 +881,38 @@ gensettingstf = [ "classname": "system", "name": "seed", "extra_classes": "var_sync_alt_system_seed_specified", + "ui_level": 2 + }, + { + "uitype": "text", + "unit": "text", + "label": "comregex_ai_string", + "id": "comregex_ai_string", + "min": 0, + "max": 1, + "step": 1, + "default": 1, + "tooltip": "Pattern for matching comments to remove them before sending them to the AI.", + "menu_path": "Settings", + "sub_path": "Other", + "classname": "system", + "name": "comregex_ai_string", + "ui_level": 2 + }, + { + "uitype": "text", + "unit": "text", + "label": "comregex_ui_string", + "id": "comregex_ui_string", + "min": 0, + "max": 1, + "step": 1, + "default": 1, + "tooltip": "Pattern for matching comments in the editor.", + "menu_path": "Settings", + "sub_path": "Other", + "classname": "system", + "name": "comregex_ui_string", "ui_level": 2 }, { diff --git a/koboldai_settings.py b/koboldai_settings.py index 06a15e4e..407ae1e1 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1203,9 +1203,9 @@ class undefined_settings(settings): class system_settings(settings): local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', - 'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', + 'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'comregex_ui', 'sp', '_horde_pid', 'inference_config', 'image_pipeline', - 'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui'] + 'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states'] no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy', 'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', @@ -1252,8 +1252,10 @@ class system_settings(settings): self.regex_sl = re.compile(r'\n*(?<=.) 
*\n(.|\n)*') # Pattern for limiting the output to a single line self.acregex_ai = re.compile(r'\n* *>(.|\n)*') # Pattern for matching adventure actions from the AI so we can remove them self.acregex_ui = re.compile(r'^ *(>.*)$', re.MULTILINE) # Pattern for matching actions in the HTML-escaped story so we can apply colouring, etc (make sure to encase part to format in parentheses) - self.comregex_ai = re.compile(r'(?:\n\[<\|(?:.|\n)*?\|>\](?=\n|$))|(?:\[<\|(?:.|\n)*?\|>\]\n?)') # Pattern for matching comments to remove them before sending them to the AI - self.comregex_ui = re.compile(r'(\[<\|(?:.|\n)*?\|>\])') # Pattern for matching comments in the editor + self.comregex_ai_string = '(?:\n\[<\|(?:.|\n)*?\|>\](?=\n|$))|(?:\[<\|(?:.|\n)*?\|>\]\n?)' # Pattern for matching comments to remove them before sending them to the AI + self.comregex_ui_string = '(\[<\|(?:.|\n)*?\|>\])' # Pattern for matching comments in the editor + self.comregex_ai = re.compile(self.comregex_ai_string) # Pattern for matching comments to remove them before sending them to the AI + self.comregex_ui = re.compile(self.comregex_ui_string) # Pattern for matching comments in the editor self.host = False self.flaskwebgui = False self.quiet = False # If set will suppress any story text from being printed to the console (will only be seen on the client web page) @@ -1340,6 +1342,12 @@ class system_settings(settings): self._socketio.emit('from_server', {'cmd': 'spstatitems', 'data': {self.spfilename: self.spmeta} if self.allowsp and len(self.spfilename) else {}}, namespace=None, broadcast=True, room="UI_1") super().__setattr__("sp_changed", False) + if name == 'comregex_ai_string': + self.comregex_ai = re.compile(self.comregex_ai_string) + + if name == 'comregex_ui_string': + self.comregex_ui = re.compile(self.comregex_ui_string) + if name == 'keep_img_gen_in_memory' and value == False: self.image_pipeline = None From 10c99a853c207c34d163914042b903d606dad8ee Mon Sep 17 00:00:00 2001 From: nerodiafasciata Date: Wed, 12 Apr 2023 21:37:44 -0500 Subject: [PATCH 08/28] Added AMD instructions, added formatting Added AMD install instructions Formatted the install/run section for improved readability --- README.md | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0657fa0b..170c4f42 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ ### Install/Use Guide (This guide is for both Linux and Windows and assumes user has git installed and a basic grasp of command line use) +#### Installation In the command prompt/command line navigate to where you want the KoboldAI subfolder to be created. `git clone https://github.com/0cc4m/KoboldAI -b latestgptq --recurse-submodules` @@ -11,10 +12,28 @@ In the command prompt/command line navigate to where you want the KoboldAI subfo Next step, (Windows) subfolder mode or B: option doesn't matter choose either -[if on Windows] `install_requirements.bat` if it closes the window when it finishes, reopen a command prompt and navigate back to your KoboldAI directory. +* [if on Windows] + ``` + install_requirements.bat + ``` + * if it closes the window when it finishes, reopen a command prompt and navigate back to your KoboldAI directory. 
-[if on Linux] `install_requirements.sh` 
+* [if on Linux with Nvidia]
+  ```
+  ./install_requirements.sh
+  ```
+* [if on Linux with AMD]
+  ```
+  ./install_requirements.sh rocm
+  ./commandline-rocm.sh
+  cd repos/gptq
+  python setup_cuda.py install
+  ```
+  * If you get an error about a missing hip/hip_runtime_xxx.h, you don't have the proper ROCm & HIP packages installed
+  * If you get "CUDA_HOME environment variable is not set", run this inside the environment:
+    `pip3 install torch --index-url https://download.pytorch.org/whl/rocm5.4.2 --force-reinstall`
 
+#### Setting up models
 If you haven't already done so, create a model folder with the same name as your model (or whatever you want to name the folder)
 
 Put your 4bit quantized .pt or .safetensors in that folder with all associated .json files and tokenizer.model (.json files and tokenizer.model should be from the Huggingface model folder of the same model type).
@@ -23,9 +42,10 @@ Then move your model folder to KoboldAI/models, and rename the .pt or .safetenso
 So - your .pt's model folder should look like this: "4bit.pt, config.json, generation_config.json, pytorch_model.bin.index.json, special_tokens_map.json, tokenizer.model, tokenizer_config.json"
 
 Note: the 4bit.pt file can be in the same folder as the regular HF .bin files it was quantized from, so long as the 4-bit toggle switch is on, it'll load the quantized model (4-bit switch explained below).
+#### Running KoboldAI and loading 4bit models
 If you haven't done so already, exit the command prompt/leave KAI's conda env. (Close the commandline window on Windows, run `exit` on Linux)
 
-Run `play.bat` [windows] or `play.sh` [linux]
+Run `play.bat` [windows], `play.sh` [linux Nvidia], or `play-rocm.sh` [linux AMD]
 
 Switch to UI2, enable Experimental UI under the Interface tab, then load your model and be sure 4-bit toggle is on.

From ee6e7e9b726b5c5186fe73430f417e551f5f3bc5 Mon Sep 17 00:00:00 2001
From: henk717
Date: Mon, 17 Apr 2023 22:59:55 +0200
Subject: [PATCH 09/28] Colab description changes

---
 colab/GPU.ipynb | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/colab/GPU.ipynb b/colab/GPU.ipynb
index c6c1e5ba..3c6b9706 100644
--- a/colab/GPU.ipynb
+++ b/colab/GPU.ipynb
@@ -39,8 +39,6 @@
 "\n",
 "For more information about KoboldAI check out our Github readme : https://github.com/KoboldAI/KoboldAI-Client/blob/main/readme.md\n",
 "\n",
-"For the larger AI models (That are typically more coherent) check out our **[TPU edition](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)**!\n",
-"\n",
 "---\n",
 "## How to load KoboldAI: Everything you need to know\n",
 "1. On a phone? First put your browser in desktop mode because of a Google Colab bug. Otherwise nothing will happen when you click the play button. Then tap the play button next to \"<-- Tap This if you play on Mobile\", you will see an audio player. Keep the audio player playing so Colab does not get shut down in the background.\n",
@@ -210,22 +208,6 @@
 "| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-2.7B) | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger models from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. Compared to other models the dataset focuses primarily on literature and contains little else. 
|\n", "| [Neo](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n", "\n", - "# [TPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)\n", - "\n", - "| Model | Style | Description |\n", - "| --- | --- | --- |\n", - "| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", - "| [Erebus](https://huggingface.co/KoboldAI/OPT-13B-Erebus) by Mr Seeker | NSFW | Erebus is our community's flagship NSFW model, being a combination of multiple large datasets that include Literotica, Shinen and erotic novels from Nerys and featuring thourough tagging support it covers the vast majority of erotic writing styles. This model is capable of replacing both the Lit and Shinen models in terms of content and style and has been well received as (one of) the best NSFW models out there. If you wish to use this model for commercial or non research usage we recommend choosing the 20B version as that one is not subject to the restrictive OPT license. |\n", - "| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", - "| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | NSFW | Shinen is an NSFW model trained on a variety of stories from the website Sexstories it contains many different kinks. It has been merged into the larger (and better) Erebus model. |\n", - "| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n", - "| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. 
It must be used in second person (You). |\n", - "| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n", - "| [OPT](https://huggingface.co/facebook/opt-13b) by Metaseq | Generic | OPT is considered one of the best base models as far as content goes, its behavior has the strengths of both GPT-Neo and Fairseq Dense. Compared to Neo duplicate and unnecessary content has been left out, while additional literature was added in similar to the Fairseq Dense model. The Fairseq Dense model however lacks the broader data that OPT does have. The biggest downfall of OPT is its license, which prohibits any commercial usage, or usage beyond research purposes. |\n", - "| [Neo(X)](https://huggingface.co/EleutherAI/gpt-neox-20b) by EleutherAI | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n", - "| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. Compared to other models the dataset focuses primarily on literature and contains little else. |\n", - "| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n", - "\n", "| Style | Description |\n", "| --------- | ------------------------------------------------------------ |\n", "| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. 
|\n", From e4c15fe1f6a2be574eb223e0102cbdb2a11dfe40 Mon Sep 17 00:00:00 2001 From: henk717 Date: Fri, 21 Apr 2023 03:00:52 +0200 Subject: [PATCH 10/28] Update install_requirements.sh --- install_requirements.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/install_requirements.sh b/install_requirements.sh index e131ad37..12c83595 100755 --- a/install_requirements.sh +++ b/install_requirements.sh @@ -1,12 +1,12 @@ #!/bin/bash -if [[ $1 = "cuda" ]||[ $1 = "CUDA" ]]; then +if [[ $1 = "cuda" || $1 = "CUDA" ]]; then wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y # Weird micromamba bug causes it to fail the first time, running it twice just to be safe, the second time is much faster bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y exit fi -if [[ $1 = "rocm" ]||[ $1 = "ROCM" ]]; then +if [[ $1 = "rocm" || $1 = "ROCM" ]]; then wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba bin/micromamba create -f environments/rocm.yml -r runtime -n koboldai-rocm -y # Weird micromamba bug causes it to fail the first time, running it twice just to be safe, the second time is much faster From 4502a3f6b8dfe49523a0b94e8f461b5ea6bb208d Mon Sep 17 00:00:00 2001 From: Bogdan Drema Date: Sat, 22 Apr 2023 22:31:21 +0100 Subject: [PATCH 11/28] Fix: TPU driver error to_dlpack/from_dlpack was causing issues with tensor with new jax version --- requirements_mtj.txt | 6 +++--- tpu_mtj_backend.py | 29 ++++++++++++++--------------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 37b76a23..19da3910 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -2,9 +2,9 @@ torch >= 1.9, < 1.13 numpy tqdm requests -dm-haiku == 0.0.5 -jax == 0.2.21 -jaxlib >= 0.1.69, <= 0.3.7 +dm-haiku==0.0.9 +jax==0.3.25 +jaxlib==0.3.25 transformers == 4.28.0 chex == 0.1.5 huggingface_hub==0.12.1 diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index 02754d95..dc0a664d 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -1095,7 +1095,7 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2): koboldai_vars.status_message = "" -def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: +def load_model(path: str, driver_version="tpu_driver_nightly", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: global thread_resources_env, seq, tokenizer, network, params, pad_token_id if "pad_token_id" in kwargs: @@ -1270,11 +1270,6 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo logger.message(f"KoboldAI has finished loading and is available at the following link for UI 1: {koboldai_vars.cloudflare_link}") logger.message(f"KoboldAI has finished loading and is available at the following link for UI 2: {koboldai_vars.cloudflare_link}/new_ui") - - global shard_xmap, batch_xmap - shard_xmap = __shard_xmap() - batch_xmap = __batch_xmap(shard_dim=cores_per_replica) - global badwords # These are the tokens that we don't want the AI to ever write badwords = jnp.array(koboldai_vars.badwordsids).squeeze() @@ -1401,19 +1396,20 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo #if "no_transpose" not in transforms and tensor.ndim == 2: 
# tensor = tensor.T tensor.unsqueeze_(0) - if tensor.dtype is torch.float16 or tensor.dtype is torch.float32: - tensor = tensor.bfloat16() + # Shard the tensor so that parts of the tensor can be used # on different TPU cores + tensor = reshard_reverse( + tensor, + params["cores_per_replica"], + network.state["params"][spec["module"]][spec["param"]].shape, + ) + tensor = jnp.array(tensor.detach()) + if tensor.dtype is torch.float16 or tensor.dtype is torch.float32: + tensor = tensor.bfloat16() network.state["params"][spec["module"]][spec["param"]] = move_xmap( - jax.dlpack.from_dlpack(torch.utils.dlpack.to_dlpack( - reshard_reverse( - tensor, - params["cores_per_replica"], - network.state["params"][spec["module"]][spec["param"]].shape, - ) - )).copy(), + tensor, np.empty(params["cores_per_replica"]), ) @@ -1506,3 +1502,6 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo model = GPTNeoForCausalLM.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache") #network.state = network.move_xmap(network.state, np.zeros(cores_per_replica)) + global shard_xmap, batch_xmap + shard_xmap = __shard_xmap() + batch_xmap = __batch_xmap(shard_dim=cores_per_replica) \ No newline at end of file From 8f44141f965941fa768a668991d11247a410149a Mon Sep 17 00:00:00 2001 From: henk717 Date: Sun, 23 Apr 2023 01:43:37 +0200 Subject: [PATCH 12/28] Pin driver to the one from JAX 0.3.25 --- tpu_mtj_backend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index dc0a664d..d8edb92f 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -1095,7 +1095,7 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2): koboldai_vars.status_message = "" -def load_model(path: str, driver_version="tpu_driver_nightly", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: +def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: global thread_resources_env, seq, tokenizer, network, params, pad_token_id if "pad_token_id" in kwargs: @@ -1504,4 +1504,4 @@ def load_model(path: str, driver_version="tpu_driver_nightly", hf_checkpoint=Fal #network.state = network.move_xmap(network.state, np.zeros(cores_per_replica)) global shard_xmap, batch_xmap shard_xmap = __shard_xmap() - batch_xmap = __batch_xmap(shard_dim=cores_per_replica) \ No newline at end of file + batch_xmap = __batch_xmap(shard_dim=cores_per_replica) From 92a0bf9524f831d10b8b1006ee383a300fef5af8 Mon Sep 17 00:00:00 2001 From: Bogdan Drema Date: Sun, 23 Apr 2023 00:49:42 +0100 Subject: [PATCH 13/28] Fix: TPU driver error to_dlpack/from_dlpack was causing issues with tensor with new jax version --- requirements_mtj.txt | 7 ++++--- tpu_mtj_backend.py | 26 ++++++++++++++++---------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 9447541f..ea399a8f 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -2,9 +2,10 @@ torch >= 1.9, < 1.13 numpy tqdm requests -dm-haiku == 0.0.5 -jax == 0.2.21 -jaxlib >= 0.1.69, <= 0.3.7 +dm-haiku==0.0.9 +jax==0.3.25 +jaxlib==0.3.25 +transformers == 4.28.0 chex == 0.1.5 transformers == 4.24.0 huggingface_hub==0.12.1 diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index 4b27493e..f878d690 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -1049,7 +1049,7 @@ def read_neox_checkpoint(state, 
path, config, checkpoint_shards=2): raise RuntimeError(error) -def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpoint=False, **kwargs) -> None: +def load_model(path: str, driver_version="tpu_driver_nightly", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: global thread_resources_env, seq, tokenizer, network, params, pad_token_id if "pad_token_id" in kwargs: @@ -1195,6 +1195,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo thread_resources_env = maps.ResourceEnv(maps.Mesh(devices, ('dp', 'mp')), ()) maps.thread_resources.env = thread_resources_env + global shard_xmap, batch_xmap shard_xmap = __shard_xmap() batch_xmap = __batch_xmap(shard_dim=cores_per_replica) @@ -1244,6 +1245,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo from tqdm.auto import tqdm import functools + def callback(model_dict, f, **_): if callback.nested: return @@ -1317,19 +1319,20 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo #if "no_transpose" not in transforms and tensor.ndim == 2: # tensor = tensor.T tensor.unsqueeze_(0) - if tensor.dtype is torch.float16 or tensor.dtype is torch.float32: - tensor = tensor.bfloat16() + # Shard the tensor so that parts of the tensor can be used # on different TPU cores + tensor = reshard_reverse( + tensor, + params["cores_per_replica"], + network.state["params"][spec["module"]][spec["param"]].shape, + ) + tensor = jnp.array(tensor.detach()) + if tensor.dtype is torch.float16 or tensor.dtype is torch.float32: + tensor = tensor.bfloat16() network.state["params"][spec["module"]][spec["param"]] = move_xmap( - jax.dlpack.from_dlpack(torch.utils.dlpack.to_dlpack( - reshard_reverse( - tensor, - params["cores_per_replica"], - network.state["params"][spec["module"]][spec["param"]].shape, - ) - )).copy(), + tensor, np.empty(params["cores_per_replica"]), ) @@ -1416,3 +1419,6 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo model = GPTNeoForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache") #network.state = network.move_xmap(network.state, np.zeros(cores_per_replica)) + global shard_xmap, batch_xmap + shard_xmap = __shard_xmap() + batch_xmap = __batch_xmap(shard_dim=cores_per_replica) \ No newline at end of file From 5f0e2001a7dc8cd630852450c7e4f9620d78a3f3 Mon Sep 17 00:00:00 2001 From: henk717 Date: Sun, 23 Apr 2023 17:50:03 +0200 Subject: [PATCH 14/28] Remove broken TPU disclaimer --- colab/TPU.ipynb | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/colab/TPU.ipynb b/colab/TPU.ipynb index 48fc7bdc..20d0aba9 100644 --- a/colab/TPU.ipynb +++ b/colab/TPU.ipynb @@ -2,30 +2,7 @@ "cells": [ { "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# GOOGLE HAS BROKEN SUPPORT FOR MESH TRANSFORMERS JAX IN THEIR DRIVER, THIS MESSAGE WILL BE REMOVED ONCE THE NOTEBOOK WORKS AGAIN\n", - "---\n", - "Are you a developer familair with Jax? 
We could use some help.\n", - "Our Mesh Transformers Jax fork resides here (but unfortunately VE-Forbryderne has gone missing) : https://github.com/VE-FORBRYDERNE/mesh-transformer-jax\n", - "\n", - "This is combined with the TPU backend code you can find here: https://github.com/KoboldAI/KoboldAI-Client/blob/main/tpu_mtj_backend.py\n", - "\n", - "So far we know the driver initialization issues can be resolved when a newer version of Jax is used combined with a slight edit to tpu_mtj_backend.py to use Jax's built in code for driver intialization (Which is not present in the older version we currently use, hence that part being in our file).\n", - "\n", - "As far as we understand the issue is that xmap was broken in newer versions, and MTJ makes use of it. If someone can port this part of the code to be compatible with the newer Jax versions you can save this notebook!\n", - "\n", - "(Or Google, if you are reading this. Please reintroduce the old 0.1 compatibility since you broke an entire ecosystem of Mesh Transformers Jax users, which has historic value because it is the original implementation of GPT-J. There also do not seem to be alternatives that have the same amount of performance and model compatibility).\n", - "\n", "# Welcome to KoboldAI on Google Colab, TPU Edition!\n", "KoboldAI is a powerful and easy way to use a variety of AI based text generation experiences. You can use it to write stories, blog posts, play a text adventure game, use it like a chatbot and more! In some cases it might even help you with an assignment or programming task (But always make sure the information the AI mentions is correct, it loves to make stuff up).\n", "\n", @@ -258,8 +235,7 @@ "colab": { "name": "ColabKobold TPU", "provenance": [], - "private_outputs": true, - "include_colab_link": true + "private_outputs": true }, "kernelspec": { "display_name": "Python 3", From b4cb09590f75e2843dfbb8aa39b106f475754a41 Mon Sep 17 00:00:00 2001 From: henk717 Date: Sun, 23 Apr 2023 18:23:38 +0200 Subject: [PATCH 15/28] Update requirements_mtj.txt --- requirements_mtj.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements_mtj.txt b/requirements_mtj.txt index ea399a8f..41721424 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -5,7 +5,6 @@ requests dm-haiku==0.0.9 jax==0.3.25 jaxlib==0.3.25 -transformers == 4.28.0 chex == 0.1.5 transformers == 4.24.0 huggingface_hub==0.12.1 From d88f10907367fd257fbe7111c1a535644cd19619 Mon Sep 17 00:00:00 2001 From: henk717 Date: Sun, 23 Apr 2023 18:49:25 +0200 Subject: [PATCH 16/28] TPU Fix Fix --- tpu_mtj_backend.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index f878d690..205f9cc4 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -1195,11 +1195,6 @@ def load_model(path: str, driver_version="tpu_driver_nightly", hf_checkpoint=Fal thread_resources_env = maps.ResourceEnv(maps.Mesh(devices, ('dp', 'mp')), ()) maps.thread_resources.env = thread_resources_env - - global shard_xmap, batch_xmap - shard_xmap = __shard_xmap() - batch_xmap = __batch_xmap(shard_dim=cores_per_replica) - global badwords # These are the tokens that we don't want the AI to ever write badwords = jnp.array(vars.badwordsids).squeeze() @@ -1421,4 +1416,4 @@ def load_model(path: str, driver_version="tpu_driver_nightly", hf_checkpoint=Fal #network.state = network.move_xmap(network.state, np.zeros(cores_per_replica)) global shard_xmap, batch_xmap shard_xmap = __shard_xmap() - batch_xmap = 
__batch_xmap(shard_dim=cores_per_replica) \ No newline at end of file + batch_xmap = __batch_xmap(shard_dim=cores_per_replica) From b808f039ab15a781b02b3bc00a8bbbf3624def31 Mon Sep 17 00:00:00 2001 From: henk717 Date: Sun, 23 Apr 2023 20:21:28 +0200 Subject: [PATCH 17/28] Pin TPU driver --- tpu_mtj_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index 205f9cc4..fe20646c 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -1049,7 +1049,7 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2): raise RuntimeError(error) -def load_model(path: str, driver_version="tpu_driver_nightly", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: +def load_model(path: str, driver_version="tpu_driver_20221109", hf_checkpoint=False, socketio_queue=None, initial_load=False, logger=None, **kwargs) -> None: global thread_resources_env, seq, tokenizer, network, params, pad_token_id if "pad_token_id" in kwargs: From 1c36eb9f2cf438535a461c3e77b1d56bd324d0dc Mon Sep 17 00:00:00 2001 From: Henk Date: Sun, 23 Apr 2023 21:24:17 +0200 Subject: [PATCH 18/28] Further Isolation --- install_git_transformers.bat | 4 ++++ install_requirements.bat | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/install_git_transformers.bat b/install_git_transformers.bat index 3e154b80..8c6257cc 100644 --- a/install_git_transformers.bat +++ b/install_git_transformers.bat @@ -1,6 +1,10 @@ @echo off cd /D %~dp0 + +:Isolation SET CONDA_SHLVL= +SET PYTHONNOUSERSITE=1 +SET PYTHONPATH= TITLE KoboldAI - Git Transformers Installer ECHO This script will replace the Transformers version with the latest Git Transformers which may contain breaking changes. diff --git a/install_requirements.bat b/install_requirements.bat index 2a4534c1..69e4a47a 100644 --- a/install_requirements.bat +++ b/install_requirements.bat @@ -8,7 +8,11 @@ echo. Reg add "HKLM\SYSTEM\CurrentControlSet\Control\FileSystem" /v "LongPathsEnabled" /t REG_DWORD /d "1" /f 2>nul cd /D %~dp0 + +:Isolation SET CONDA_SHLVL= +SET PYTHONNOUSERSITE=1 +SET PYTHONPATH= if exist miniconda3\ ( echo Delete existing installation? From b8cd5f4c03e9bf2dd2936568d9abf2874775ed45 Mon Sep 17 00:00:00 2001 From: Henk Date: Sun, 23 Apr 2023 21:35:24 +0200 Subject: [PATCH 19/28] Revert "Added comregex_ui and comregex_ai to the UI under other settings. Needs proper titles and descriptions still." This reverts commit d16a3f4dc3746e65bc16e92d55b91dc15d7b3641. 
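For context, the mechanism being reverted recompiled each pattern from its string form inside system_settings.__setattr__ (visible in the diff below). A condensed, standalone sketch of that idea, with a hypothetical class name and a demo value taken from the diff:

```python
import re

class RegexSettings:
    def __setattr__(self, name, value):
        super().__setattr__(name, value)
        # Recompile the cached pattern whenever its source string is reassigned
        if name == 'comregex_ai_string':
            super().__setattr__('comregex_ai', re.compile(value))
        if name == 'comregex_ui_string':
            super().__setattr__('comregex_ui', re.compile(value))

settings = RegexSettings()
settings.comregex_ui_string = r'(\[<\|(?:.|\n)*?\|>\])'
print(settings.comregex_ui.pattern)  # the compiled pattern tracks the string
```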
--- gensettings.py | 32 -------------------------------- koboldai_settings.py | 16 ++++------------ 2 files changed, 4 insertions(+), 44 deletions(-) diff --git a/gensettings.py b/gensettings.py index a0a20908..8d68b4b5 100644 --- a/gensettings.py +++ b/gensettings.py @@ -881,38 +881,6 @@ gensettingstf = [ "classname": "system", "name": "seed", "extra_classes": "var_sync_alt_system_seed_specified", - "ui_level": 2 - }, - { - "uitype": "text", - "unit": "text", - "label": "comregex_ai_string", - "id": "comregex_ai_string", - "min": 0, - "max": 1, - "step": 1, - "default": 1, - "tooltip": "Pattern for matching comments to remove them before sending them to the AI.", - "menu_path": "Settings", - "sub_path": "Other", - "classname": "system", - "name": "comregex_ai_string", - "ui_level": 2 - }, - { - "uitype": "text", - "unit": "text", - "label": "comregex_ui_string", - "id": "comregex_ui_string", - "min": 0, - "max": 1, - "step": 1, - "default": 1, - "tooltip": "Pattern for matching comments in the editor.", - "menu_path": "Settings", - "sub_path": "Other", - "classname": "system", - "name": "comregex_ui_string", "ui_level": 2 }, { diff --git a/koboldai_settings.py b/koboldai_settings.py index 407ae1e1..06a15e4e 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1203,9 +1203,9 @@ class undefined_settings(settings): class system_settings(settings): local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', - 'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'comregex_ui', + 'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'sp', '_horde_pid', 'inference_config', 'image_pipeline', - 'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states'] + 'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui'] no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy', 'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', @@ -1252,10 +1252,8 @@ class system_settings(settings): self.regex_sl = re.compile(r'\n*(?<=.) 
*\n(.|\n)*') # Pattern for limiting the output to a single line self.acregex_ai = re.compile(r'\n* *>(.|\n)*') # Pattern for matching adventure actions from the AI so we can remove them self.acregex_ui = re.compile(r'^ *(>.*)$', re.MULTILINE) # Pattern for matching actions in the HTML-escaped story so we can apply colouring, etc (make sure to encase part to format in parentheses) - self.comregex_ai_string = '(?:\n\[<\|(?:.|\n)*?\|>\](?=\n|$))|(?:\[<\|(?:.|\n)*?\|>\]\n?)' # Pattern for matching comments to remove them before sending them to the AI - self.comregex_ui_string = '(\[<\|(?:.|\n)*?\|>\])' # Pattern for matching comments in the editor - self.comregex_ai = re.compile(self.comregex_ai_string) # Pattern for matching comments to remove them before sending them to the AI - self.comregex_ui = re.compile(self.comregex_ui_string) # Pattern for matching comments in the editor + self.comregex_ai = re.compile(r'(?:\n\[<\|(?:.|\n)*?\|>\](?=\n|$))|(?:\[<\|(?:.|\n)*?\|>\]\n?)') # Pattern for matching comments to remove them before sending them to the AI + self.comregex_ui = re.compile(r'(\[<\|(?:.|\n)*?\|>\])') # Pattern for matching comments in the editor self.host = False self.flaskwebgui = False self.quiet = False # If set will suppress any story text from being printed to the console (will only be seen on the client web page) @@ -1342,12 +1340,6 @@ class system_settings(settings): self._socketio.emit('from_server', {'cmd': 'spstatitems', 'data': {self.spfilename: self.spmeta} if self.allowsp and len(self.spfilename) else {}}, namespace=None, broadcast=True, room="UI_1") super().__setattr__("sp_changed", False) - if name == 'comregex_ai_string': - self.comregex_ai = re.compile(self.comregex_ai_string) - - if name == 'comregex_ui_string': - self.comregex_ui = re.compile(self.comregex_ui_string) - if name == 'keep_img_gen_in_memory' and value == False: self.image_pipeline = None From 560c8d23fb20d97b486082e8a7193b109762e370 Mon Sep 17 00:00:00 2001 From: Henk Date: Sun, 23 Apr 2023 21:37:24 +0200 Subject: [PATCH 20/28] Re-add UI Json Fix --- koboldai_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/koboldai_settings.py b/koboldai_settings.py index 06a15e4e..dbc9d115 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1203,7 +1203,7 @@ class undefined_settings(settings): class system_settings(settings): local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', - 'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', + 'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'comregex_ui', 'sp', '_horde_pid', 'inference_config', 'image_pipeline', 'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui'] no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', From 8d61d6b04ab7c100db4871fb33c7b7eec835ccc4 Mon Sep 17 00:00:00 2001 From: nerodiafasciata Date: Tue, 25 Apr 2023 00:25:28 -0500 Subject: [PATCH 21/28] install instruction update: don't run as admin (#12) * Update README.md Added note to tell windows users not to install as admin --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 170c4f42..67fe881a 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,8 @@ #### Installation In the command prompt/command line navigate to where you want the KoboldAI subfolder to be created. 
 
+Note: do not run your command prompt as administrator/with elevated privileges, reports suggest this leads to problems. 
+
 `git clone https://github.com/0cc4m/KoboldAI -b latestgptq --recurse-submodules`
 
 `cd KoboldAI`

From b58e5f353febf4c20f5ae2194b369f7e9160420a Mon Sep 17 00:00:00 2001
From: 0cc4m
Date: Tue, 25 Apr 2023 18:56:25 +0200
Subject: [PATCH 22/28] Add wheel links file for pip

---
 docs/gptq-whl-links.html | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 docs/gptq-whl-links.html

diff --git a/docs/gptq-whl-links.html b/docs/gptq-whl-links.html
new file mode 100644
index 00000000..c612b5e1
--- /dev/null
+++ b/docs/gptq-whl-links.html
@@ -0,0 +1,2 @@
+quant_cuda-0.0.0-cp38-cp38-linux_x86_64.whl
+quant_cuda-0.0.0-cp38-cp38-win_amd64.whl

From cd289a947824ad52daa9192363115b5322dbf749 Mon Sep 17 00:00:00 2001
From: 0cc4m
Date: Tue, 25 Apr 2023 19:06:25 +0200
Subject: [PATCH 23/28] Use custom pip repo for wheels instead of modifying
 install_requirements scripts

---
 environments/huggingface.yml | 2 ++
 install_requirements.bat     | 4 ----
 install_requirements.sh      | 3 ---
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index daa25e1f..35580603 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -45,3 +45,5 @@ dependencies:
     - ftfy
     - pydub
     - diffusers
+    - --find-links=https://0cc4m.github.io/KoboldAI/gptq-whl-links.html
+    - quant_cuda

diff --git a/install_requirements.bat b/install_requirements.bat
index 3b735ddf..2a4534c1 100644
--- a/install_requirements.bat
+++ b/install_requirements.bat
@@ -48,8 +48,6 @@ umamba.exe create -r B:\python\ -n base
 umamba.exe install --no-shortcuts -r B:\python\ -n base -f "%~dp0\environments\huggingface.yml" -y --always-copy
 umamba.exe -r B:\ clean -a -y
 rd B:\Python\pkgs /S /Q
-call B:\python\condabin\activate
-pip install "https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/2023-04-10/quant_cuda-0.0.0-cp38-cp38-win_amd64.whl"
 subst B: /d
 pause
 exit
@@ -62,7 +60,5 @@ umamba.exe create -r miniconda3\ -n base
 umamba.exe install --no-shortcuts -r miniconda3 -n base -f environments\huggingface.yml -y --always-copy
 umamba.exe clean -a -y
 rd miniconda3\Python\pkgs /S /Q
-call miniconda3\condabin\activate
-pip install "https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/2023-04-10/quant_cuda-0.0.0-cp38-cp38-win_amd64.whl"
 pause
 exit

diff --git a/install_requirements.sh b/install_requirements.sh
index 7b5a8d5b..6f0e0dfd 100755
--- a/install_requirements.sh
+++ b/install_requirements.sh
@@ -5,9 +5,6 @@ wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -
 bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y
 # Weird micromamba bug causes it to fail the first time, running it twice just to be safe, the second time is much faster
 bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y
-
-# Install quant_cuda module for 4-bit
-bin/micromamba run -r runtime -n koboldai pip install https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/2023-04-10/quant_cuda-0.0.0-cp38-cp38-linux_x86_64.whl
 exit
 fi
 if [[ $1 = "rocm" ]]; then

From 9eaa2aba47dda877fdf2a120bfbff9b1aa7e70ce Mon Sep 17 00:00:00 2001
From: Henk
Date: Tue, 25 Apr 2023 22:49:56 +0200
Subject: [PATCH 24/28] Isolate OPT Tokenizer Fix to OPT models

---
 aiserver.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 9dd621ef..7e198dee 100644
--- a/aiserver.py
+++ 
b/aiserver.py @@ -108,15 +108,14 @@ def new_init(self, *args, **kwargs): self.ncols = 99 tqdm.__init__ = new_init -# Fix some issues with the OPT tokenizer +# Add _koboldai_header support for some optional tokenizer fixes +# This used to be an OPT tokenizer fix, this has been moved search for "# These are model specific overrides if a model has bad defaults" for the new section from transformers import PreTrainedTokenizerBase old_pretrainedtokenizerbase_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__ @classmethod def new_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs): tokenizer = old_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs) - tokenizer._koboldai_header = tokenizer.encode("") - tokenizer.add_bos_token = False - tokenizer.add_prefix_space = False + tokenizer._koboldai_header = [] return tokenizer PreTrainedTokenizerBase.from_pretrained = new_pretrainedtokenizerbase_from_pretrained @@ -3251,10 +3250,14 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal # koboldai_vars.badwordsids.append([vocab[key]]) # These are model specific overrides if a model has bad defaults + tokenizer._koboldai_header = [] if koboldai_vars.model_type == "llama": tokenizer.decode_with_prefix_space = True tokenizer.add_bos_token = False - + if koboldai_vars.model_type == "opt": + tokenizer._koboldai_header = tokenizer.encode("") + tokenizer.add_bos_token = False + tokenizer.add_prefix_space = False logger.info(f"Pipeline created: {koboldai_vars.model}") else: From 55625e5cde34a34f475484897c7a50ae9fcc2fe9 Mon Sep 17 00:00:00 2001 From: Henk Date: Wed, 26 Apr 2023 19:31:20 +0200 Subject: [PATCH 25/28] Linux Isolation --- commandline-rocm.sh | 2 ++ commandline.sh | 2 ++ install_requirements.sh | 1 + play-rocm.sh | 1 + play.sh | 1 + 5 files changed, 7 insertions(+) diff --git a/commandline-rocm.sh b/commandline-rocm.sh index 5c9a54aa..d34ca445 100755 --- a/commandline-rocm.sh +++ b/commandline-rocm.sh @@ -1 +1,3 @@ +export CONDA_AUTO_ACTIVATE_BASE=false +export PYTHONNOUSERSITE=1 bin/micromamba run -r runtime -n koboldai-rocm bash diff --git a/commandline.sh b/commandline.sh index 72338169..80cf9868 100755 --- a/commandline.sh +++ b/commandline.sh @@ -1 +1,3 @@ +export CONDA_AUTO_ACTIVATE_BASE=false +export PYTHONNOUSERSITE=1 bin/micromamba run -r runtime -n koboldai bash diff --git a/install_requirements.sh b/install_requirements.sh index 9241d5a1..6e37c7e9 100755 --- a/install_requirements.sh +++ b/install_requirements.sh @@ -1,4 +1,5 @@ #!/bin/bash +export PYTHONNOUSERSITE=1 git submodule update --init --recursive if [[ $1 = "cuda" || $1 = "CUDA" ]]; then wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba diff --git a/play-rocm.sh b/play-rocm.sh index 6cf2794a..e0753edc 100755 --- a/play-rocm.sh +++ b/play-rocm.sh @@ -1,4 +1,5 @@ #!/bin/bash +export PYTHONNOUSERSITE=1 if [ ! -f "runtime/envs/koboldai-rocm/bin/python" ]; then ./install_requirements.sh rocm fi diff --git a/play.sh b/play.sh index 10f3c8aa..8ce7b781 100755 --- a/play.sh +++ b/play.sh @@ -1,4 +1,5 @@ #!/bin/bash +export PYTHONNOUSERSITE=1 if [ ! 
-f "runtime/envs/koboldai/bin/python" ]; then ./install_requirements.sh cuda fi From 99c4c3bae4956e7190beb6909a42d7debd033553 Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Mon, 17 Apr 2023 07:26:03 +0200 Subject: [PATCH 26/28] Show 4-bit toggle without experimental ui --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 85523734..2fc8990c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1809,7 +1809,7 @@ def get_model_info(model, directory=""): 'break_values': break_values, 'gpu_count': gpu_count, 'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, 'show_online_model_select': show_online_model_select, 'bit_8_available': koboldai_vars.bit_8_available if koboldai_vars.experimental_features else False, - 'bit_4_available': koboldai_vars.bit_4_available if koboldai_vars.experimental_features else False, + 'bit_4_available': koboldai_vars.bit_4_available, 'show_custom_model_box': show_custom_model_box}) if send_horde_models: get_cluster_models({'key': key_value, 'url': default_url}) From aedb6388c5f22b3bd99a0b8e17dc45d14c50e142 Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Thu, 27 Apr 2023 07:05:11 +0200 Subject: [PATCH 27/28] Update README, remove experimental UI --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 67fe881a..aadfd345 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ If you haven't done so already, exit the command prompt/leave KAI's conda env. ( Run `play.bat` [windows], `play.sh` [linux Nvidia], or `play-rocm.sh` [linux AMD] -Switch to UI2, enable Experimental UI under the Interface tab, then load your model and be sure 4-bit toggle is on. +Switch to UI2, then load your model and be sure 4-bit toggle is on. The 4bit toggle shows when a model to load is selected. From 18ac5dfce6398a561c4521356f7187e6977a7c61 Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Thu, 27 Apr 2023 16:04:30 +0200 Subject: [PATCH 28/28] Update to Pytorch 1.13.1 and CUDA 11.7 --- docs/gptq-whl-links.html | 4 ++-- environments/huggingface.yml | 6 ++++-- repos/gptq | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/gptq-whl-links.html b/docs/gptq-whl-links.html index c612b5e1..710a43b8 100644 --- a/docs/gptq-whl-links.html +++ b/docs/gptq-whl-links.html @@ -1,2 +1,2 @@ -quant_cuda-0.0.0-cp38-cp38-linux_x86_64.whl -quant_cuda-0.0.0-cp38-cp38-win_amd64.whl +quant_cuda-0.0.0-cp38-cp38-linux_x86_64.whl +quant_cuda-0.0.0-cp38-cp38-win_amd64.whl diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 35580603..b1b86c45 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -1,6 +1,7 @@ name: koboldai channels: - pytorch + - nvidia - conda-forge - defaults dependencies: @@ -8,9 +9,10 @@ dependencies: - flask-socketio=5.3.2 - flask-session=0.4.0 - python-socketio=5.7.2 - - pytorch=1.11.* + - pytorch=1.13.1 + - pytorch-cuda=11.7 - python=3.8.* - - cudatoolkit=11.1 + - cudatoolkit=11.7 - eventlet=0.33.3 - dnspython=2.2.1 - markdown diff --git a/repos/gptq b/repos/gptq index 50b22e2b..3c16fd9c 160000 --- a/repos/gptq +++ b/repos/gptq @@ -1 +1 @@ -Subproject commit 50b22e2ba8ec0f5cf0dca719392a2ec5254e7228 +Subproject commit 3c16fd9c7946ebe85df8d951cb742adbc1966ec7
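With the series applied, a quick way to confirm the rebuilt environment picked up the intended toolchain is the check below. This is a suggested verification, not part of the patches; the expected values come from environments/huggingface.yml and docs/gptq-whl-links.html above, and the exact version strings depend on the resolved build.

```python
import torch

print(torch.__version__)          # expect a 1.13.1 build after PATCH 28
print(torch.version.cuda)         # expect 11.7
print(torch.cuda.is_available())  # True when a CUDA device is usable

# The 4-bit kernels are now installed from the prebuilt wheels
# referenced by docs/gptq-whl-links.html (PATCH 22/23/28)
import quant_cuda                 # an ImportError here means the wheel was not installed
print("quant_cuda kernels available")
```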