From b20d80ca2a9f13908202d8479bf901b383aeae2b Mon Sep 17 00:00:00 2001
From: vfbd
Date: Wed, 2 Nov 2022 19:02:09 -0400
Subject: [PATCH 1/8] Add vocab padding to embedding bias in gptj.json

---
 maps/gptj.json     | 4 ++--
 tpu_mtj_backend.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/maps/gptj.json b/maps/gptj.json
index 8e0bc9da..08b22130 100644
--- a/maps/gptj.json
+++ b/maps/gptj.json
@@ -9,11 +9,11 @@
     },
     "static_weights": {
         "transformer.wte.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}},
-        "transformer.wte.bias": {"mtj": {"module": "embedding_shard/~/linear", "param": "b"}},
+        "transformer.wte.bias": {"mtj": {"module": "embedding_shard/~/linear", "param": "b", "transforms": ["vocab_pad"]}},
         "transformer.ln_f.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}},
         "transformer.ln_f.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}},
         "lm_head.weight": {"mtj": {"module": "projection_shard/~/linear", "param": "w", "transforms": ["vocab_pad"]}},
-        "lm_head.bias": {"mtj": {"module": "projection_shard/~/linear", "param": "b"}}
+        "lm_head.bias": {"mtj": {"module": "projection_shard/~/linear", "param": "b", "transforms": ["vocab_pad"]}}
     },
     "layer_weights": {
         "transformer.h.{layer}.attn.bias": {},
diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py
index d992ba45..64484393 100644
--- a/tpu_mtj_backend.py
+++ b/tpu_mtj_backend.py
@@ -1304,7 +1304,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
         if "divide_by_shards" in transforms:
             tensor /= params["cores_per_replica"]
         if "vocab_pad" in transforms:
-            tensor = torch.nn.functional.pad(tensor, (0, 0, 0, params["n_vocab_padding"]))
+            tensor = torch.nn.functional.pad(tensor, (0,) * (tensor.ndim * 2 - 1) + (params["n_vocab_padding"],))
         if "no_transpose" not in transforms and tensor.ndim == 2:
             tensor = tensor.T
         tensor.unsqueeze_(0)
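A note on the generalized pad tuple above: torch.nn.functional.pad reads its pad argument from the last dimension backwards, so `(0,) * (tensor.ndim * 2 - 1) + (pad,)` always pads the trailing end of dimension 0 — the vocab dimension — regardless of rank. The old hard-coded `(0, 0, 0, pad)` only worked for 2-D weights, while the embedding bias being padded here is 1-D. A minimal sketch (shapes and padding amount are illustrative, not taken from the patch):

```python
import torch
import torch.nn.functional as F

n_vocab_padding = 143  # illustrative: pads a 50257-token vocab up to 50400

weight = torch.zeros(50257, 4096)  # (n_vocab, d_model) embedding weight
bias = torch.zeros(50257)          # (n_vocab,) embedding bias

for tensor in (weight, bias):
    # (0,) * (ndim*2 - 1) zero-pads every boundary except the trailing
    # end of dim 0, which receives n_vocab_padding extra rows/entries.
    padded = F.pad(tensor, (0,) * (tensor.ndim * 2 - 1) + (n_vocab_padding,))
    print(padded.shape)  # torch.Size([50400, 4096]), then torch.Size([50400])
```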
From f1e4664d56fff50a3698df7b0316f8ac22bfb150 Mon Sep 17 00:00:00 2001
From: Henk
Date: Fri, 11 Nov 2022 21:13:51 +0100
Subject: [PATCH 2/8] Dependency improvements

Adding psutil from conda to avoid the need for a compiler. finetuneanon
should no longer be used; if people really want to use it, they are on
their own.
---
 environments/finetuneanon.yml  | 26 --------------------------
 environments/huggingface.yml   |  1 +
 environments/rocm-finetune.yml | 25 -------------------------
 environments/rocm.yml          |  1 +
 4 files changed, 2 insertions(+), 51 deletions(-)
 delete mode 100644 environments/finetuneanon.yml
 delete mode 100644 environments/rocm-finetune.yml

diff --git a/environments/finetuneanon.yml b/environments/finetuneanon.yml
deleted file mode 100644
index 85d5ea66..00000000
--- a/environments/finetuneanon.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-name: koboldai
-channels:
-  - pytorch
-  - conda-forge
-  - defaults
-dependencies:
-  - colorama
-  - flask-socketio
-  - flask-session
-  - pytorch
-  - cudatoolkit=11.1
-  - tensorflow-gpu
-  - python=3.8.*
-  - eventlet
-  - markdown
-  - bleach=4.1.0
-  - pip
-  - git=2.35.1
-  - marshmallow>=3.13
-  - apispec-webframeworks
-  - loguru
-  - pip:
-    - git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b
-    - flask-cloudflared
-    - flask-ngrok
-    - lupa==1.10
diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index c1a168ae..886bdb1b 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -21,6 +21,7 @@ dependencies:
   - apispec-webframeworks
   - loguru
   - termcolor
+  - psutil
   - pip:
     - flask-cloudflared
     - flask-ngrok
diff --git a/environments/rocm-finetune.yml b/environments/rocm-finetune.yml
deleted file mode 100644
index fc56eb4f..00000000
--- a/environments/rocm-finetune.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: koboldai-ft
-channels:
-  - conda-forge
-  - defaults
-dependencies:
-  - colorama
-  - flask-socketio
-  - flask-session
-  - python=3.8.*
-  - eventlet
-  - markdown
-  - bleach=4.1.0
-  - pip
-  - git=2.35.1
-  - marshmallow>=3.13
-  - apispec-webframeworks
-  - loguru
-  - pip:
-    - --find-links https://download.pytorch.org/whl/rocm4.2/torch_stable.html
-    - torch
-    - torchvision==0.11.1
-    - flask-cloudflared
-    - git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b
-    - flask-ngrok
-    - lupa==1.10
diff --git a/environments/rocm.yml b/environments/rocm.yml
index 03425cf8..04bb88a6 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -18,6 +18,7 @@ dependencies:
   - apispec-webframeworks
   - loguru
   - termcolor
+  - psutil
   - pip:
     - --extra-index-url https://download.pytorch.org/whl/rocm5.1.1
     - torch

From 440c5c333e19c0425ea5f1a6a2f6889c90731c34 Mon Sep 17 00:00:00 2001
From: Henk
Date: Sat, 12 Nov 2022 15:43:06 +0100
Subject: [PATCH 3/8] Clear flask_session on launch

Can help with version switching bugs
---
 play.bat | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/play.bat b/play.bat
index a44f0afa..c3d96045 100644
--- a/play.bat
+++ b/play.bat
@@ -2,6 +2,8 @@
 cd /D %~dp0

 SET CONDA_SHLVL=

+rmdir /S /Q flask_session
+
 TITLE KoboldAI - Server
 SET /P M=
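The `rmdir /S /Q flask_session` line deletes Flask's server-side session cache before launch, so session data written by one KoboldAI version cannot be picked up by another. A hypothetical cross-platform equivalent in Python — the session directory location is an assumption for illustration, not code from this series:

```python
import shutil
from pathlib import Path

# Assumed location: a flask_session folder next to this script, as created
# by Flask-Session's filesystem backend in the KoboldAI directory.
session_dir = Path(__file__).parent / "flask_session"

# ignore_errors=True mirrors rmdir /S /Q: silently succeed if absent.
shutil.rmtree(session_dir, ignore_errors=True)
```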
"16GB", False], ["Janeway Neo 6B", "KoboldAI/GPT-J-6B-Janeway", "16GB", False], + ["Qilin Lit 6B (SFW)", "rexwang8/qilin-lit-6b", "16GB", False], ["Janeway Neo 2.7B", "KoboldAI/GPT-Neo-2.7B-Janeway", "8GB", False], ["Janeway FSD 2.7B", "KoboldAI/fairseq-dense-2.7B-Janeway", "8GB", False], ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB", False], @@ -183,12 +185,31 @@ model_menu = { ], 'gptneolist': [ ["GPT-NeoX 20B", "EleutherAI/gpt-neox-20b", "64GB", False], + ["Pythia 13B (NeoX, Same dataset)", "EleutherAI/pythia-13b", "32GB", False], ["GPT-J 6B", "EleutherAI/gpt-j-6B", "16GB", False], ["GPT-Neo 2.7B", "EleutherAI/gpt-neo-2.7B", "8GB", False], ["GPT-Neo 1.3B", "EleutherAI/gpt-neo-1.3B", "6GB", False], + ["Pythia 800M (NeoX, Same dataset)", "EleutherAI/pythia-800m", "4GB", False], + ["Pythia 350M (NeoX, Same dataset)", "EleutherAI/pythia-350m", "2GB", False], ["GPT-Neo 125M", "EleutherAI/gpt-neo-125M", "2GB", False], ["Return to Main Menu", "mainmenu", "", True], ], + 'pythialist': [ + ["Pythia 13B Deduped", "EleutherAI/pythia-13b-deduped", "32GB", False], + ["Pythia 13B", "EleutherAI/pythia-13b", "32GB", False], + ["Pythia 6.7B Deduped", "EleutherAI/pythia-6.7b-deduped", "16GB", False], + ["Pythia 6.7B", "EleutherAI/pythia-6.7b", "16GB", False], + ["Pythia 1.3B Deduped", "EleutherAI/pythia-1.3b-deduped", "6GB", False], + ["Pythia 1.3B", "EleutherAI/pythia-1.3b", "6GB", False], + ["Pythia 800M", "EleutherAI/pythia-800m", "4GB", False], + ["Pythia 350M Deduped", "EleutherAI/pythia-350m-deduped", "2GB", False], + ["Pythia 350M", "EleutherAI/pythia-350m", "2GB", False], + ["Pythia 125M Deduped", "EleutherAI/pythia-125m-deduped", "2GB", False], + ["Pythia 125M", "EleutherAI/pythia-125m", "2GB", False], + ["Pythia 19M Deduped", "EleutherAI/pythia-19m-deduped", "1GB", False], + ["Pythia 19M", "EleutherAI/pythia-19m", "1GB", False], + ["Return to Main Menu", "mainmenu", "", True], + ], 'gpt2list': [ ["GPT-2 XL", "gpt2-xl", "6GB", False], ["GPT-2 Large", "gpt2-large", "4GB", False], From 13dff68de83e71a6af71a5447e77df1996dc2bf3 Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 14 Nov 2022 16:59:53 +0100 Subject: [PATCH 5/8] Sampler Order Loading Fix --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 97a2a741..c48db24b 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1155,7 +1155,7 @@ def processsettings(js): if("andepth" in js): vars.andepth = js["andepth"] if("sampler_order" in js): - sampler_order = vars.sampler_order + sampler_order = js["sampler_order"] if(len(sampler_order) < 7): sampler_order = [6] + sampler_order vars.sampler_order = sampler_order From 3084552c0567cfb79847b9c2a1e4e0990d0a9730 Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 14 Nov 2022 17:15:39 +0100 Subject: [PATCH 6/8] Sampler Order Fix for Models --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index c48db24b..ddedc9b1 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1017,7 +1017,7 @@ def loadmodelsettings(): if("nobreakmodel" in js): vars.nobreakmodel = js["nobreakmodel"] if("sampler_order" in js): - sampler_order = vars.sampler_order + sampler_order = js["sampler_order"] if(len(sampler_order) < 7): sampler_order = [6] + sampler_order vars.sampler_order = sampler_order From 2603f1fd5d995a1cad4f10866c6ff78a138668de Mon Sep 17 00:00:00 2001 From: Henk Date: Sun, 20 Nov 2022 16:22:33 +0100 Subject: [PATCH 7/8] Version bump --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 
From 2603f1fd5d995a1cad4f10866c6ff78a138668de Mon Sep 17 00:00:00 2001
From: Henk
Date: Sun, 20 Nov 2022 16:22:33 +0100
Subject: [PATCH 7/8] Version bump

---
 aiserver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aiserver.py b/aiserver.py
index ddedc9b1..eff21923 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python3
 #==================================================================#
 # KoboldAI
-# Version: 1.19.1
+# Version: 1.19.2
 # By: The KoboldAI Community
 #==================================================================#

From 9a3f0eaab27afd26ad45496392f162748797a2a6 Mon Sep 17 00:00:00 2001
From: vfbd
Date: Mon, 21 Nov 2022 13:47:18 -0500
Subject: [PATCH 8/8] Only enable TPU transpose optimization if loading from HF model

---
 tpu_mtj_backend.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py
index 067f7912..9bb1fda2 100644
--- a/tpu_mtj_backend.py
+++ b/tpu_mtj_backend.py
@@ -1149,7 +1149,8 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
             params[param] = default_params[param]

     # Use an optimization that will allow us to avoid one extra transpose operation
-    params["transposed_linear"] = True
+    if hf_checkpoint:
+        params["transposed_linear"] = True

     # Load tokenizer
     if vars.model == "TPUMeshTransformerGPTNeoX":
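Patch 8 enables `transposed_linear` only when converting from a Hugging Face checkpoint, whose linear weights arrive in (out_features, in_features) layout; native MTJ checkpoints are stored for the untransposed path, so the flag must stay off for them. The sketch below only illustrates what such a flag means, under assumed weight layouts — it is not the backend's actual kernel:

```python
import numpy as np

def linear_apply(x: np.ndarray, w: np.ndarray, transposed_linear: bool) -> np.ndarray:
    # With the flag set, w is kept in the assumed HF (out, in) layout and the
    # transpose is folded into the matmul instead of materialized at load time.
    return x @ w.T if transposed_linear else x @ w

d_in, d_out = 4096, 16384
w_mtj = np.random.randn(d_in, d_out)      # assumed native layout: (in, out)
w_hf = np.ascontiguousarray(w_mtj.T)      # assumed HF layout: (out, in)

x = np.random.randn(8, d_in)
assert np.allclose(linear_apply(x, w_mtj, False), linear_apply(x, w_hf, True))
```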