Mirror of https://github.com/KoboldAI/KoboldAI-Client.git
aiserver.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python3
 #==================================================================#
 # KoboldAI
-# Version: 1.19.1
+# Version: 1.19.2
 # By: The KoboldAI Community
 #==================================================================#
 
@@ -125,6 +125,7 @@ model_menu = {
         ["NSFW Models", "nsfwlist", "", True],
         ["Untuned OPT", "optlist", "", True],
         ["Untuned GPT-Neo/J", "gptneolist", "", True],
+        ["Untuned Pythia", "pythialist", "", True],
         ["Untuned Fairseq Dense", "fsdlist", "", True],
         ["Untuned Bloom", "bloomlist", "", True],
         ["Untuned XGLM", "xglmlist", "", True],
@@ -154,6 +155,7 @@ model_menu = {
         ["OPT Nerys 6B V2 (Hybrid)", "KoboldAI/OPT-6B-nerys-v2", "16GB", False],
         ["Janeway FSD 6.7B", "KoboldAI/fairseq-dense-6.7B-Janeway", "16GB", False],
         ["Janeway Neo 6B", "KoboldAI/GPT-J-6B-Janeway", "16GB", False],
+        ["Qilin Lit 6B (SFW)", "rexwang8/qilin-lit-6b", "16GB", False],
         ["Janeway Neo 2.7B", "KoboldAI/GPT-Neo-2.7B-Janeway", "8GB", False],
         ["Janeway FSD 2.7B", "KoboldAI/fairseq-dense-2.7B-Janeway", "8GB", False],
         ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB", False],
@@ -183,12 +185,31 @@ model_menu = {
         ],
     'gptneolist': [
         ["GPT-NeoX 20B", "EleutherAI/gpt-neox-20b", "64GB", False],
+        ["Pythia 13B (NeoX, Same dataset)", "EleutherAI/pythia-13b", "32GB", False],
         ["GPT-J 6B", "EleutherAI/gpt-j-6B", "16GB", False],
         ["GPT-Neo 2.7B", "EleutherAI/gpt-neo-2.7B", "8GB", False],
         ["GPT-Neo 1.3B", "EleutherAI/gpt-neo-1.3B", "6GB", False],
+        ["Pythia 800M (NeoX, Same dataset)", "EleutherAI/pythia-800m", "4GB", False],
+        ["Pythia 350M (NeoX, Same dataset)", "EleutherAI/pythia-350m", "2GB", False],
         ["GPT-Neo 125M", "EleutherAI/gpt-neo-125M", "2GB", False],
         ["Return to Main Menu", "mainmenu", "", True],
         ],
+    'pythialist': [
+        ["Pythia 13B Deduped", "EleutherAI/pythia-13b-deduped", "32GB", False],
+        ["Pythia 13B", "EleutherAI/pythia-13b", "32GB", False],
+        ["Pythia 6.7B Deduped", "EleutherAI/pythia-6.7b-deduped", "16GB", False],
+        ["Pythia 6.7B", "EleutherAI/pythia-6.7b", "16GB", False],
+        ["Pythia 1.3B Deduped", "EleutherAI/pythia-1.3b-deduped", "6GB", False],
+        ["Pythia 1.3B", "EleutherAI/pythia-1.3b", "6GB", False],
+        ["Pythia 800M", "EleutherAI/pythia-800m", "4GB", False],
+        ["Pythia 350M Deduped", "EleutherAI/pythia-350m-deduped", "2GB", False],
+        ["Pythia 350M", "EleutherAI/pythia-350m", "2GB", False],
+        ["Pythia 125M Deduped", "EleutherAI/pythia-125m-deduped", "2GB", False],
+        ["Pythia 125M", "EleutherAI/pythia-125m", "2GB", False],
+        ["Pythia 19M Deduped", "EleutherAI/pythia-19m-deduped", "1GB", False],
+        ["Pythia 19M", "EleutherAI/pythia-19m", "1GB", False],
+        ["Return to Main Menu", "mainmenu", "", True],
+        ],
     'gpt2list': [
         ["GPT-2 XL", "gpt2-xl", "6GB", False],
         ["GPT-2 Large", "gpt2-large", "4GB", False],
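Note on the menu schema (inferred from the rows above; the diff itself never states it): each entry is a four-field list of display label, Hugging Face model ID or submenu key, rough memory requirement, and a flag marking navigation entries. A minimal, hypothetical sketch of how one row decodes:

    # Hypothetical decoder for one menu row; the field meanings are inferred
    # from the entries in this hunk (submenu rows like "pythialist" end in
    # True, loadable model rows in False).
    from typing import NamedTuple

    class MenuEntry(NamedTuple):
        label: str     # text shown in the menu
        target: str    # Hugging Face model ID, or a submenu key such as "pythialist"
        memory: str    # approximate (V)RAM needed; empty for navigation rows
        is_menu: bool  # True means `target` names another menu, not a model

    row = ["Pythia 13B Deduped", "EleutherAI/pythia-13b-deduped", "32GB", False]
    entry = MenuEntry(*row)
    print(entry.target)  # -> EleutherAI/pythia-13b-deduped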
@@ -996,7 +1017,7 @@ def loadmodelsettings():
     if("nobreakmodel" in js):
         vars.nobreakmodel = js["nobreakmodel"]
     if("sampler_order" in js):
-        sampler_order = vars.sampler_order
+        sampler_order = js["sampler_order"]
         if(len(sampler_order) < 7):
             sampler_order = [6] + sampler_order
         vars.sampler_order = sampler_order
@@ -1134,7 +1155,7 @@ def processsettings(js):
     if("andepth" in js):
         vars.andepth = js["andepth"]
     if("sampler_order" in js):
-        sampler_order = vars.sampler_order
+        sampler_order = js["sampler_order"]
         if(len(sampler_order) < 7):
             sampler_order = [6] + sampler_order
         vars.sampler_order = sampler_order
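The two hunks above fix the same bug in loadmodelsettings() and processsettings(): both meant to restore the sampler order saved in the settings JSON but read the live vars.sampler_order instead, so a saved order was never actually applied. The surrounding migration, prepending sampler ID 6 when an older save has fewer than seven entries, is unchanged context. A standalone sketch of the corrected logic:

    # Sketch only; `js` stands in for the parsed settings file and `current`
    # for vars.sampler_order. The [6] prepend migrates older six-entry saves
    # to the seven-sampler pipeline, exactly as in the context lines above.
    def restore_sampler_order(js: dict, current: list) -> list:
        if "sampler_order" not in js:
            return current                   # nothing saved, keep the live order
        sampler_order = js["sampler_order"]  # the fix: read the saved value
        if len(sampler_order) < 7:
            sampler_order = [6] + sampler_order
        return sampler_order

    print(restore_sampler_order({"sampler_order": [0, 1, 2, 3, 4, 5]}, list(range(7))))
    # -> [6, 0, 1, 2, 3, 4, 5]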
@@ -1,26 +0,0 @@
-name: koboldai
-channels:
-  - pytorch
-  - conda-forge
-  - defaults
-dependencies:
-  - colorama
-  - flask-socketio
-  - flask-session
-  - pytorch
-  - cudatoolkit=11.1
-  - tensorflow-gpu
-  - python=3.8.*
-  - eventlet
-  - markdown
-  - bleach=4.1.0
-  - pip
-  - git=2.35.1
-  - marshmallow>=3.13
-  - apispec-webframeworks
-  - loguru
-  - pip:
-    - git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b
-    - flask-cloudflared
-    - flask-ngrok
-    - lupa==1.10
@@ -21,6 +21,7 @@ dependencies:
   - apispec-webframeworks
   - loguru
   - termcolor
+  - psutil
   - pip:
     - flask-cloudflared
     - flask-ngrok
@@ -1,25 +0,0 @@
-name: koboldai-ft
-channels:
-  - conda-forge
-  - defaults
-dependencies:
-  - colorama
-  - flask-socketio
-  - flask-session
-  - python=3.8.*
-  - eventlet
-  - markdown
-  - bleach=4.1.0
-  - pip
-  - git=2.35.1
-  - marshmallow>=3.13
-  - apispec-webframeworks
-  - loguru
-  - pip:
-    - --find-links https://download.pytorch.org/whl/rocm4.2/torch_stable.html
-    - torch
-    - torchvision==0.11.1
-    - flask-cloudflared
-    - git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b
-    - flask-ngrok
-    - lupa==1.10
@@ -18,6 +18,7 @@ dependencies:
   - apispec-webframeworks
   - loguru
   - termcolor
+  - psutil
   - pip:
     - --extra-index-url https://download.pytorch.org/whl/rocm5.1.1
     - torch
@@ -9,11 +9,11 @@
     },
     "static_weights": {
         "transformer.wte.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}},
-        "transformer.wte.bias": {"mtj": {"module": "embedding_shard/~/linear", "param": "b"}},
+        "transformer.wte.bias": {"mtj": {"module": "embedding_shard/~/linear", "param": "b", "transforms": ["vocab_pad"]}},
         "transformer.ln_f.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}},
         "transformer.ln_f.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}},
         "lm_head.weight": {"mtj": {"module": "projection_shard/~/linear", "param": "w", "transforms": ["vocab_pad"]}},
-        "lm_head.bias": {"mtj": {"module": "projection_shard/~/linear", "param": "b"}}
+        "lm_head.bias": {"mtj": {"module": "projection_shard/~/linear", "param": "b", "transforms": ["vocab_pad"]}}
     },
     "layer_weights": {
         "transformer.h.{layer}.attn.bias": {},
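Note: this weight map now tags the 1-D embedding and output-head biases with "vocab_pad", so they get padded to the same rounded-up vocabulary size as their corresponding weight matrices. The generalized pad() call that makes this work on 1-D tensors appears in the load_model hunk further down.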
play.bat
@@ -2,6 +2,8 @@
 cd /D %~dp0
 SET CONDA_SHLVL=
 
+rmdir /S /Q flask_session
+
 TITLE KoboldAI - Server
 SET /P M=<loader.settings
 IF %M%==1 GOTO drivemap
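Note: rmdir /S /Q deletes the flask_session directory tree quietly (no confirmation prompt), presumably so stale server-side session data from a previous run is cleared on every launch.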
@@ -1149,7 +1149,8 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
             params[param] = default_params[param]
 
     # Use an optimization that will allow us to avoid one extra transpose operation
-    params["transposed_linear"] = True
+    if hf_checkpoint:
+        params["transposed_linear"] = True
 
     # Load tokenizer
     if vars.model == "TPUMeshTransformerGPTNeoX":
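Note: per the comments in this file, transposed_linear tells MTJ that linear weights are stored untransposed (Hugging Face layout) and should be transposed by MTJ itself. Gating it on hf_checkpoint restricts the optimization to converted HF checkpoints, presumably because native MTJ checkpoints already store weights in the layout MTJ expects.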
@@ -1307,7 +1308,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
                 if "divide_by_shards" in transforms:
                     tensor /= params["cores_per_replica"]
                 if "vocab_pad" in transforms:
-                    tensor = torch.nn.functional.pad(tensor, (0, 0, 0, params["n_vocab_padding"]))
+                    tensor = torch.nn.functional.pad(tensor, (0,) * (tensor.ndim * 2 - 1) + (params["n_vocab_padding"],))
                 # We don't need to transpose linear module weights anymore because MTJ will do it for us if `transposed_linear` is set to True in the config
                 #if "no_transpose" not in transforms and tensor.ndim == 2:
                 #    tensor = tensor.T
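Why the new pad tuple: torch.nn.functional.pad reads its pad argument as (left, right) pairs starting from the last dimension, so the old hard-coded (0, 0, 0, n) only fits 2-D weight matrices and raises an error on the 1-D biases the weight map above now tags with "vocab_pad". Building the tuple as (0,) * (ndim * 2 - 1) + (n,) pads only the end of dimension 0 for a tensor of any rank. A quick standalone check (assumes only that PyTorch is installed):

    import torch
    import torch.nn.functional as F

    pad = 3  # stand-in for params["n_vocab_padding"]
    for shape in [(5,), (5, 4)]:  # a 1-D bias and a 2-D weight matrix
        t = torch.zeros(shape)
        # 2*ndim pad values, all zero except the right side of dim 0
        padded = F.pad(t, (0,) * (t.ndim * 2 - 1) + (pad,))
        print(shape, "->", tuple(padded.shape))
    # (5,)   -> (8,)
    # (5, 4) -> (8, 4)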