Merge commit 'refs/pull/331/head' of https://github.com/ebolam/KoboldAI into UI2

2025-06-05 21:59:24 +02:00 · 2022-12-07 12:04:42 -05:00
parent fcacd53b62 d0cb463c53
commit c1cea71ed6
11 changed files with 162 additions and 63 deletions
--- a/aiserver.py
+++ b/aiserver.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python3
 #==================================================================#
 # KoboldAI
-# Version: 1.19.1
+# Version: 1.19.2
 # By: The KoboldAI Community
 #==================================================================#

@@ -171,6 +171,7 @@ model_menu = {
        ["NSFW Models", "nsfwlist", "", True],
        ["Untuned OPT", "optlist", "", True],
        ["Untuned GPT-Neo/J", "gptneolist", "", True],
+        ["Untuned Pythia", "pythialist", "", True],
        ["Untuned Fairseq Dense", "fsdlist", "", True],
        ["Untuned Bloom", "bloomlist", "", True],
        ["Untuned XGLM", "xglmlist", "", True],
@@ -201,6 +202,7 @@ model_menu = {
        ["OPT Nerys 6B V2 (Hybrid)", "KoboldAI/OPT-6B-nerys-v2", "16GB", False],
        ["Janeway FSD 6.7B", "KoboldAI/fairseq-dense-6.7B-Janeway", "16GB", False],
        ["Janeway Neo 6B", "KoboldAI/GPT-J-6B-Janeway", "16GB", False],
+        ["Qilin Lit 6B (SFW)", "rexwang8/qilin-lit-6b", "16GB", False],       
        ["Janeway Neo 2.7B", "KoboldAI/GPT-Neo-2.7B-Janeway", "8GB", False],
        ["Janeway FSD 2.7B", "KoboldAI/fairseq-dense-2.7B-Janeway", "8GB", False],
        ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB", False],
@@ -230,12 +232,31 @@ model_menu = {
        ],
    'gptneolist': [
        ["GPT-NeoX 20B", "EleutherAI/gpt-neox-20b", "64GB", False],
+        ["Pythia 13B (NeoX, Same dataset)", "EleutherAI/pythia-13b", "32GB", False],
        ["GPT-J 6B", "EleutherAI/gpt-j-6B", "16GB", False],
        ["GPT-Neo 2.7B", "EleutherAI/gpt-neo-2.7B", "8GB", False],
        ["GPT-Neo 1.3B", "EleutherAI/gpt-neo-1.3B", "6GB", False],
+        ["Pythia 800M (NeoX, Same dataset)", "EleutherAI/pythia-800m", "4GB", False],
+        ["Pythia 350M (NeoX, Same dataset)", "EleutherAI/pythia-350m", "2GB", False],
        ["GPT-Neo 125M", "EleutherAI/gpt-neo-125M", "2GB", False],
        ["Return to Main Menu", "mainmenu", "", True],
        ],
+    'pythialist': [
+        ["Pythia 13B Deduped", "EleutherAI/pythia-13b-deduped", "32GB", False],
+        ["Pythia 13B", "EleutherAI/pythia-13b", "32GB", False],
+        ["Pythia 6.7B Deduped", "EleutherAI/pythia-6.7b-deduped", "16GB", False],
+        ["Pythia 6.7B", "EleutherAI/pythia-6.7b", "16GB", False],
+        ["Pythia 1.3B Deduped", "EleutherAI/pythia-1.3b-deduped", "6GB", False],
+        ["Pythia 1.3B", "EleutherAI/pythia-1.3b", "6GB", False],
+        ["Pythia 800M", "EleutherAI/pythia-800m", "4GB", False],
+        ["Pythia 350M Deduped", "EleutherAI/pythia-350m-deduped", "2GB", False],
+        ["Pythia 350M", "EleutherAI/pythia-350m", "2GB", False],        
+        ["Pythia 125M Deduped", "EleutherAI/pythia-125m-deduped", "2GB", False],
+        ["Pythia 125M", "EleutherAI/pythia-125m", "2GB", False],
+        ["Pythia 19M Deduped", "EleutherAI/pythia-19m-deduped", "1GB", False],
+        ["Pythia 19M", "EleutherAI/pythia-19m", "1GB", False],
+        ["Return to Main Menu", "mainmenu", "", True],
+        ],
    'gpt2list': [
        ["GPT-2 XL", "gpt2-xl", "6GB", False],
        ["GPT-2 Large", "gpt2-large", "4GB", False],
@@ -1166,7 +1187,7 @@ def loadmodelsettings():
    if("nobreakmodel" in js):
        koboldai_vars.nobreakmodel = js["nobreakmodel"]
    if("sampler_order" in js):
-        sampler_order = koboldai_vars.sampler_order
+        sampler_order = js["sampler_order"]
        if(len(sampler_order) < 7):
            sampler_order = [6] + sampler_order
        koboldai_vars.sampler_order = sampler_order
@@ -1247,6 +1268,119 @@ def loadsettings():
        with open("settings/" + getmodelname().replace('/', '_') + ".v2_settings", "r") as file:
            getattr(koboldai_vars, "_model_settings").from_json(file.read())
        
+        processsettings(js)
+        file.close()
+    if(path.exists(get_config_filename())):
+        # Read file contents into JSON object
+        file = open(get_config_filename(), "r")
+        js   = json.load(file)
+        
+        processsettings(js)
+        file.close()
+        
+def processsettings(js):
+# Copy file contents to vars
+    if("apikey" in js):
+        # For the Horde (model "CLUSTER"), keep an API key that is already set:
+        # the key saved in settings is only applied when no key is currently set.
+        if koboldai_vars.model != "CLUSTER" or koboldai_vars.apikey == '':
+            koboldai_vars.apikey = js["apikey"]
+    if("andepth" in js):
+        koboldai_vars.andepth = js["andepth"]
+    if("sampler_order" in js):
+        sampler_order = js["sampler_order"]
+        if(len(sampler_order) < 7):
+            sampler_order = [6] + sampler_order
+        koboldai_vars.sampler_order = sampler_order
+    if("temp" in js):
+        koboldai_vars.temp = js["temp"]
+    if("top_p" in js):
+        koboldai_vars.top_p = js["top_p"]
+    if("top_k" in js):
+        koboldai_vars.top_k = js["top_k"]
+    if("tfs" in js):
+        koboldai_vars.tfs = js["tfs"]
+    if("typical" in js):
+        koboldai_vars.typical = js["typical"]
+    if("top_a" in js):
+        koboldai_vars.top_a = js["top_a"]
+    if("rep_pen" in js):
+        koboldai_vars.rep_pen = js["rep_pen"]
+    if("rep_pen_slope" in js):
+        koboldai_vars.rep_pen_slope = js["rep_pen_slope"]
+    if("rep_pen_range" in js):
+        koboldai_vars.rep_pen_range = js["rep_pen_range"]
+    if("genamt" in js):
+        koboldai_vars.genamt = js["genamt"]
+    if("max_length" in js):
+        koboldai_vars.max_length = js["max_length"]
+    if("ikgen" in js):
+        koboldai_vars.ikgen = js["ikgen"]
+    if("formatoptns" in js):
+        koboldai_vars.formatoptns = js["formatoptns"]
+    if("numseqs" in js):
+        koboldai_vars.numseqs = js["numseqs"]
+    if("widepth" in js):
+        koboldai_vars.widepth = js["widepth"]
+    if("useprompt" in js):
+        koboldai_vars.useprompt = js["useprompt"]
+    if("adventure" in js):
+        koboldai_vars.adventure = js["adventure"]
+    if("chatmode" in js):
+        koboldai_vars.chatmode = js["chatmode"]
+    if("chatname" in js):
+        koboldai_vars.chatname = js["chatname"]
+    if("dynamicscan" in js):
+        koboldai_vars.dynamicscan = js["dynamicscan"]
+    if("nopromptgen" in js):
+        koboldai_vars.nopromptgen = js["nopromptgen"]
+    if("rngpersist" in js):
+        koboldai_vars.rngpersist = js["rngpersist"]
+    if("nogenmod" in js):
+        koboldai_vars.nogenmod = js["nogenmod"]
+    if("fulldeterminism" in js):
+        koboldai_vars.full_determinism = js["fulldeterminism"]
+    if("autosave" in js):
+        koboldai_vars.autosave = js["autosave"]
+    if("newlinemode" in js):
+        koboldai_vars.newlinemode = js["newlinemode"]
+    if("welcome" in js):
+        koboldai_vars.welcome = js["welcome"]
+    if("output_streaming" in js):
+        koboldai_vars.output_streaming = js["output_streaming"]
+    if("show_probs" in js):
+        koboldai_vars.show_probs = js["show_probs"]
+    if("show_budget" in js):
+        koboldai_vars.show_budget = js["show_budget"]
+    
+    if("seed" in js):
+        koboldai_vars.seed = js["seed"]
+        if(koboldai_vars.seed is not None):
+            koboldai_vars.seed_specified = True
+        else:
+            koboldai_vars.seed_specified = False
+    else:
+        koboldai_vars.seed_specified = False
+
+    if("antemplate" in js):
+        koboldai_vars.setauthornotetemplate = js["antemplate"]
+        if(not koboldai_vars.gamestarted):
+            koboldai_vars.authornotetemplate = koboldai_vars.setauthornotetemplate
+    
+    if("userscripts" in js):
+        koboldai_vars.userscripts = []
+        for userscript in js["userscripts"]:
+            if type(userscript) is not str:
+                continue
+            userscript = userscript.strip()
+            if len(userscript) != 0 and all(q not in userscript for q in ("..", ":")) and all(userscript[0] not in q for q in ("/", "\\")) and os.path.exists(fileops.uspath(userscript)):
+                koboldai_vars.userscripts.append(userscript)
+
+    if("corescript" in js and type(js["corescript"]) is str and all(q not in js["corescript"] for q in ("..", ":")) and all(js["corescript"][0] not in q for q in ("/", "\\"))):
+        koboldai_vars.corescript = js["corescript"]
+    else:
+        koboldai_vars.corescript = "default.lua"
+
 #==================================================================#
 #  Load a soft prompt from a file
 #==================================================================#
@@ -2802,6 +2936,15 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                            if utils.num_shards is None or utils.current_shard >= utils.num_shards:
                                if utils.offload_index:
                                    for name, tensor in utils.named_buffers:
+                                        dtype = tensor.dtype
+                                        if convert_to_float16 and breakmodel.primary_device != "cpu" and vars.hascuda and (vars.breakmodel or vars.usegpu):
+                                            dtype = torch.float16
+                                        if breakmodel.primary_device == "cpu" or (not vars.usegpu and not vars.breakmodel):
+                                            dtype = torch.float32
+                                        if name in model_dict and model_dict[name].dtype is not dtype:
+                                            model_dict[name] = model_dict[name].to(dtype)
+                                        if tensor.dtype is not dtype:
+                                            tensor = tensor.to(dtype)
                                        if name not in utils.offload_index:
                                            accelerate.utils.offload_weight(tensor, name, "accelerate-disk-cache", index=utils.offload_index)
                                    accelerate.utils.save_offload_index(utils.offload_index, "accelerate-disk-cache")
@@ -2972,7 +3115,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                        if not args.colab or args.savemodel:
                            import shutil
                            tokenizer.save_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')))
-                            if(koboldai_vars.fp32_model):  # Use save_pretrained to convert fp32 models to fp16
+                            if(koboldai_vars.fp32_model and ("breakmodel" not in globals() or not breakmodel.disk_blocks)):  # Use save_pretrained to convert fp32 models to fp16, unless we are using disk cache because save_pretrained is not supported in that case
                                model = model.half()
                                model.save_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), max_shard_size="500MiB")
                            else:  # For fp16 models, we can just copy the model files directly
--- a/environments/finetuneanon.yml
+++ b/environments/finetuneanon.yml
@@ -1,26 +0,0 @@
-name: koboldai
-channels:
-  - pytorch
-  - conda-forge
-  - defaults
-dependencies:
-  - colorama
-  - flask-socketio
-  - flask-session
-  - pytorch
-  - cudatoolkit=11.1
-  - tensorflow-gpu
-  - python=3.8.*
-  - eventlet
-  - markdown
-  - bleach=4.1.0
-  - pip
-  - git=2.35.1
-  - marshmallow>=3.13
-  - apispec-webframeworks
-  - loguru
-  - pip:
-    - git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b
-    - flask-cloudflared
-    - flask-ngrok
-    - lupa==1.10
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -28,7 +28,8 @@ dependencies:
    - flask-cloudflared
    - flask-ngrok
    - lupa==1.10
-    - transformers>=4.24.0
+    - transformers==4.24.0
+    - huggingface_hub>=0.10.1
    - accelerate
    - git+https://github.com/VE-FORBRYDERNE/mkultra
    - flask-session
--- a/environments/rocm-finetune.yml
+++ b/environments/rocm-finetune.yml
@@ -1,25 +0,0 @@
-name: koboldai-ft
-channels:
-  - conda-forge
-  - defaults
-dependencies:
-  - colorama
-  - flask-socketio
-  - flask-session
-  - python=3.8.*
-  - eventlet
-  - markdown
-  - bleach=4.1.0
-  - pip
-  - git=2.35.1
-  - marshmallow>=3.13
-  - apispec-webframeworks
-  - loguru
-  - pip:
-    - --find-links https://download.pytorch.org/whl/rocm4.2/torch_stable.html
-    - torch
-    - torchvision==0.11.1
-    - flask-cloudflared
-    - git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b
-    - flask-ngrok
-    - lupa==1.10
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -28,7 +28,8 @@ dependencies:
    - flask-cloudflared
    - flask-ngrok
    - lupa==1.10
-    - transformers>=4.24.0
+    - transformers==4.24.0
+    - huggingface_hub>=0.10.1
    - accelerate
    - git+https://github.com/VE-FORBRYDERNE/mkultra
    - ansi2html
--- a/fileops.py
+++ b/fileops.py
@@ -86,7 +86,7 @@ def uspath(filename):
 def getstoryfiles():
    list = []
    for file in listdir("stories"):
-        if file.endswith(".json"):
+        if file.endswith(".json") and not file.endswith(".v2.json"):
            ob = {}
            ob["name"] = file.replace(".json", "")
            f = open("stories/"+file, "r")
--- a/maps/gptj.json
+++ b/maps/gptj.json
@@ -9,11 +9,11 @@
  },
  "static_weights": {
    "transformer.wte.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}},
-    "transformer.wte.bias": {"mtj": {"module": "embedding_shard/~/linear", "param": "b"}},
+    "transformer.wte.bias": {"mtj": {"module": "embedding_shard/~/linear", "param": "b", "transforms": ["vocab_pad"]}},
    "transformer.ln_f.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}},
    "transformer.ln_f.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}},
    "lm_head.weight": {"mtj": {"module": "projection_shard/~/linear", "param": "w", "transforms": ["vocab_pad"]}},
-    "lm_head.bias": {"mtj": {"module": "projection_shard/~/linear", "param": "b"}}
+    "lm_head.bias": {"mtj": {"module": "projection_shard/~/linear", "param": "b", "transforms": ["vocab_pad"]}}
  },
  "layer_weights": {
    "transformer.h.{layer}.attn.bias": {},
--- a/play.bat
+++ b/play.bat
@@ -2,6 +2,8 @@
 cd /D %~dp0
 SET CONDA_SHLVL=

+rmdir /S /Q flask_session
+
 TITLE KoboldAI - Server
 SET /P M=<loader.settings
 IF %M%==1 GOTO drivemap
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
-transformers>=4.20.1
+transformers==4.24.0
+huggingface_hub>=0.10.1
 Flask
 Flask-SocketIO
 requests
--- a/requirements_mtj.txt
+++ b/requirements_mtj.txt
@@ -5,7 +5,8 @@ requests
 dm-haiku == 0.0.5
 jax == 0.2.21
 jaxlib >= 0.1.69, <= 0.3.7
-transformers >=4.20.1
+transformers == 4.24.0
+huggingface_hub >= 0.10.1
 progressbar2
 git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck
 flask
--- a/tpu_mtj_backend.py
+++ b/tpu_mtj_backend.py
@@ -1180,7 +1180,8 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
            params[param] = default_params[param]

    # Use an optimization that will allow us to avoid one extra transpose operation
-    params["transposed_linear"] = True
+    if hf_checkpoint:
+        params["transposed_linear"] = True

    # Load tokenizer
    if koboldai_vars.model == "TPUMeshTransformerGPTNeoX":
@@ -1376,7 +1377,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
                    if "divide_by_shards" in transforms:
                        tensor /= params["cores_per_replica"]
                    if "vocab_pad" in transforms:
-                        tensor = torch.nn.functional.pad(tensor, (0, 0, 0, params["n_vocab_padding"]))
+                        tensor = torch.nn.functional.pad(tensor, (0,) * (tensor.ndim * 2 - 1) + (params["n_vocab_padding"],))
                    # We don't need to transpose linear module weights anymore because MTJ will do it for us if `transposed_linear` is set to True in the config
                    #if "no_transpose" not in transforms and tensor.ndim == 2:
                    #    tensor = tensor.T