Mirror of https://github.com/KoboldAI/KoboldAI-Client.git
aiserver.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python3
 #==================================================================#
 # KoboldAI
-# Version: 1.19.1
+# Version: 1.19.2
 # By: The KoboldAI Community
 #==================================================================#
 
@@ -125,6 +125,7 @@ model_menu = {
         ["NSFW Models", "nsfwlist", "", True],
         ["Untuned OPT", "optlist", "", True],
         ["Untuned GPT-Neo/J", "gptneolist", "", True],
+        ["Untuned Pythia", "pythialist", "", True],
         ["Untuned Fairseq Dense", "fsdlist", "", True],
         ["Untuned Bloom", "bloomlist", "", True],
         ["Untuned XGLM", "xglmlist", "", True],
@@ -154,6 +155,7 @@ model_menu = {
         ["OPT Nerys 6B V2 (Hybrid)", "KoboldAI/OPT-6B-nerys-v2", "16GB", False],
         ["Janeway FSD 6.7B", "KoboldAI/fairseq-dense-6.7B-Janeway", "16GB", False],
         ["Janeway Neo 6B", "KoboldAI/GPT-J-6B-Janeway", "16GB", False],
+        ["Qilin Lit 6B (SFW)", "rexwang8/qilin-lit-6b", "16GB", False],
         ["Janeway Neo 2.7B", "KoboldAI/GPT-Neo-2.7B-Janeway", "8GB", False],
         ["Janeway FSD 2.7B", "KoboldAI/fairseq-dense-2.7B-Janeway", "8GB", False],
         ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB", False],
@@ -183,12 +185,31 @@ model_menu = {
         ],
     'gptneolist': [
         ["GPT-NeoX 20B", "EleutherAI/gpt-neox-20b", "64GB", False],
+        ["Pythia 13B (NeoX, Same dataset)", "EleutherAI/pythia-13b", "32GB", False],
         ["GPT-J 6B", "EleutherAI/gpt-j-6B", "16GB", False],
         ["GPT-Neo 2.7B", "EleutherAI/gpt-neo-2.7B", "8GB", False],
         ["GPT-Neo 1.3B", "EleutherAI/gpt-neo-1.3B", "6GB", False],
+        ["Pythia 800M (NeoX, Same dataset)", "EleutherAI/pythia-800m", "4GB", False],
+        ["Pythia 350M (NeoX, Same dataset)", "EleutherAI/pythia-350m", "2GB", False],
         ["GPT-Neo 125M", "EleutherAI/gpt-neo-125M", "2GB", False],
         ["Return to Main Menu", "mainmenu", "", True],
         ],
+    'pythialist': [
+        ["Pythia 13B Deduped", "EleutherAI/pythia-13b-deduped", "32GB", False],
+        ["Pythia 13B", "EleutherAI/pythia-13b", "32GB", False],
+        ["Pythia 6.7B Deduped", "EleutherAI/pythia-6.7b-deduped", "16GB", False],
+        ["Pythia 6.7B", "EleutherAI/pythia-6.7b", "16GB", False],
+        ["Pythia 1.3B Deduped", "EleutherAI/pythia-1.3b-deduped", "6GB", False],
+        ["Pythia 1.3B", "EleutherAI/pythia-1.3b", "6GB", False],
+        ["Pythia 800M", "EleutherAI/pythia-800m", "4GB", False],
+        ["Pythia 350M Deduped", "EleutherAI/pythia-350m-deduped", "2GB", False],
+        ["Pythia 350M", "EleutherAI/pythia-350m", "2GB", False],
+        ["Pythia 125M Deduped", "EleutherAI/pythia-125m-deduped", "2GB", False],
+        ["Pythia 125M", "EleutherAI/pythia-125m", "2GB", False],
+        ["Pythia 19M Deduped", "EleutherAI/pythia-19m-deduped", "1GB", False],
+        ["Pythia 19M", "EleutherAI/pythia-19m", "1GB", False],
+        ["Return to Main Menu", "mainmenu", "", True],
+        ],
     'gpt2list': [
         ["GPT-2 XL", "gpt2-xl", "6GB", False],
         ["GPT-2 Large", "gpt2-large", "4GB", False],
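Note on the menu schema (inferred from the rows above; the diff itself never states it): each entry is a four-field list of display label, Hugging Face model ID or submenu key, rough memory requirement, and a flag marking navigation entries. A minimal, hypothetical sketch of how one row decodes:

    # Hypothetical decoder for one menu row; the field meanings are inferred
    # from the entries in this hunk (submenu rows like "pythialist" end in
    # True, loadable model rows in False).
    from typing import NamedTuple

    class MenuEntry(NamedTuple):
        label: str     # text shown in the menu
        target: str    # Hugging Face model ID, or a submenu key such as "pythialist"
        memory: str    # approximate (V)RAM needed; empty for navigation rows
        is_menu: bool  # True means `target` names another menu, not a model

    row = ["Pythia 13B Deduped", "EleutherAI/pythia-13b-deduped", "32GB", False]
    entry = MenuEntry(*row)
    print(entry.target)  # -> EleutherAI/pythia-13b-deduped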
@@ -996,7 +1017,7 @@ def loadmodelsettings():
     if("nobreakmodel" in js):
         vars.nobreakmodel = js["nobreakmodel"]
     if("sampler_order" in js):
-        sampler_order = vars.sampler_order
+        sampler_order = js["sampler_order"]
         if(len(sampler_order) < 7):
             sampler_order = [6] + sampler_order
         vars.sampler_order = sampler_order
@@ -1134,7 +1155,7 @@ def processsettings(js):
     if("andepth" in js):
         vars.andepth = js["andepth"]
     if("sampler_order" in js):
-        sampler_order = vars.sampler_order
+        sampler_order = js["sampler_order"]
         if(len(sampler_order) < 7):
             sampler_order = [6] + sampler_order
         vars.sampler_order = sampler_order
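The two hunks above fix the same bug in loadmodelsettings() and processsettings(): both meant to restore the sampler order saved in the settings JSON but read the live vars.sampler_order instead, so a saved order was never actually applied. The surrounding migration, prepending sampler ID 6 when an older save has fewer than seven entries, is unchanged context. A standalone sketch of the corrected logic:

    # Sketch only; `js` stands in for the parsed settings file and `current`
    # for vars.sampler_order. The [6] prepend migrates older six-entry saves
    # to the seven-sampler pipeline, exactly as in the context lines above.
    def restore_sampler_order(js: dict, current: list) -> list:
        if "sampler_order" not in js:
            return current                   # nothing saved, keep the live order
        sampler_order = js["sampler_order"]  # the fix: read the saved value
        if len(sampler_order) < 7:
            sampler_order = [6] + sampler_order
        return sampler_order

    print(restore_sampler_order({"sampler_order": [0, 1, 2, 3, 4, 5]}, list(range(7))))
    # -> [6, 0, 1, 2, 3, 4, 5]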
@@ -1,26 +0,0 @@
-name: koboldai
-channels:
-  - pytorch
-  - conda-forge
-  - defaults
-dependencies:
-  - colorama
-  - flask-socketio
-  - flask-session
-  - pytorch
-  - cudatoolkit=11.1
-  - tensorflow-gpu
-  - python=3.8.*
-  - eventlet
-  - markdown
-  - bleach=4.1.0
-  - pip
-  - git=2.35.1
-  - marshmallow>=3.13
-  - apispec-webframeworks
-  - loguru
-  - pip:
-    - git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b
-    - flask-cloudflared
-    - flask-ngrok
-    - lupa==1.10
@@ -21,6 +21,7 @@ dependencies:
   - apispec-webframeworks
   - loguru
   - termcolor
+  - psutil
   - pip:
     - flask-cloudflared
     - flask-ngrok
@@ -1,25 +0,0 @@
-name: koboldai-ft
-channels:
-  - conda-forge
-  - defaults
-dependencies:
-  - colorama
-  - flask-socketio
-  - flask-session
-  - python=3.8.*
-  - eventlet
-  - markdown
-  - bleach=4.1.0
-  - pip
-  - git=2.35.1
-  - marshmallow>=3.13
-  - apispec-webframeworks
-  - loguru
-  - pip:
-    - --find-links https://download.pytorch.org/whl/rocm4.2/torch_stable.html
-    - torch
-    - torchvision==0.11.1
-    - flask-cloudflared
-    - git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b
-    - flask-ngrok
-    - lupa==1.10
@@ -18,6 +18,7 @@ dependencies:
   - apispec-webframeworks
   - loguru
   - termcolor
+  - psutil
   - pip:
     - --extra-index-url https://download.pytorch.org/whl/rocm5.1.1
     - torch
@@ -9,11 +9,11 @@
     },
     "static_weights": {
         "transformer.wte.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}},
-        "transformer.wte.bias": {"mtj": {"module": "embedding_shard/~/linear", "param": "b"}},
+        "transformer.wte.bias": {"mtj": {"module": "embedding_shard/~/linear", "param": "b", "transforms": ["vocab_pad"]}},
         "transformer.ln_f.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}},
         "transformer.ln_f.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}},
         "lm_head.weight": {"mtj": {"module": "projection_shard/~/linear", "param": "w", "transforms": ["vocab_pad"]}},
-        "lm_head.bias": {"mtj": {"module": "projection_shard/~/linear", "param": "b"}}
+        "lm_head.bias": {"mtj": {"module": "projection_shard/~/linear", "param": "b", "transforms": ["vocab_pad"]}}
     },
     "layer_weights": {
         "transformer.h.{layer}.attn.bias": {},
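Note: this weight map now tags the 1-D embedding and output-head biases with "vocab_pad", so they get padded to the same rounded-up vocabulary size as their corresponding weight matrices. The generalized pad() call that makes this work on 1-D tensors appears in the load_model hunk further down.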
play.bat
@@ -2,6 +2,8 @@
 cd /D %~dp0
 SET CONDA_SHLVL=
 
+rmdir /S /Q flask_session
+
 TITLE KoboldAI - Server
 SET /P M=<loader.settings
 IF %M%==1 GOTO drivemap
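Note: rmdir /S /Q deletes the flask_session directory tree quietly (no confirmation prompt), presumably so stale server-side session data from a previous run is cleared on every launch.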
@@ -1149,7 +1149,8 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
             params[param] = default_params[param]
 
     # Use an optimization that will allow us to avoid one extra transpose operation
-    params["transposed_linear"] = True
+    if hf_checkpoint:
+        params["transposed_linear"] = True
 
     # Load tokenizer
     if vars.model == "TPUMeshTransformerGPTNeoX":
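Note: per the comments in this file, transposed_linear tells MTJ that linear weights are stored untransposed (Hugging Face layout) and should be transposed by MTJ itself. Gating it on hf_checkpoint restricts the optimization to converted HF checkpoints, presumably because native MTJ checkpoints already store weights in the layout MTJ expects.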
@@ -1307,7 +1308,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
                 if "divide_by_shards" in transforms:
                     tensor /= params["cores_per_replica"]
                 if "vocab_pad" in transforms:
-                    tensor = torch.nn.functional.pad(tensor, (0, 0, 0, params["n_vocab_padding"]))
+                    tensor = torch.nn.functional.pad(tensor, (0,) * (tensor.ndim * 2 - 1) + (params["n_vocab_padding"],))
                 # We don't need to transpose linear module weights anymore because MTJ will do it for us if `transposed_linear` is set to True in the config
                 #if "no_transpose" not in transforms and tensor.ndim == 2:
                 #    tensor = tensor.T
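Why the new pad tuple: torch.nn.functional.pad reads its pad argument as (left, right) pairs starting from the last dimension, so the old hard-coded (0, 0, 0, n) only fits 2-D weight matrices and raises an error on the 1-D biases the weight map above now tags with "vocab_pad". Building the tuple as (0,) * (ndim * 2 - 1) + (n,) pads only the end of dimension 0 for a tensor of any rank. A quick standalone check (assumes only that PyTorch is installed):

    import torch
    import torch.nn.functional as F

    pad = 3  # stand-in for params["n_vocab_padding"]
    for shape in [(5,), (5, 4)]:  # a 1-D bias and a 2-D weight matrix
        t = torch.zeros(shape)
        # 2*ndim pad values, all zero except the right side of dim 0
        padded = F.pad(t, (0,) * (t.ndim * 2 - 1) + (pad,))
        print(shape, "->", tuple(padded.shape))
    # (5,)   -> (8,)
    # (5, 4) -> (8, 4)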