RWKV Update
@@ -43,7 +43,7 @@ T_MAX = 1024 # increase this if your ctx_len is long [NOTE: TAKES LOTS OF VRAM!]
 # it's possible to go beyond CUDA limitations if you slice the ctx and pass the hidden state in each slice
 
 from torch.utils.cpp_extension import load
-wkv_cuda = load(name="wkv", sources=["models/RWKV4/cuda/wkv_op.cpp", "models/RWKV4/cuda/wkv_cuda.cu"],
+wkv_cuda = load(name="wkv", sources=["RWKV4/cuda/wkv_op.cpp", "RWKV4/cuda/wkv_cuda.cu"],
                 verbose=True, extra_cuda_cflags=['-res-usage', '--maxrregcount 60', '--use_fast_math', '-O3', '-Xptxas -O3', f'-DTmax={T_MAX}'])
 
 class WKV(torch.autograd.Function):
@@ -23,7 +23,7 @@ if os.environ['RWKV_RUN_DEVICE'] == 'cuda':
     # it's possible to go beyond CUDA limitations if you slice the ctx and pass the hidden state in each slice
 
     from torch.utils.cpp_extension import load
-    wkv_cuda = load(name="wkv", sources=["models/RWKV4/cuda/wkv_op.cpp", "models/RWKV4/cuda/wkv_cuda.cu"],
+    wkv_cuda = load(name="wkv", sources=["RWKV4/cuda/wkv_op.cpp", "RWKV4/cuda/wkv_cuda.cu"],
                     verbose=True, extra_cuda_cflags=['-res-usage', '--maxrregcount 60', '--use_fast_math', '-O3', '-Xptxas -O3', f'-DTmax={T_MAX}'])
 
     class WKV(torch.autograd.Function):
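Both hunks make the same change in the two RWKV4 source files (their names are elided in this view): torch.utils.cpp_extension.load resolves relative source paths against the process working directory, so the "models/" prefix has to go once the kernels live under "RWKV4/". A sketch of a launch-directory-independent alternative (the __file__-anchored paths are an illustration, not part of this commit):

    import os
    from torch.utils.cpp_extension import load

    T_MAX = 1024  # compile-time context limit baked into the kernel

    # Resolve the kernel sources relative to this file rather than the
    # current working directory, so the build works from any launch dir.
    _HERE = os.path.dirname(os.path.abspath(__file__))
    wkv_cuda = load(
        name="wkv",
        sources=[os.path.join(_HERE, "cuda", "wkv_op.cpp"),
                 os.path.join(_HERE, "cuda", "wkv_cuda.cu")],
        verbose=True,
        extra_cuda_cflags=["-res-usage", "--use_fast_math", "-O3", f"-DTmax={T_MAX}"],
    )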
aiserver.py
@@ -144,7 +144,7 @@ model_menu = {
         ["Untuned Fairseq Dense", "fsdlist", "", True],
         ["Untuned Bloom", "bloomlist", "", True],
         ["Untuned XGLM", "xglmlist", "", True],
-        ["Untuned RWKV-4", "rwkvlist", "", True],
+        ["Untuned RWKV-4 (Experimental)", "rwkvlist", "", True],
         ["Untuned GPT2", "gpt2list", "", True],
         ["Online Services", "apilist", "", True],
         ["Read Only (No AI)", "ReadOnly", "", False]
@@ -756,6 +756,7 @@ def sendModelSelection(menu="mainmenu", folder="./models"):
     #If we send one of the manual load options, send back the list of model directories, otherwise send the menu
     if menu in ('NeoCustom', 'GPT2Custom'):
         (paths, breadcrumbs) = get_folder_path_info(folder)
+        paths = [x for x in paths if "rwkv" not in x[1].lower()]
         if koboldai_vars.host:
             breadcrumbs = []
         menu_list = [[folder, menu, "", False] for folder in paths]
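The added comprehension hides RWKV folders from the NeoCustom/GPT2Custom directory browser, presumably because those loaders can't handle the RWKV checkpoint layout. Assuming get_folder_path_info returns (path, display_name) pairs — inferred from the x[1] indexing, not stated in the diff — the filter behaves like this minimal sketch:

    # Minimal sketch, assuming (path, display_name) pairs.
    paths = [("./models/GPT-Neo-2.7B", "GPT-Neo-2.7B"),
             ("./models/RWKV4", "RWKV4")]
    paths = [x for x in paths if "rwkv" not in x[1].lower()]
    print(paths)  # [('./models/GPT-Neo-2.7B', 'GPT-Neo-2.7B')] -- RWKV folder hidden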
@@ -777,7 +778,9 @@ def sendModelSelection(menu="mainmenu", folder="./models"):
             } for m in menu_list_ui_2]
         emit('show_model_menu', {'data': p_menu, 'menu': menu, 'breadcrumbs': breadcrumbs, "showdelete": showdelete}, broadcast=False)
     else:
-        emit('from_server', {'cmd': 'show_model_menu', 'data': model_menu[menu], 'menu': menu, 'breadcrumbs': [], "showdelete": False}, broadcast=True, room="UI_1")
+        # Hide experimental models unless experimental mode is enabled
+        filtered_menu = [x for x in model_menu[menu] if koboldai_vars.experimental_features or "(experimental)" not in x[0].lower()]
+        emit('from_server', {'cmd': 'show_model_menu', 'data': filtered_menu, 'menu': menu, 'breadcrumbs': [], "showdelete": False}, broadcast=True, room="UI_1")
 
         p_menu = [{
             "label": m[0],
@@ -785,7 +788,7 @@ def sendModelSelection(menu="mainmenu", folder="./models"):
             "size": m[2],
             "isMenu": m[3],
             "isDownloaded": is_model_downloaded(m[1]) if not m[3] else False,
-        } for m in model_menu[menu]]
+        } for m in filtered_menu]
         emit('show_model_menu', {'data': p_menu, 'menu': menu, 'breadcrumbs': [], "showdelete": False}, broadcast=False)
 
 def get_folder_path_info(base):
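Together these two hunks filter the menu once and reuse the result for both UIs: an entry survives unless its label contains "(experimental)" while experimental features are off. A standalone sketch of the behavior, using a stand-in for koboldai_vars:

    class _Vars:  # stand-in for koboldai_vars in this sketch
        experimental_features = False

    koboldai_vars = _Vars()
    menu_entries = [
        ["Untuned XGLM", "xglmlist", "", True],
        ["Untuned RWKV-4 (Experimental)", "rwkvlist", "", True],
    ]
    filtered_menu = [x for x in menu_entries
                     if koboldai_vars.experimental_features
                     or "(experimental)" not in x[0].lower()]
    print([x[0] for x in filtered_menu])  # ['Untuned XGLM'] -- RWKV-4 hidden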
@@ -5819,14 +5822,19 @@ def rwkv_init(model_class: str, use_gpu: bool = False):
 
     os.environ["RWKV_RUN_DEVICE"] = device
 
-    TOKENIZER_PATH = "models/RWKV4/20B_tokenizer.json"
-    MODEL_DIR = "models/RWKV4/models"
+    TOKENIZER_PATH = "RWKV4/20B_tokenizer.json"
+    MODEL_DIR = "models"
 
     model_files = os.listdir(MODEL_DIR)
-    matching_models = [f for f in model_files if f.startswith(f"RWKV-4-Pile-{model_class}")]
+    matching_models = [f for f in model_files if f.startswith(f"RWKV-4-{model_class}")]
 
     if not matching_models:
-        raise RuntimeError(f"No models of class '{model_class}' found in '{MODEL_DIR}'. Did you rename the model?")
-    model_path = os.path.join(MODEL_DIR, sorted(matching_models)[-1])
+        raise RuntimeError(
+            f"No models of class '{model_class}' found in '{MODEL_DIR}'. Please download a model from "
+            "https://huggingface.co/BlinkDL, rename the .pth file to 'model.pth', and place it in a directory named "
+            f"'{MODEL_DIR}/RWKV-4-XYZ', where XYZ is the parameter string of the model (169M, 430M, 1B5, 3B, or 7B)."
+        )
+    model_path = os.path.join(MODEL_DIR, sorted(matching_models)[-1], "model.pth")
 
     model_config = {
         "169M": RWKVConfig(n_layer=12, n_embed=768, ctx_len=1024),
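The reworked lookup drops the hard-coded "Pile" infix, scans the shared models directory for folders named RWKV-4-<class>, picks the lexicographically last match, and expects a model.pth inside it, matching the layout the new error message describes. The selection logic in isolation:

    import os

    MODEL_DIR = "models"
    model_class = "1B5"  # one of: 169M, 430M, 1B5, 3B, 7B

    model_files = os.listdir(MODEL_DIR)
    matching_models = [f for f in model_files if f.startswith(f"RWKV-4-{model_class}")]
    if not matching_models:
        raise RuntimeError(f"No models of class '{model_class}' found in '{MODEL_DIR}'.")
    # sorted()[-1]: the lexicographically last folder wins if several match.
    model_path = os.path.join(MODEL_DIR, sorted(matching_models)[-1], "model.pth")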
@@ -5840,10 +5848,10 @@ def rwkv_init(model_class: str, use_gpu: bool = False):
         raise RuntimeError(f"No config for model '{model_class}' found!")
 
     if not os.path.exists(TOKENIZER_PATH):
-        raise RuntimeError(f"Can't find tokenizer at '{TOKENIZER_PATH}'. Did you download it and put it at that location?")
+        raise RuntimeError(f"Can't find tokenizer at '{TOKENIZER_PATH}'!")
 
     # Model stuff
-    from models.RWKV4.src.model_run import RWKV_RNN
+    from RWKV4.src.model_run import RWKV_RNN
     from transformers import PreTrainedTokenizerFast
     from torch.nn import functional as F
 
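With the import path shortened to match the relocated package, initialization can proceed; loading the tokenizer from the JSON file presumably looks like this sketch (only TOKENIZER_PATH comes from the diff, the rest is illustrative):

    from transformers import PreTrainedTokenizerFast

    TOKENIZER_PATH = "RWKV4/20B_tokenizer.json"
    tokenizer = PreTrainedTokenizerFast(tokenizer_file=TOKENIZER_PATH)
    ids = tokenizer.encode("Hello RWKV")   # -> list of token ids
    print(tokenizer.decode(ids))           # round-trips the input text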