diff --git a/models/RWKV4/LICENSE.txt b/RWKV4/LICENSE.txt
similarity index 100%
rename from models/RWKV4/LICENSE.txt
rename to RWKV4/LICENSE.txt
diff --git a/models/RWKV4/cuda/wkv_cuda.cu b/RWKV4/cuda/wkv_cuda.cu
similarity index 100%
rename from models/RWKV4/cuda/wkv_cuda.cu
rename to RWKV4/cuda/wkv_cuda.cu
diff --git a/models/RWKV4/cuda/wkv_op.cpp b/RWKV4/cuda/wkv_op.cpp
similarity index 100%
rename from models/RWKV4/cuda/wkv_op.cpp
rename to RWKV4/cuda/wkv_op.cpp
diff --git a/models/RWKV4/src/model.py b/RWKV4/src/model.py
similarity index 99%
rename from models/RWKV4/src/model.py
rename to RWKV4/src/model.py
index 085a0bcb..63f7315c 100644
--- a/models/RWKV4/src/model.py
+++ b/RWKV4/src/model.py
@@ -43,7 +43,7 @@
 T_MAX = 1024 # increase this if your ctx_len is long [NOTE: TAKES LOTS OF VRAM!]
 # it's possible to go beyond CUDA limitations if you slice the ctx and pass the hidden state in each slice
 from torch.utils.cpp_extension import load
-wkv_cuda = load(name="wkv", sources=["models/RWKV4/cuda/wkv_op.cpp", "models/RWKV4/cuda/wkv_cuda.cu"],
+wkv_cuda = load(name="wkv", sources=["RWKV4/cuda/wkv_op.cpp", "RWKV4/cuda/wkv_cuda.cu"],
                 verbose=True, extra_cuda_cflags=['-res-usage', '--maxrregcount 60', '--use_fast_math', '-O3', '-Xptxas -O3', f'-DTmax={T_MAX}'])
 
 class WKV(torch.autograd.Function):
diff --git a/models/RWKV4/src/model_run.py b/RWKV4/src/model_run.py
similarity index 99%
rename from models/RWKV4/src/model_run.py
rename to RWKV4/src/model_run.py
index 0373dda5..74c719d3 100644
--- a/models/RWKV4/src/model_run.py
+++ b/RWKV4/src/model_run.py
@@ -23,7 +23,7 @@
 if os.environ['RWKV_RUN_DEVICE'] == 'cuda':
     # it's possible to go beyond CUDA limitations if you slice the ctx and pass the hidden state in each slice
     from torch.utils.cpp_extension import load
-    wkv_cuda = load(name="wkv", sources=["models/RWKV4/cuda/wkv_op.cpp", "models/RWKV4/cuda/wkv_cuda.cu"],
+    wkv_cuda = load(name="wkv", sources=["RWKV4/cuda/wkv_op.cpp", "RWKV4/cuda/wkv_cuda.cu"],
                     verbose=True, extra_cuda_cflags=['-res-usage', '--maxrregcount 60', '--use_fast_math', '-O3', '-Xptxas -O3', f'-DTmax={T_MAX}'])
 
     class WKV(torch.autograd.Function):
diff --git a/models/RWKV4/src/utils.py b/RWKV4/src/utils.py
similarity index 100%
rename from models/RWKV4/src/utils.py
rename to RWKV4/src/utils.py
diff --git a/aiserver.py b/aiserver.py
index ca200088..ec216c4a 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -144,7 +144,7 @@ model_menu = {
         ["Untuned Fairseq Dense", "fsdlist", "", True],
         ["Untuned Bloom", "bloomlist", "", True],
         ["Untuned XGLM", "xglmlist", "", True],
-        ["Untuned RWKV-4", "rwkvlist", "", True],
+        ["Untuned RWKV-4 (Experimental)", "rwkvlist", "", True],
         ["Untuned GPT2", "gpt2list", "", True],
         ["Online Services", "apilist", "", True],
         ["Read Only (No AI)", "ReadOnly", "", False]
@@ -756,6 +756,7 @@ def sendModelSelection(menu="mainmenu", folder="./models"):
     #If we send one of the manual load options, send back the list of model directories, otherwise send the menu
     if menu in ('NeoCustom', 'GPT2Custom'):
         (paths, breadcrumbs) = get_folder_path_info(folder)
+        paths = [x for x in paths if "rwkv" not in x[1].lower()]
         if koboldai_vars.host:
             breadcrumbs = []
         menu_list = [[folder, menu, "", False] for folder in paths]
@@ -777,7 +778,9 @@
         } for m in menu_list_ui_2]
         emit('show_model_menu', {'data': p_menu, 'menu': menu, 'breadcrumbs': breadcrumbs, "showdelete": showdelete}, broadcast=False)
     else:
-        emit('from_server', {'cmd': 'show_model_menu', 'data': model_menu[menu], 'menu': menu, 'breadcrumbs': [], "showdelete": False}, broadcast=True, room="UI_1")
+        # Hide experimental models unless experimental mode is enabled
+        filtered_menu = [x for x in model_menu[menu] if koboldai_vars.experimental_features or "(experimental)" not in x[0].lower()]
+        emit('from_server', {'cmd': 'show_model_menu', 'data': filtered_menu, 'menu': menu, 'breadcrumbs': [], "showdelete": False}, broadcast=True, room="UI_1")
 
         p_menu = [{
             "label": m[0],
@@ -785,7 +788,7 @@
             "size": m[2],
             "isMenu": m[3],
             "isDownloaded": is_model_downloaded(m[1]) if not m[3] else False,
-        } for m in model_menu[menu]]
+        } for m in filtered_menu]
         emit('show_model_menu', {'data': p_menu, 'menu': menu, 'breadcrumbs': [], "showdelete": False}, broadcast=False)
 
 def get_folder_path_info(base):
@@ -5819,14 +5822,19 @@ def rwkv_init(model_class: str, use_gpu: bool = False):
 
     os.environ["RWKV_RUN_DEVICE"] = device
 
-    TOKENIZER_PATH = "models/RWKV4/20B_tokenizer.json"
-    MODEL_DIR = "models/RWKV4/models"
+    TOKENIZER_PATH = "RWKV4/20B_tokenizer.json"
+    MODEL_DIR = "models"
 
     model_files = os.listdir(MODEL_DIR)
-    matching_models = [f for f in model_files if f.startswith(f"RWKV-4-Pile-{model_class}")]
+    matching_models = [f for f in model_files if f.startswith(f"RWKV-4-{model_class}")]
+
     if not matching_models:
-        raise RuntimeError(f"No models of class '{model_class}' found in '{MODEL_DIR}'. Did you rename the model?")
-    model_path = os.path.join(MODEL_DIR, sorted(matching_models)[-1])
+        raise RuntimeError(
+            f"No models of class '{model_class}' found in '{MODEL_DIR}'. Please download a model from "
+            "https://huggingface.co/BlinkDL, rename the .pth file to 'model.pth', and place it in a directory named "
+            f"'{MODEL_DIR}/RWKV-4-XYZ', where XYZ is the parameter string of the model (169M, 430M, 1B5, 3B, or 7B)."
+        )
+    model_path = os.path.join(MODEL_DIR, sorted(matching_models)[-1], "model.pth")
 
     model_config = {
         "169M": RWKVConfig(n_layer=12, n_embed=768, ctx_len=1024),
@@ -5840,10 +5848,10 @@
         raise RuntimeError(f"No config for model '{model_class}' found!")
 
     if not os.path.exists(TOKENIZER_PATH):
-        raise RuntimeError(f"Can't find tokenizer at '{TOKENIZER_PATH}'. Did you download it and put it at that location?")
+        raise RuntimeError(f"Can't find tokenizer at '{TOKENIZER_PATH}'!")
 
     # Model stuff
-    from models.RWKV4.src.model_run import RWKV_RNN
+    from RWKV4.src.model_run import RWKV_RNN
     from transformers import PreTrainedTokenizerFast
     from torch.nn import functional as F
 
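For reviewers: a minimal standalone sketch of the "(experimental)" menu filter added in sendModelSelection above. The sample menu entries and the experimental_features flag are illustrative stand-ins, not the real koboldai_vars state:

# Standalone sketch of the "(experimental)" filter from the patch; the
# sample menu and the experimental_features flag are stand-ins.
experimental_features = False

model_menu = [
    ["Untuned RWKV-4 (Experimental)", "rwkvlist", "", True],
    ["Untuned GPT2", "gpt2list", "", True],
]

# Same comprehension as in the patch: keep an entry unless it is labeled
# experimental and experimental mode is off.
filtered_menu = [x for x in model_menu
                 if experimental_features or "(experimental)" not in x[0].lower()]

print([x[0] for x in filtered_menu])  # -> ['Untuned GPT2']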
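Likewise, a sketch of how the patched rwkv_init now resolves a checkpoint under the new layout (models/RWKV-4-XYZ/model.pth); the helper name resolve_rwkv_model is hypothetical and exists only for illustration:

import os

MODEL_DIR = "models"

def resolve_rwkv_model(model_class: str) -> str:
    # Hypothetical helper mirroring the lookup in rwkv_init: match any
    # directory named RWKV-4-<class>* and expect a model.pth inside it.
    matching = [f for f in os.listdir(MODEL_DIR)
                if f.startswith(f"RWKV-4-{model_class}")]
    if not matching:
        raise RuntimeError(f"No models of class '{model_class}' found in '{MODEL_DIR}'.")
    # sorted()[-1]: if several directories match, the lexicographically
    # last one wins (e.g. 'RWKV-4-3B-v2' over 'RWKV-4-3B-v1').
    return os.path.join(MODEL_DIR, sorted(matching)[-1], "model.pth")

# e.g. resolve_rwkv_model("169M") -> "models/RWKV-4-169M/model.pth"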