mirror of https://github.com/KoboldAI/KoboldAI-Client.git

RWKV Update
@@ -43,7 +43,7 @@ T_MAX = 1024 # increase this if your ctx_len is long [NOTE: TAKES LOTS OF VRAM!]
 # it's possible to go beyond CUDA limitations if you slice the ctx and pass the hidden state in each slice
 
 from torch.utils.cpp_extension import load
-wkv_cuda = load(name="wkv", sources=["models/RWKV4/cuda/wkv_op.cpp", "models/RWKV4/cuda/wkv_cuda.cu"],
+wkv_cuda = load(name="wkv", sources=["RWKV4/cuda/wkv_op.cpp", "RWKV4/cuda/wkv_cuda.cu"],
                 verbose=True, extra_cuda_cflags=['-res-usage', '--maxrregcount 60', '--use_fast_math', '-O3', '-Xptxas -O3', f'-DTmax={T_MAX}'])
 
 class WKV(torch.autograd.Function):
@@ -23,7 +23,7 @@ if os.environ['RWKV_RUN_DEVICE'] == 'cuda':
 # it's possible to go beyond CUDA limitations if you slice the ctx and pass the hidden state in each slice
 
 from torch.utils.cpp_extension import load
-wkv_cuda = load(name="wkv", sources=["models/RWKV4/cuda/wkv_op.cpp", "models/RWKV4/cuda/wkv_cuda.cu"],
+wkv_cuda = load(name="wkv", sources=["RWKV4/cuda/wkv_op.cpp", "RWKV4/cuda/wkv_cuda.cu"],
                 verbose=True, extra_cuda_cflags=['-res-usage', '--maxrregcount 60', '--use_fast_math', '-O3', '-Xptxas -O3', f'-DTmax={T_MAX}'])
 
 class WKV(torch.autograd.Function):
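
Both hunks make the same fix: torch.utils.cpp_extension.load receives the kernel sources as paths relative to the process working directory, so when the RWKV4 tree moved out of models/, the models/ prefix had to move with it. A minimal sketch of a location-independent alternative, not part of the commit (KERNEL_DIR and the use of __file__ here are illustrative):

# Sketch only: anchor the kernel sources to this file's own location instead
# of the working directory, so the load() call survives future relocations.
import os

from torch.utils.cpp_extension import load

T_MAX = 1024  # same meaning as in the hunks above
KERNEL_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "cuda")

wkv_cuda = load(
    name="wkv",
    sources=[
        os.path.join(KERNEL_DIR, "wkv_op.cpp"),
        os.path.join(KERNEL_DIR, "wkv_cuda.cu"),
    ],
    verbose=True,
    extra_cuda_cflags=["-res-usage", "--maxrregcount 60", "--use_fast_math",
                       "-O3", "-Xptxas -O3", f"-DTmax={T_MAX}"],
)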
aiserver.py (28 changed lines)
@@ -144,7 +144,7 @@ model_menu = {
         ["Untuned Fairseq Dense", "fsdlist", "", True],
         ["Untuned Bloom", "bloomlist", "", True],
         ["Untuned XGLM", "xglmlist", "", True],
-        ["Untuned RWKV-4", "rwkvlist", "", True],
+        ["Untuned RWKV-4 (Experimental)", "rwkvlist", "", True],
         ["Untuned GPT2", "gpt2list", "", True],
         ["Online Services", "apilist", "", True],
         ["Read Only (No AI)", "ReadOnly", "", False]
@@ -756,6 +756,7 @@ def sendModelSelection(menu="mainmenu", folder="./models"):
     #If we send one of the manual load options, send back the list of model directories, otherwise send the menu
     if menu in ('NeoCustom', 'GPT2Custom'):
         (paths, breadcrumbs) = get_folder_path_info(folder)
+        paths = [x for x in paths if "rwkv" not in x[1].lower()]
         if koboldai_vars.host:
             breadcrumbs = []
         menu_list = [[folder, menu, "", False] for folder in paths]
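
The added line keeps RWKV model folders out of the NeoCustom/GPT2Custom directory browser, since RWKV models are loaded through a separate path. A self-contained illustration of its effect, assuming get_folder_path_info returns (path, name) pairs (the sample data below is made up):

# Hypothetical sample data; only the list comprehension is taken verbatim
# from the commit.
paths = [
    ("./models/gpt-neo-2.7B", "gpt-neo-2.7B"),
    ("./models/RWKV-4-169M", "RWKV-4-169M"),
]
paths = [x for x in paths if "rwkv" not in x[1].lower()]
assert paths == [("./models/gpt-neo-2.7B", "gpt-neo-2.7B")]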
@@ -777,7 +778,9 @@ def sendModelSelection(menu="mainmenu", folder="./models"):
             } for m in menu_list_ui_2]
         emit('show_model_menu', {'data': p_menu, 'menu': menu, 'breadcrumbs': breadcrumbs, "showdelete": showdelete}, broadcast=False)
     else:
-        emit('from_server', {'cmd': 'show_model_menu', 'data': model_menu[menu], 'menu': menu, 'breadcrumbs': [], "showdelete": False}, broadcast=True, room="UI_1")
+        # Hide experimental models unless experimental mode is enabled
+        filtered_menu = [x for x in model_menu[menu] if koboldai_vars.experimental_features or "(experimental)" not in x[0].lower()]
+        emit('from_server', {'cmd': 'show_model_menu', 'data': filtered_menu, 'menu': menu, 'breadcrumbs': [], "showdelete": False}, broadcast=True, room="UI_1")
 
         p_menu = [{
             "label": m[0],
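
Together with the earlier rename to "Untuned RWKV-4 (Experimental)", this is the mechanism that hides the entry by default: a menu item is dropped whenever its label contains "(experimental)" and experimental features are off. A runnable sketch, where menu entries follow the [label, id, size, isMenu] shape seen in the model_menu hunk and the flag mirrors koboldai_vars.experimental_features:

def filter_experimental(menu, experimental_features):
    # The condition is verbatim from the commit, wrapped in a function for testing.
    return [x for x in menu
            if experimental_features or "(experimental)" not in x[0].lower()]

menu = [
    ["Untuned XGLM", "xglmlist", "", True],
    ["Untuned RWKV-4 (Experimental)", "rwkvlist", "", True],
]
assert len(filter_experimental(menu, False)) == 1  # hidden by default
assert len(filter_experimental(menu, True)) == 2   # shown in experimental mode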
@@ -785,7 +788,7 @@ def sendModelSelection(menu="mainmenu", folder="./models"):
             "size": m[2],
             "isMenu": m[3],
             "isDownloaded": is_model_downloaded(m[1]) if not m[3] else False,
-        } for m in model_menu[menu]]
+        } for m in filtered_menu]
         emit('show_model_menu', {'data': p_menu, 'menu': menu, 'breadcrumbs': [], "showdelete": False}, broadcast=False)
 
 def get_folder_path_info(base):
@@ -5819,14 +5822,19 @@ def rwkv_init(model_class: str, use_gpu: bool = False):
 
     os.environ["RWKV_RUN_DEVICE"] = device
 
-    TOKENIZER_PATH = "models/RWKV4/20B_tokenizer.json"
-    MODEL_DIR = "models/RWKV4/models"
+    TOKENIZER_PATH = "RWKV4/20B_tokenizer.json"
+    MODEL_DIR = "models"
 
     model_files = os.listdir(MODEL_DIR)
-    matching_models = [f for f in model_files if f.startswith(f"RWKV-4-Pile-{model_class}")]
+    matching_models = [f for f in model_files if f.startswith(f"RWKV-4-{model_class}")]
 
     if not matching_models:
-        raise RuntimeError(f"No models of class '{model_class}' found in '{MODEL_DIR}'. Did you rename the model?")
-    model_path = os.path.join(MODEL_DIR, sorted(matching_models)[-1])
+        raise RuntimeError(
+            f"No models of class '{model_class}' found in '{MODEL_DIR}'. Please download a model from "
+            "https://huggingface.co/BlinkDL, rename the .pth file to 'model.pth', and place it in a directory named "
+            f"'{MODEL_DIR}/RWKV-4-XYZ', where XYZ is the parameter string of the model (169M, 430M, 1B5, 3B, or 7B)."
+        )
+    model_path = os.path.join(MODEL_DIR, sorted(matching_models)[-1], "model.pth")
 
     model_config = {
         "169M": RWKVConfig(n_layer=12, n_embed=768, ctx_len=1024),
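
The rewritten error message doubles as documentation of the expected layout: each model lives at models/RWKV-4-<params>/model.pth, with the tokenizer at RWKV4/20B_tokenizer.json. A small sketch of the lookup the new code performs, with os.listdir replaced by a literal so it runs anywhere:

import os

MODEL_DIR = "models"
model_class = "3B"

# Stand-in for os.listdir(MODEL_DIR) under the documented layout.
model_files = ["RWKV-4-169M", "RWKV-4-3B", "gpt-neo-2.7B"]

matching_models = [f for f in model_files if f.startswith(f"RWKV-4-{model_class}")]
model_path = os.path.join(MODEL_DIR, sorted(matching_models)[-1], "model.pth")
assert model_path == os.path.join("models", "RWKV-4-3B", "model.pth")

Note that sorted(matching_models)[-1] simply picks the lexicographically last directory when more than one matches the prefix.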
@@ -5840,10 +5848,10 @@ def rwkv_init(model_class: str, use_gpu: bool = False):
         raise RuntimeError(f"No config for model '{model_class}' found!")
 
     if not os.path.exists(TOKENIZER_PATH):
-        raise RuntimeError(f"Can't find tokenizer at '{TOKENIZER_PATH}'. Did you download it and put it at that location?")
+        raise RuntimeError(f"Can't find tokenizer at '{TOKENIZER_PATH}'!")
 
     # Model stuff
-    from models.RWKV4.src.model_run import RWKV_RNN
+    from RWKV4.src.model_run import RWKV_RNN
     from transformers import PreTrainedTokenizerFast
     from torch.nn import functional as F
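
The import change is the same relocation seen throughout the commit: the RWKV4 package is now expected at the repository root, next to aiserver.py, rather than under models/. A hedged preflight sketch for that assumption, not part of the commit (find_spec checks importability without pulling in the torch-heavy module):

import importlib.util
import os

def check_rwkv_layout():
    """Return human-readable problems with the layout the new paths assume."""
    problems = []
    if importlib.util.find_spec("RWKV4") is None:
        problems.append("RWKV4 package not importable; it should sit beside aiserver.py")
    if not os.path.exists("RWKV4/20B_tokenizer.json"):
        problems.append("tokenizer missing at RWKV4/20B_tokenizer.json")
    return problems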