diff --git a/aiserver.py b/aiserver.py
index a306449e..92dde7f4 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -136,7 +136,6 @@ class MenuModelType(Enum):
     HUGGINGFACE = 0
     ONLINE_API = 1
     OTHER = 2
-    RWKV = 3
 
 class MenuItem:
     def __init__(
@@ -243,7 +242,7 @@ model_menu = {
         MenuFolder("Untuned Fairseq Dense", "fsdlist"),
         MenuFolder("Untuned Bloom", "bloomlist"),
         MenuFolder("Untuned XGLM", "xglmlist"),
-        MenuFolder("Untuned RWKV-4 (Experimental)", "rwkvlist", experimental=True),
+        MenuFolder("Official RWKV-4", "rwkvlist"),
         MenuFolder("Untuned GPT2", "gpt2list"),
         MenuFolder("Online Services", "apilist"),
         MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER),
@@ -370,16 +369,16 @@ model_menu = {
         MenuFolder("Return to Main Menu", "mainmenu"),
     ],
     'rwkvlist': [
-        MenuModel("RWKV-4 14B ctx4096", "rwkv-4-pile-14b:ctx4096", "??GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 14B ctx1024", "rwkv-4-pile-14b", "??GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 7B ctx4096", "rwkv-4-pile-7b:ctx4096", "??GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 7B ctx1024", "rwkv-4-pile-7b", "??GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 3B ctx4096", "rwkv-4-pile-3b:ctx4096", "?GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 3B ctx1024", "rwkv-4-pile-3b", "?GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 1.5B ctx4096", "rwkv-4-pile-1b5:ctx4096", "9GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 1.5B ctx1024", "rwkv-4-pile-1b5", "9GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 340M", "rwkv-4-pile-430m", "?GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 169M ctx1024", "rwkv-4-pile-169m", "?GB", model_type=MenuModelType.RWKV),
+        MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", ""),
+        MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", ""),
+        MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", ""),
+        MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", ""),
+        MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", ""),
+        MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", ""),
+        MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", ""),
+        MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", ""),
+        MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", ""),
+        MenuModel("RWKV Pile 169M", "RWKV/rwkv-4-169m-pile", ""),
         MenuFolder("Return to Main Menu", "mainmenu"),
     ],
     'apilist': [
@@ -3366,6 +3365,7 @@ def apiactionsubmit_tpumtjgenerate(txt, minimum, maximum):
             soft_tokens=soft_tokens,
             sampler_order=koboldai_vars.sampler_order,
         )
+        genout = np.array(genout)
         genout = [utils.applyoutputformatting(utils.decodenewlines(tokenizer.decode(txt))) for txt in genout]
 
     return genout
diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index 1cc5a9c7..3d0ca633 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -32,7 +32,7 @@ dependencies:
     - flask-ngrok
     - flask-cors
     - lupa==1.10
-    - transformers==4.28.0
+    - transformers==4.29.*
     - huggingface_hub==0.12.1
     - safetensors==0.3.1
     - accelerate==0.18.0
diff --git a/environments/rocm.yml b/environments/rocm.yml
index 51b3e852..eb2927bd 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -24,13 +24,13 @@ dependencies:
   - Pillow
   - psutil
   - pip:
-    - --extra-index-url https://download.pytorch.org/whl/rocm5.4.2
-    - torch==2.0.*
+    - --extra-index-url https://download.pytorch.org/whl/rocm5.2
+    - torch==1.13.1+rocm5.2
     - flask-cloudflared==0.0.10
    - flask-ngrok
    - flask-cors
    - lupa==1.10
-    - transformers==4.28.0
+    - transformers==4.29.*
    - huggingface_hub==0.12.1
    - safetensors==0.3.1
    - accelerate==0.18.0
diff --git a/koboldai_settings.py b/koboldai_settings.py
index e9562ffc..f3aa0ca9 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -1128,7 +1128,7 @@ class story_settings(settings):
 
 class user_settings(settings):
     local_only_variables = ['importjs']
-    no_save_variables = ['importnum', 'importjs', 'loadselect', 'spselect', 'svowname', 'saveow', 'laststory', 'sid', "revision"]
+    no_save_variables = ['importnum', 'importjs', 'loadselect', 'spselect', 'svowname', 'saveow', 'laststory', 'sid', "revision", "model_selected"]
     settings_name = "user"
     def __init__(self, socketio):
         self._socketio = socketio
@@ -1184,6 +1184,7 @@ class user_settings(settings):
         self.horde_api_key = "0000000000"
         self.horde_worker_name = "My Awesome Instance"
         self.horde_url = "https://horde.koboldai.net"
+        self.model_selected = ""
 
     def __setattr__(self, name, value):
         new_variable = name not in self.__dict__
diff --git a/modeling/inference_model.py b/modeling/inference_model.py
index 343eb39a..4a29a027 100644
--- a/modeling/inference_model.py
+++ b/modeling/inference_model.py
@@ -231,7 +231,7 @@ class InferenceModel:
             try:
                 return GenericTokenizer(try_get_tokenizer())
             except Exception as e:
-                logger.warning(f"Tokenizer falling back due to {e}")
+                logger.warning(f"Tokenizer falling back due to {e} (This can be normal behavior for some architectures that lack a slow tokenizer such as NeoX)")
                 # If we error on each attempt, raise the last one
                 if i == len(suppliers) - 1:
                     raise
diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py
index 759feb65..4e82d348 100644
--- a/modeling/inference_models/hf_mtj.py
+++ b/modeling/inference_models/hf_mtj.py
@@ -17,6 +17,7 @@ from modeling.inference_model import (
     ModelCapabilities,
 )
 from modeling.inference_models.parents.hf import HFInferenceModel
+from modeling.tokenizer import GenericTokenizer
 
 
@@ -197,8 +198,7 @@ class model_loader(HFInferenceModel):
         utils.koboldai_vars.modeldim = int(
             tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"])
         )
-
-        self.tokenizer = tpu_mtj_backend.tokenizer
+        self.tokenizer = GenericTokenizer(tpu_mtj_backend.tokenizer)
 
         if (
             utils.koboldai_vars.badwordsids is koboldai_settings.badwordsids_default
diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py
index 03955d88..ba291c3f 100644
--- a/modeling/inference_models/parents/hf.py
+++ b/modeling/inference_models/parents/hf.py
@@ -173,8 +173,89 @@ class HFInferenceModel(InferenceModel):
     def _post_load(self) -> None:
         # These are model specific tokenizer overrides if a model has bad defaults
         if utils.koboldai_vars.model_type == "llama":
-            self.tokenizer.decode_with_prefix_space = True
+            # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer
             self.tokenizer.add_bos_token = False
+
+            # HF transformers no longer supports decode_with_prefix_space,
+            # so we wrap decode, encode, and __call__ with versions that work
+            # around the 'prefix space' misfeature of sentencepiece.
+            vocab = self.tokenizer.convert_ids_to_tokens(range(self.tokenizer.vocab_size))
+            has_prefix_space = {i for i, tok in enumerate(vocab) if tok.startswith("▁")}
+
+            # Wrap 'decode' with a method that always returns text starting with a space
+            # when the head token starts with a space. This is what 'decode_with_prefix_space'
+            # used to do, and we implement it using the same technique (building a cache of
+            # tokens that should have a prefix space, and then prepending a space if the first
+            # token is in this set.) We also work around a bizarre behavior in which decoding
+            # a single token 13 behaves differently than decoding a sequence containing only [13].
+            original_decode = type(self.tokenizer.tokenizer).decode
+            def decode_wrapper(self, token_ids, *args, **kwargs):
+                first = None
+                # Note: the code below that wraps single-value token_ids in a list
+                # is to work around this wonky behavior:
+                #   >>> t.decode(13)
+                #   '<0x0A>'
+                #   >>> t.decode([13])
+                #   '\n'
+                # Not doing this causes token streaming to receive <0x0A> characters
+                # instead of newlines.
+                if isinstance(token_ids, int):
+                    first = token_ids
+                    token_ids = [first]
+                elif hasattr(token_ids, 'dim'):  # Check for e.g. torch.Tensor
+                    # Tensors don't support the Python standard of 'empty is False'
+                    # and the special case of dimension 0 tensors also needs to be
+                    # handled separately.
+                    if token_ids.dim() == 0:
+                        first = int(token_ids.item())
+                        token_ids = [first]
+                    elif len(token_ids) > 0:
+                        first = int(token_ids[0])
+                elif token_ids is not None and len(token_ids) > 0:
+                    first = token_ids[0]
+                result = original_decode(self, token_ids, *args, **kwargs)
+                if first is not None and first in has_prefix_space:
+                    result = " " + result
+                return result
+            # GenericTokenizer overrides __setattr__, so we need object.__setattr__ to bypass it
+            object.__setattr__(self.tokenizer, 'decode', decode_wrapper.__get__(self.tokenizer))
+
+            # Wrap encode and __call__ to work around the 'prefix space' misfeature as well.
+            # The problem is that "Bob" at the start of text is encoded as if it were
+            # " Bob". This creates a problem because it means you can't split text, encode
+            # the pieces, concatenate the tokens, decode them, and get the original text back.
+            # The workaround is to prepend a known token that (1) starts with a space; and
+            # (2) is not the prefix of any other token. After searching through the vocab,
+            # " ," (space comma) is the only token containing only printable ascii characters
+            # that fits this bill. By prepending ',' to the text, the original encode
+            # method always returns [1919, ...], where the tail of the sequence is the
+            # actual encoded result we want without the prefix space behavior.
+            original_encode = type(self.tokenizer.tokenizer).encode
+            def encode_wrapper(self, text, *args, **kwargs):
+                if type(text) is str:
+                    text = ',' + text
+                    result = original_encode(self, text, *args, **kwargs)
+                    result = result[1:]
+                else:
+                    result = original_encode(self, text, *args, **kwargs)
+                return result
+            object.__setattr__(self.tokenizer, 'encode', encode_wrapper.__get__(self.tokenizer))
+
+            # Since 'encode' is documented as being deprecated, also override __call__.
+            # This doesn't appear to be used by KoboldAI currently, but we wrap it anyway
+            # in case someone uses it in the future.
+            original_call = type(self.tokenizer.tokenizer).__call__
+            def call_wrapper(self, text, *args, **kwargs):
+                if type(text) is str:
+                    text = ',' + text
+                    result = original_call(self, text, *args, **kwargs)
+                    result = result[1:]
+                else:
+                    result = original_call(self, text, *args, **kwargs)
+                return result
+            object.__setattr__(self.tokenizer, '__call__', call_wrapper.__get__(self.tokenizer))
+
         elif utils.koboldai_vars.model_type == "opt":
             self.tokenizer._koboldai_header = self.tokenizer.encode("")
             self.tokenizer.add_bos_token = False
@@ -259,4 +340,4 @@ class HFInferenceModel(InferenceModel):
             logger.warning(
                 "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)"
             )
-            utils.koboldai_vars.model_type = "gpt_neo"
+            utils.koboldai_vars.model_type = "gpt_neo"
\ No newline at end of file
diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py
index aae3ada3..f0a4a66e 100644
--- a/modeling/inference_models/parents/hf_torch.py
+++ b/modeling/inference_models/parents/hf_torch.py
@@ -289,11 +289,13 @@ class HFTorchInferenceModel(HFInferenceModel):
                     raise
 
                 logger.warning(f"Fell back to GPT2LMHeadModel due to {e}")
-                logger.debug(traceback_string)
+                logger.debug(traceback.format_exc())
+
                 try:
                     return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs)
                 except Exception as e:
                     logger.warning(f"Fell back to GPTNeoForCausalLM due to {e}")
+                    logger.debug(traceback.format_exc())
                     return GPTNeoForCausalLM.from_pretrained(location, **tf_kwargs)
 
     def get_hidden_size(self) -> int:
@@ -420,19 +422,25 @@ class HFTorchInferenceModel(HFInferenceModel):
         device_map: Dict[str, Union[str, int]] = {}
 
         @functools.lru_cache(maxsize=None)
-        def get_original_key(key):
-            return max(
-                (
-                    original_key
-                    for original_key in utils.module_names
-                    if original_key.endswith(key)
-                ),
-                key=len,
-            )
+        def get_original_key(key) -> Optional[str]:
+            key_candidates = [
+                original_key
+                for original_key in utils.module_names
+                if original_key.endswith(key)
+            ]
+
+            if not key_candidates:
+                logger.debug(f"!!! No key candidates for {key}")
+                return None
+
+            return max(key_candidates, key=len)
 
         for key, value in model_dict.items():
             original_key = get_original_key(key)
+            if not original_key:
+                continue
+
             if isinstance(value, lazy_loader.LazyTensor) and not any(
                 original_key.startswith(n) for n in utils.layers_module_names
             ):
diff --git a/requirements.txt b/requirements.txt
index 4eb2c282..28fdb28c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-transformers==4.28.0
+transformers==4.29.*
 huggingface_hub==0.12.1
 Flask==2.2.3
 Flask-SocketIO==5.3.2
diff --git a/requirements_mtj.txt b/requirements_mtj.txt
index 1b40fded..7fc866f0 100644
--- a/requirements_mtj.txt
+++ b/requirements_mtj.txt
@@ -5,7 +5,7 @@ requests
 dm-haiku==0.0.9
 jax==0.3.25
 jaxlib==0.3.25
-transformers == 4.28.0
+transformers==4.29.*
 chex == 0.1.5
 huggingface_hub==0.12.1
 progressbar2
diff --git a/static/koboldai.js b/static/koboldai.js
index ab7f7832..de3ab324 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -3991,7 +3991,7 @@ function update_context(data) {
                 document.getElementById('world_info_'+entry.uid).classList.add("used_in_game");
             }
             break;
-        case 'memory':
+        case 'genre':
             genre_length += entry.tokens.length;
             break;
         case 'memory':