diff --git a/aiserver.py b/aiserver.py
index a306449e..92dde7f4 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -136,7 +136,6 @@ class MenuModelType(Enum):
     HUGGINGFACE = 0
     ONLINE_API = 1
     OTHER = 2
-    RWKV = 3
 
 class MenuItem:
     def __init__(
@@ -243,7 +242,7 @@ model_menu = {
         MenuFolder("Untuned Fairseq Dense", "fsdlist"),
         MenuFolder("Untuned Bloom", "bloomlist"),
         MenuFolder("Untuned XGLM", "xglmlist"),
-        MenuFolder("Untuned RWKV-4 (Experimental)", "rwkvlist", experimental=True),
+        MenuFolder("Official RWKV-4", "rwkvlist"),
         MenuFolder("Untuned GPT2", "gpt2list"),
         MenuFolder("Online Services", "apilist"),
         MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER),
@@ -370,16 +369,16 @@ model_menu = {
         MenuFolder("Return to Main Menu", "mainmenu"),
     ],
     'rwkvlist': [
-        MenuModel("RWKV-4 14B ctx4096", "rwkv-4-pile-14b:ctx4096", "??GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 14B ctx1024", "rwkv-4-pile-14b", "??GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 7B ctx4096", "rwkv-4-pile-7b:ctx4096", "??GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 7B ctx1024", "rwkv-4-pile-7b", "??GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 3B ctx4096", "rwkv-4-pile-3b:ctx4096", "?GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 3B ctx1024", "rwkv-4-pile-3b", "?GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 1.5B ctx4096", "rwkv-4-pile-1b5:ctx4096", "9GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 1.5B ctx1024", "rwkv-4-pile-1b5", "9GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 340M", "rwkv-4-pile-430m", "?GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 169M ctx1024", "rwkv-4-pile-169m", "?GB", model_type=MenuModelType.RWKV),
+        MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", ""),
+        MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", ""),
+        MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", ""),
+        MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", ""),
+        MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", ""),
+        MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", ""),
+        MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", ""),
+        MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", ""),
+        MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", ""),
+        MenuModel("RWKV Pile 169M", "RWKV/rwkv-4-169m-pile", ""),
         MenuFolder("Return to Main Menu", "mainmenu"),
     ],
     'apilist': [
@@ -3366,6 +3365,7 @@ def apiactionsubmit_tpumtjgenerate(txt, minimum, maximum):
             soft_tokens=soft_tokens,
             sampler_order=koboldai_vars.sampler_order,
         )
+        genout = np.array(genout)
         genout = [utils.applyoutputformatting(utils.decodenewlines(tokenizer.decode(txt))) for txt in genout]
 
     return genout
diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index 1cc5a9c7..3d0ca633 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -32,7 +32,7 @@ dependencies:
     - flask-ngrok
     - flask-cors
     - lupa==1.10
-    - transformers==4.28.0
+    - transformers==4.29.*
     - huggingface_hub==0.12.1
     - safetensors==0.3.1
     - accelerate==0.18.0
diff --git a/environments/rocm.yml b/environments/rocm.yml
index 51b3e852..eb2927bd 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -24,13 +24,13 @@ dependencies:
   - Pillow
   - psutil
   - pip:
-    - --extra-index-url https://download.pytorch.org/whl/rocm5.4.2
-    - torch==2.0.*
+    - --extra-index-url https://download.pytorch.org/whl/rocm5.2
+    - torch==1.13.1+rocm5.2
     - flask-cloudflared==0.0.10
    - flask-ngrok
    - flask-cors
    - lupa==1.10
-    - transformers==4.28.0
+    - transformers==4.29.*
    - huggingface_hub==0.12.1
    - safetensors==0.3.1
    - accelerate==0.18.0
diff --git a/koboldai_settings.py b/koboldai_settings.py
index e9562ffc..f3aa0ca9 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -1128,7 +1128,7 @@ class story_settings(settings):
 
 class user_settings(settings):
     local_only_variables = ['importjs']
-    no_save_variables = ['importnum', 'importjs', 'loadselect', 'spselect', 'svowname', 'saveow', 'laststory', 'sid', "revision"]
+    no_save_variables = ['importnum', 'importjs', 'loadselect', 'spselect', 'svowname', 'saveow', 'laststory', 'sid', "revision", "model_selected"]
     settings_name = "user"
     def __init__(self, socketio):
         self._socketio = socketio
@@ -1184,6 +1184,7 @@ class user_settings(settings):
         self.horde_api_key = "0000000000"
         self.horde_worker_name = "My Awesome Instance"
         self.horde_url = "https://horde.koboldai.net"
+        self.model_selected = ""
 
     def __setattr__(self, name, value):
         new_variable = name not in self.__dict__
diff --git a/modeling/inference_model.py b/modeling/inference_model.py
index 343eb39a..4a29a027 100644
--- a/modeling/inference_model.py
+++ b/modeling/inference_model.py
@@ -231,7 +231,7 @@ class InferenceModel:
             try:
                 return GenericTokenizer(try_get_tokenizer())
             except Exception as e:
-                logger.warning(f"Tokenizer falling back due to {e}")
+                logger.warning(f"Tokenizer falling back due to {e} (This can be normal behavior for some architectures that lack a slow tokenizer such as NeoX)")
                 # If we error on each attempt, raise the last one
                 if i == len(suppliers) - 1:
                     raise
diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py
index 759feb65..4e82d348 100644
--- a/modeling/inference_models/hf_mtj.py
+++ b/modeling/inference_models/hf_mtj.py
@@ -17,6 +17,7 @@ from modeling.inference_model import (
     ModelCapabilities,
 )
 from modeling.inference_models.parents.hf import HFInferenceModel
+from modeling.tokenizer import GenericTokenizer
 
 
@@ -197,8 +198,7 @@ class model_loader(HFInferenceModel):
         utils.koboldai_vars.modeldim = int(
             tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"])
         )
-
-        self.tokenizer = tpu_mtj_backend.tokenizer
+        self.tokenizer = GenericTokenizer(tpu_mtj_backend.tokenizer)
 
         if (
             utils.koboldai_vars.badwordsids is koboldai_settings.badwordsids_default
diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py
index 03955d88..ba291c3f 100644
--- a/modeling/inference_models/parents/hf.py
+++ b/modeling/inference_models/parents/hf.py
@@ -173,8 +173,89 @@ class HFInferenceModel(InferenceModel):
     def _post_load(self) -> None:
         # These are model specific tokenizer overrides if a model has bad defaults
         if utils.koboldai_vars.model_type == "llama":
-            self.tokenizer.decode_with_prefix_space = True
+            # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer
             self.tokenizer.add_bos_token = False
+
+            # HF transformers no longer supports decode_with_prefix_space,
+            # so we wrap decode, encode, and __call__ with versions that work
+            # around the 'prefix space' misfeature of sentencepiece.
+            vocab = self.tokenizer.convert_ids_to_tokens(range(self.tokenizer.vocab_size))
+            has_prefix_space = {i for i, tok in enumerate(vocab) if tok.startswith("▁")}
+
+            # Wrap 'decode' with a method that always returns text starting with a space
+            # when the head token starts with a space. This is what 'decode_with_prefix_space'
+            # used to do, and we implement it using the same technique (building a cache of
+            # tokens that should have a prefix space, and then prepending a space if the first
+            # token is in this set.) We also work around a bizarre behavior in which decoding
+            # a single token 13 behaves differently than decoding a sequence containing only [13].
+            original_decode = type(self.tokenizer.tokenizer).decode
+            def decode_wrapper(self, token_ids, *args, **kwargs):
+                first = None
+                # Note: the code below that wraps single-value token_ids in a list
+                # is to work around this wonky behavior:
+                #   >>> t.decode(13)
+                #   '<0x0A>'
+                #   >>> t.decode([13])
+                #   '\n'
+                # Not doing this causes token streaming to receive <0x0A> characters
+                # instead of newlines.
+                if isinstance(token_ids, int):
+                    first = token_ids
+                    token_ids = [first]
+                elif hasattr(token_ids, 'dim'):  # Check for e.g. torch.Tensor
+                    # Tensors don't support the Python standard of 'empty is False'
+                    # and the special case of dimension 0 tensors also needs to be
+                    # handled separately.
+                    if token_ids.dim() == 0:
+                        first = int(token_ids.item())
+                        token_ids = [first]
+                    elif len(token_ids) > 0:
+                        first = int(token_ids[0])
+                elif token_ids is not None and len(token_ids) > 0:
+                    first = token_ids[0]
+                result = original_decode(self, token_ids, *args, **kwargs)
+                if first is not None and first in has_prefix_space:
+                    result = " " + result
+                return result
+            # GenericTokenizer overrides __setattr__, so we need object.__setattr__ to bypass it
+            object.__setattr__(self.tokenizer, 'decode', decode_wrapper.__get__(self.tokenizer))
+
+            # Wrap encode and __call__ to work around the 'prefix space' misfeature as well.
+            # The problem is that "Bob" at the start of text is encoded as if it were
+            # " Bob". This creates a problem because it means you can't split text, encode
+            # the pieces, concatenate the tokens, decode them, and get the original text back.
+            # The workaround is to prepend a known token that (1) starts with a space; and
+            # (2) is not the prefix of any other token. After searching through the vocab,
+            # " ," (space comma) is the only token containing only printable ascii characters
+            # that fits this bill. By prepending ',' to the text, the original encode
+            # method always returns [1919, ...], where the tail of the sequence is the
+            # actual encoded result we want without the prefix space behavior.
+            original_encode = type(self.tokenizer.tokenizer).encode
+            def encode_wrapper(self, text, *args, **kwargs):
+                if type(text) is str:
+                    text = ',' + text
+                    result = original_encode(self, text, *args, **kwargs)
+                    result = result[1:]
+                else:
+                    result = original_encode(self, text, *args, **kwargs)
+                return result
+            object.__setattr__(self.tokenizer, 'encode', encode_wrapper.__get__(self.tokenizer))
+
+            # Since 'encode' is documented as being deprecated, also override __call__.
+            # This doesn't appear to be used by KoboldAI currently, but we wrap it anyway
+            # in case someone uses it in the future.
+            original_call = type(self.tokenizer.tokenizer).__call__
+            def call_wrapper(self, text, *args, **kwargs):
+                if type(text) is str:
+                    text = ',' + text
+                    result = original_call(self, text, *args, **kwargs)
+                    result = result[1:]
+                else:
+                    result = original_call(self, text, *args, **kwargs)
+                return result
+            object.__setattr__(self.tokenizer, '__call__', call_wrapper.__get__(self.tokenizer))
+
         elif utils.koboldai_vars.model_type == "opt":
             self.tokenizer._koboldai_header = self.tokenizer.encode("")
             self.tokenizer.add_bos_token = False
@@ -259,4 +340,4 @@ class HFInferenceModel(InferenceModel):
             logger.warning(
                 "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)"
             )
-            utils.koboldai_vars.model_type = "gpt_neo"
+            utils.koboldai_vars.model_type = "gpt_neo"
\ No newline at end of file
diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py
index aae3ada3..f0a4a66e 100644
--- a/modeling/inference_models/parents/hf_torch.py
+++ b/modeling/inference_models/parents/hf_torch.py
@@ -289,11 +289,13 @@ class HFTorchInferenceModel(HFInferenceModel):
                     raise
 
                 logger.warning(f"Fell back to GPT2LMHeadModel due to {e}")
-                logger.debug(traceback_string)
+                logger.debug(traceback.format_exc())
+
                 try:
                     return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs)
                 except Exception as e:
                     logger.warning(f"Fell back to GPTNeoForCausalLM due to {e}")
+                    logger.debug(traceback.format_exc())
                     return GPTNeoForCausalLM.from_pretrained(location, **tf_kwargs)
 
     def get_hidden_size(self) -> int:
@@ -420,19 +422,25 @@ class HFTorchInferenceModel(HFInferenceModel):
         device_map: Dict[str, Union[str, int]] = {}
 
         @functools.lru_cache(maxsize=None)
-        def get_original_key(key):
-            return max(
-                (
-                    original_key
-                    for original_key in utils.module_names
-                    if original_key.endswith(key)
-                ),
-                key=len,
-            )
+        def get_original_key(key) -> Optional[str]:
+            key_candidates = [
+                original_key
+                for original_key in utils.module_names
+                if original_key.endswith(key)
+            ]
+
+            if not key_candidates:
+                logger.debug(f"!!! No key candidates for {key}")
+                return None
+
+            return max(key_candidates, key=len)
 
         for key, value in model_dict.items():
             original_key = get_original_key(key)
+            if not original_key:
+                continue
+
             if isinstance(value, lazy_loader.LazyTensor) and not any(
                 original_key.startswith(n) for n in utils.layers_module_names
             ):
diff --git a/requirements.txt b/requirements.txt
index 4eb2c282..28fdb28c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-transformers==4.28.0
+transformers==4.29.*
 huggingface_hub==0.12.1
 Flask==2.2.3
 Flask-SocketIO==5.3.2
diff --git a/requirements_mtj.txt b/requirements_mtj.txt
index 1b40fded..7fc866f0 100644
--- a/requirements_mtj.txt
+++ b/requirements_mtj.txt
@@ -5,7 +5,7 @@ requests
 dm-haiku==0.0.9
 jax==0.3.25
 jaxlib==0.3.25
-transformers == 4.28.0
+transformers==4.29.*
 chex == 0.1.5
 huggingface_hub==0.12.1
 progressbar2
diff --git a/static/koboldai.js b/static/koboldai.js
index ab7f7832..de3ab324 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -3991,7 +3991,7 @@ function update_context(data) {
                 document.getElementById('world_info_'+entry.uid).classList.add("used_in_game");
             }
             break;
-        case 'memory':
+        case 'genre':
             genre_length += entry.tokens.length;
             break;
         case 'memory':