From d53726bed610d03ec4b3edf3613c72f3754a7fba Mon Sep 17 00:00:00 2001
From: Bogdan Drema
Date: Mon, 8 May 2023 18:24:34 +0100
Subject: [PATCH 01/12] fix: tpu tokenizers errors

---
 modeling/inference_models/hf.py     | 2 +-
 modeling/inference_models/hf_mtj.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py
index cd609fed..37f473ca 100644
--- a/modeling/inference_models/hf.py
+++ b/modeling/inference_models/hf.py
@@ -59,7 +59,7 @@ class HFInferenceModel(InferenceModel):
                     token_ids = [first]
                 elif len(token_ids) > 0:
                     first = int(token_ids[0])
-            elif token_ids:
+            elif token_ids is not None and len(token_ids) > 0:
                 first = token_ids[0]
             result = original_decode(self, token_ids, *args, **kwargs)
             if first is not None and first in has_prefix_space:
diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py
index 7661a67f..d7035cbf 100644
--- a/modeling/inference_models/hf_mtj.py
+++ b/modeling/inference_models/hf_mtj.py
@@ -17,6 +17,7 @@ from modeling.inference_model import (
     ModelCapabilities,
 )
 from modeling.inference_models.hf import HFInferenceModel
+from modeling.tokenizer import GenericTokenizer

 # This file shouldn't be imported unless using the TPU
 assert utils.koboldai_vars.use_colab_tpu
@@ -193,8 +194,7 @@ class HFMTJInferenceModel(HFInferenceModel):
         utils.koboldai_vars.modeldim = int(
             tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"])
         )
-
-        self.tokenizer = tpu_mtj_backend.tokenizer
+        self.tokenizer = GenericTokenizer(tpu_mtj_backend.tokenizer)

         if (
             utils.koboldai_vars.badwordsids is koboldai_settings.badwordsids_default

From a9e342ca64f8376e85d92beb9e65d246ec3997a8 Mon Sep 17 00:00:00 2001
From: somebody
Date: Mon, 8 May 2023 17:10:47 -0500
Subject: [PATCH 02/12] Fix TPU API errors

---
 aiserver.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/aiserver.py b/aiserver.py
index e744d18e..ef49f05c 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -3708,6 +3708,7 @@ def apiactionsubmit_tpumtjgenerate(txt, minimum, maximum):
         soft_tokens=soft_tokens,
         sampler_order=koboldai_vars.sampler_order,
     )
+    genout = np.array(genout)
     genout = [utils.applyoutputformatting(utils.decodenewlines(tokenizer.decode(txt))) for txt in genout]

     return genout

From 9fdc2f73a63e1f6fd64fdad06f37aef1f97b0adc Mon Sep 17 00:00:00 2001
From: Henk
Date: Tue, 9 May 2023 20:59:10 +0200
Subject: [PATCH 03/12] ROCM Downgrade for stability

---
 environments/rocm.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environments/rocm.yml b/environments/rocm.yml
index 51b3e852..81e32a58 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -25,7 +25,7 @@ dependencies:
   - psutil
   - pip:
     - --extra-index-url https://download.pytorch.org/whl/rocm5.4.2
-    - torch==2.0.*
+    - torch==1.13.*
     - flask-cloudflared==0.0.10
     - flask-ngrok
     - flask-cors

From 702f59b2dbd458ccc9426cee0226740870a62b36 Mon Sep 17 00:00:00 2001
From: Henk
Date: Tue, 9 May 2023 22:10:01 +0200
Subject: [PATCH 04/12] Downgrade ROCM properly

---
 environments/rocm.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/environments/rocm.yml b/environments/rocm.yml
index 81e32a58..a33a8f96 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -24,8 +24,8 @@ dependencies:
   - Pillow
   - psutil
   - pip:
-    - --extra-index-url https://download.pytorch.org/whl/rocm5.4.2
-    - torch==1.13.*
+    - --extra-index-url https://download.pytorch.org/whl/rocm5.2
+    - torch==1.13.1+rocm5.2
     - flask-cloudflared==0.0.10
     - flask-ngrok
     - flask-cors

From 546ba84723c84dec3a6f8cc70e41408fd66efa67 Mon Sep 17 00:00:00 2001
From: somebody
Date: Wed, 10 May 2023 19:10:23 -0500
Subject: [PATCH 05/12] Fix memory->genre bug in context viewer bar tooltip

Crazy change I know
---
 static/koboldai.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/static/koboldai.js b/static/koboldai.js
index cfc32d21..87beb954 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -4006,7 +4006,7 @@ function update_context(data) {
                     document.getElementById('world_info_'+entry.uid).classList.add("used_in_game");
                 }
                 break;
-            case 'memory':
+            case 'genre':
                 genre_length += entry.tokens.length;
                 break;
             case 'memory':

From 84e4cb0f4a216e58063cf1f61a0adb0c7b27124a Mon Sep 17 00:00:00 2001
From: Henk
Date: Thu, 11 May 2023 13:44:53 +0200
Subject: [PATCH 06/12] Update Transformers

---
 environments/huggingface.yml | 2 +-
 environments/rocm.yml        | 2 +-
 requirements.txt             | 2 +-
 requirements_mtj.txt         | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index 1cc5a9c7..3d0ca633 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -32,7 +32,7 @@ dependencies:
     - flask-ngrok
     - flask-cors
     - lupa==1.10
-    - transformers==4.28.0
+    - transformers==4.29.*
     - huggingface_hub==0.12.1
     - safetensors==0.3.1
     - accelerate==0.18.0
diff --git a/environments/rocm.yml b/environments/rocm.yml
index a33a8f96..eb2927bd 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -30,7 +30,7 @@ dependencies:
     - flask-ngrok
     - flask-cors
     - lupa==1.10
-    - transformers==4.28.0
+    - transformers==4.29.*
     - huggingface_hub==0.12.1
     - safetensors==0.3.1
     - accelerate==0.18.0
diff --git a/requirements.txt b/requirements.txt
index 4eb2c282..28fdb28c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-transformers==4.28.0
+transformers==4.29.*
 huggingface_hub==0.12.1
 Flask==2.2.3
 Flask-SocketIO==5.3.2
diff --git a/requirements_mtj.txt b/requirements_mtj.txt
index 1b40fded..7fc866f0 100644
--- a/requirements_mtj.txt
+++ b/requirements_mtj.txt
@@ -5,7 +5,7 @@ requests
 dm-haiku==0.0.9
 jax==0.3.25
 jaxlib==0.3.25
-transformers == 4.28.0
+transformers==4.29.*
 chex == 0.1.5
 huggingface_hub==0.12.1
 progressbar2

From e932364a1e3efe0c6973f1a19f4093115068c77d Mon Sep 17 00:00:00 2001
From: Henk
Date: Thu, 11 May 2023 14:56:12 +0200
Subject: [PATCH 07/12] RWKV support

---
 aiserver.py                       |  31 ++--
 modeling/inference_models/rwkv.py | 237 ------------------------------
 2 files changed, 11 insertions(+), 257 deletions(-)
 delete mode 100644 modeling/inference_models/rwkv.py

diff --git a/aiserver.py b/aiserver.py
index ef49f05c..b045ea71 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -136,7 +136,6 @@ class MenuModelType(Enum):
     HUGGINGFACE = 0
     ONLINE_API = 1
     OTHER = 2
-    RWKV = 3

 class MenuItem:
     def __init__(
@@ -222,7 +221,7 @@ model_menu = {
         MenuFolder("Untuned Fairseq Dense", "fsdlist"),
         MenuFolder("Untuned Bloom", "bloomlist"),
         MenuFolder("Untuned XGLM", "xglmlist"),
-        MenuFolder("Untuned RWKV-4 (Experimental)", "rwkvlist", experimental=True),
+        MenuFolder("Official RWKV-4", "rwkvlist"),
         MenuFolder("Untuned GPT2", "gpt2list"),
         MenuFolder("Online Services", "apilist"),
         MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER),
@@ -349,16 +348,16 @@ model_menu = {
         MenuFolder("Return to Main Menu", "mainmenu"),
     ],
     'rwkvlist': [
-        MenuModel("RWKV-4 14B ctx4096", "rwkv-4-pile-14b:ctx4096", "??GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 14B ctx1024", "rwkv-4-pile-14b", "??GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 7B ctx4096", "rwkv-4-pile-7b:ctx4096", "??GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 7B ctx1024", "rwkv-4-pile-7b", "??GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 3B ctx4096", "rwkv-4-pile-3b:ctx4096", "?GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 3B ctx1024", "rwkv-4-pile-3b", "?GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 1.5B ctx4096", "rwkv-4-pile-1b5:ctx4096", "9GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 1.5B ctx1024", "rwkv-4-pile-1b5", "9GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 340M", "rwkv-4-pile-430m", "?GB", model_type=MenuModelType.RWKV),
-        MenuModel("RWKV-4 169M ctx1024", "rwkv-4-pile-169m", "?GB", model_type=MenuModelType.RWKV),
+        MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", ""),
+        MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", ""),
+        MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", ""),
+        MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", ""),
+        MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", ""),
+        MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", ""),
+        MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", ""),
+        MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", ""),
+        MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", ""),
+        MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", ""),
         MenuFolder("Return to Main Menu", "mainmenu"),
     ],
     'apilist': [
@@ -1567,8 +1566,6 @@ def get_model_info(model, directory=""):
                 print(":(")
                 pass
         key = True
-    elif "rwkv" in model.lower():
-        pass
     elif model == 'ReadOnly':
         pass
     #elif model == 'customhuggingface':
@@ -1946,12 +1943,6 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         model.load(initial_load=initial_load)

     # TODO: This check sucks, make a model object or somethign
-    elif "rwkv" in koboldai_vars.model:
-        if koboldai_vars.use_colab_tpu:
-            raise RuntimeError("RWKV is not supported on the TPU.")
-        from modeling.inference_models.rwkv import RWKVInferenceModel
-        model = RWKVInferenceModel(koboldai_vars.model)
-        model.load()
     elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai:
         # HF Torch
         logger.init("Transformers", status='Starting')
diff --git a/modeling/inference_models/rwkv.py b/modeling/inference_models/rwkv.py
deleted file mode 100644
index 006bb8fd..00000000
--- a/modeling/inference_models/rwkv.py
+++ /dev/null
@@ -1,237 +0,0 @@
-from __future__ import annotations
-import os
-
-
-import time
-from typing import Dict, List, Optional, Union
-import numpy as np
-import requests
-from tokenizers import Tokenizer
-from tqdm import tqdm
-from huggingface_hub import hf_hub_url
-
-import torch
-from torch.nn import functional as F
-
-# Must be defined before import
-os.environ["RWKV_JIT_ON"] = "1"
-# TODO: Include compiled kernel
-os.environ["RWKV_CUDA_ON"] = "1"
-from rwkv.model import RWKV
-
-import utils
-from logger import logger
-
-from modeling import warpers
-from modeling.warpers import Warper
-from modeling.stoppers import Stoppers
-from modeling.post_token_hooks import PostTokenHooks
-from modeling.tokenizer import GenericTokenizer
-from modeling.inference_model import (
-    GenerationResult,
-    GenerationSettings,
-    InferenceModel,
-    ModelCapabilities,
-)
-
-TOKENIZER_URL = (
-    "https://raw.githubusercontent.com/BlinkDL/ChatRWKV/main/20B_tokenizer.json"
-)
-TOKENIZER_PATH = "models/rwkv/20b_tokenizer.json"
-
-REPO_OWNER = "BlinkDL"
-MODEL_FILES = {
-    "rwkv-4-pile-14b": "RWKV-4-Pile-14B-20230213-8019.pth",
-    # NOTE: Still in progress(?)
-    "rwkv-4-pile-14b:ctx4096": "RWKV-4-Pile-14B-20230228-ctx4096-test663.pth",
-    "rwkv-4-pile-7b": "RWKV-4-Pile-7B-20221115-8047.pth",
-    "rwkv-4-pile-7b:ctx4096": "RWKV-4-Pile-7B-20230109-ctx4096.pth",
-    "rwkv-4-pile-3b": "RWKV-4-Pile-3B-20221008-8023.pth",
-    "rwkv-4-pile-3b:ctx4096": "RWKV-4-Pile-3B-20221110-ctx4096.pth",
-    "rwkv-4-pile-1b5": "RWKV-4-Pile-1B5-20220903-8040.pth",
-    "rwkv-4-pile-1b5:ctx4096": "RWKV-4-Pile-1B5-20220929-ctx4096.pth",
-    "rwkv-4-pile-430m": "RWKV-4-Pile-430M-20220808-8066.pth",
-    "rwkv-4-pile-169m": "RWKV-4-Pile-169M-20220807-8023.pth",
-}
-
-
-class RWKVInferenceModel(InferenceModel):
-    def __init__(
-        self,
-        model_name: str,
-    ) -> None:
-        super().__init__()
-        self.model_name = model_name
-
-        self.post_token_hooks = [
-            PostTokenHooks.stream_tokens,
-        ]
-
-        self.stopper_hooks = [
-            Stoppers.core_stopper,
-            Stoppers.dynamic_wi_scanner,
-            Stoppers.singleline_stopper,
-            Stoppers.chat_mode_stopper,
-            Stoppers.stop_sequence_stopper,
-        ]
-
-        self.capabilties = ModelCapabilities(
-            embedding_manipulation=False,
-            post_token_hooks=True,
-            stopper_hooks=True,
-            post_token_probs=True,
-        )
-        self._old_stopping_criteria = None
-
-    def _ensure_directory_structure(self) -> None:
-        for path in ["models/rwkv", "models/rwkv/models"]:
-            try:
-                os.mkdir(path)
-            except FileExistsError:
-                pass
-
-    def _get_tokenizer(self) -> GenericTokenizer:
-        if not os.path.exists(TOKENIZER_PATH):
-            logger.info("RWKV tokenizer not found, downloading...")
-
-            r = requests.get(TOKENIZER_URL)
-            with open(TOKENIZER_PATH, "wb") as file:
-                file.write(r.content)
-
-        return GenericTokenizer(Tokenizer.from_file(TOKENIZER_PATH))
-
-    def _download_model(self, model_path: str, model_class: str) -> None:
-        logger.info(f"{self.model_name} not found, downloading...")
-
-        url = hf_hub_url(
-            repo_id=f"{REPO_OWNER}/{model_class}",
-            filename=MODEL_FILES[self.model_name],
-        )
-
-        # TODO: Use aria2
-        # https://stackoverflow.com/a/57030446
-        with requests.get(url, stream=True) as r:
-            r.raise_for_status()
-            bar = tqdm(
-                desc="Downloading RWKV Model",
-                unit="B",
-                unit_scale=True,
-                total=int(r.headers["Content-Length"]),
-            )
-            with open(model_path, "wb") as file:
-                for chunk in r.iter_content(chunk_size=8192):
-                    if not chunk:
-                        continue
-                    file.write(chunk)
-                    bar.update(len(chunk))
-
-    def _load(self, save_model: bool, initial_load: bool) -> None:
-        self._ensure_directory_structure()
-        self.tokenizer = self._get_tokenizer()
-
-        # Parse model name
-        model_class, _, special = self.model_name.partition(":")
-        special = special or None
-
-        model_dir = os.path.join("models", "rwkv", "models", model_class)
-        if not os.path.exists(model_dir):
-            os.mkdir(model_dir)
-
-        # Download model if we need to
-        model_path = os.path.join(model_dir, MODEL_FILES[self.model_name])
-        if not os.path.exists(model_path):
-            self._download_model(model_path, model_class)
-
-        # Now we load!
-
-        # TODO: Breakmodel to strat
-        self.model = RWKV(model=model_path, strategy="cuda:0 fp16")
-
-    def _apply_warpers(
-        self, scores: torch.Tensor, input_ids: torch.Tensor
-    ) -> torch.Tensor:
-        warpers.update_settings()
-        for sid in utils.koboldai_vars.sampler_order:
-            warper = Warper.from_id(sid)
-
-            if not warper.value_is_valid():
-                continue
-
-            if warper == warpers.RepetitionPenalty:
-                # Rep pen needs more data than other samplers
-                scores = warper.torch(scores, input_ids=input_ids)
-            else:
-                scores = warper.torch(scores)
-        return scores
-
-    def _sample_token(self, logits: torch.Tensor, input_ids: torch.Tensor) -> int:
-        probs = F.softmax(logits.float(), dim=-1)
-
-        if probs.device == torch.device("cpu"):
-            probs = probs.numpy()
-            sorted_ids = np.argsort(probs)
-            sorted_probs = probs[sorted_ids][::-1]
-
-            probs = self._apply_warpers(probs[None, :], input_ids)
-
-            # TODO: is this right?
-            probs[probs == -torch.inf] = 0.0
-
-            probs = probs / np.sum(probs)
-            out = np.random.choice(a=len(probs), p=probs)
-            return int(out)
-        else:
-            sorted_ids = torch.argsort(probs)
-            sorted_probs = probs[sorted_ids]
-            sorted_probs = torch.flip(sorted_probs, dims=(0,))
-
-            probs = self._apply_warpers(probs[None, :], input_ids)
-
-            # TODO: is this right?
-            probs[probs == -torch.inf] = 0.0
-
-            out = torch.multinomial(probs, num_samples=1)[0]
-            return int(out)
-
-    def _raw_generate(
-        self,
-        prompt_tokens: Union[List[int], torch.Tensor],
-        max_new: int,
-        gen_settings: GenerationSettings,
-        single_line: bool = False,
-        batch_count: int = 1,
-        seed: Optional[int] = None,
-        **kwargs,
-    ) -> GenerationResult:
-        if seed is not None:
-            torch.manual_seed(seed)
-
-        aux_device = utils.get_auxilary_device()
-        context = torch.tensor(prompt_tokens)[None, :].to(aux_device)
-        out = []
-
-        start_time = time.time()
-        with torch.no_grad():
-            logits, state = self.model.forward(prompt_tokens, None)
-            last_token = prompt_tokens[-1]
-
-            for _ in range(max_new):
-
-                logits, state = self.model.forward([last_token], state)
-                last_token = self._sample_token(logits, context)
-                out.append(last_token)
-                add = torch.tensor([[last_token]]).to(aux_device)
-                context = torch.cat((context, add), dim=-1)
-                self._post_token_gen(context)
-
-        logger.debug(
-            "torch_raw_generate: run generator {}s".format(time.time() - start_time)
-        )
-
-        return GenerationResult(
-            self,
-            out_batches=torch.tensor([out]),
-            prompt=prompt_tokens,
-            is_whole_generation=False,
-            output_includes_prompt=True,
-        )

From edd9c7d782c9c59f9052f41e9f21498d2cdcaef2 Mon Sep 17 00:00:00 2001
From: Henk
Date: Thu, 11 May 2023 15:13:59 +0200
Subject: [PATCH 08/12] Warning polish

---
 koboldai_settings.py        | 3 ++-
 modeling/inference_model.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/koboldai_settings.py b/koboldai_settings.py
index d8416df2..7bc88422 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -1129,7 +1129,7 @@ class story_settings(settings):

 class user_settings(settings):
     local_only_variables = ['importjs']
-    no_save_variables = ['importnum', 'importjs', 'loadselect', 'spselect', 'svowname', 'saveow', 'laststory', 'sid', "revision"]
+    no_save_variables = ['importnum', 'importjs', 'loadselect', 'spselect', 'svowname', 'saveow', 'laststory', 'sid', "revision", "model_selected"]
     settings_name = "user"
     def __init__(self, socketio):
         self._socketio = socketio
@@ -1185,6 +1185,7 @@ class user_settings(settings):
         self.horde_api_key = "0000000000"
         self.horde_worker_name = "My Awesome Instance"
         self.horde_url = "https://horde.koboldai.net"
+        self.model_selected = ""

     def __setattr__(self, name, value):
         new_variable = name not in self.__dict__
diff --git a/modeling/inference_model.py b/modeling/inference_model.py
index b253c5bf..e2329cf9 100644
--- a/modeling/inference_model.py
+++ b/modeling/inference_model.py
@@ -218,7 +218,7 @@ class InferenceModel:
             try:
                 return GenericTokenizer(try_get_tokenizer())
             except Exception as e:
-                logger.warning(f"Tokenizer falling back due to {e}")
+                logger.warning(f"Tokenizer falling back due to {e} (This can be normal behavior for some architectures that lack a slow tokenizer such as NeoX)")
                 # If we error on each attempt, raise the last one
                 if i == len(suppliers) - 1:
                     raise

From 20b54eb9ff829526161c2822ada507b6c80bee41 Mon Sep 17 00:00:00 2001
From: Henk
Date: Thu, 11 May 2023 19:06:39 +0200
Subject: [PATCH 09/12] Revert 4.29 due to unforseen consequences

---
 aiserver.py                  | 2 +-
 environments/huggingface.yml | 2 +-
 environments/rocm.yml        | 2 +-
 requirements.txt             | 2 +-
 requirements_mtj.txt         | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index b045ea71..1abdd31e 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -221,7 +221,7 @@ model_menu = {
         MenuFolder("Untuned Fairseq Dense", "fsdlist"),
         MenuFolder("Untuned Bloom", "bloomlist"),
         MenuFolder("Untuned XGLM", "xglmlist"),
-        MenuFolder("Official RWKV-4", "rwkvlist"),
+        #MenuFolder("Official RWKV-4", "rwkvlist"),
         MenuFolder("Untuned GPT2", "gpt2list"),
         MenuFolder("Online Services", "apilist"),
         MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER),
diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index 3d0ca633..af16423e 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -32,7 +32,7 @@ dependencies:
     - flask-ngrok
     - flask-cors
     - lupa==1.10
-    - transformers==4.29.*
+    - transformers==4.28.*
     - huggingface_hub==0.12.1
     - safetensors==0.3.1
     - accelerate==0.18.0
diff --git a/environments/rocm.yml b/environments/rocm.yml
index eb2927bd..ffcacfb6 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -30,7 +30,7 @@ dependencies:
     - flask-ngrok
     - flask-cors
     - lupa==1.10
-    - transformers==4.29.*
+    - transformers==4.28.*
     - huggingface_hub==0.12.1
     - safetensors==0.3.1
     - accelerate==0.18.0
diff --git a/requirements.txt b/requirements.txt
index 28fdb28c..c98b7252 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-transformers==4.29.*
+transformers==4.28.*
 huggingface_hub==0.12.1
 Flask==2.2.3
 Flask-SocketIO==5.3.2
diff --git a/requirements_mtj.txt b/requirements_mtj.txt
index 7fc866f0..b41b7ead 100644
--- a/requirements_mtj.txt
+++ b/requirements_mtj.txt
@@ -5,7 +5,7 @@ requests
 dm-haiku==0.0.9
 jax==0.3.25
 jaxlib==0.3.25
-transformers==4.29.*
+transformers==4.28.*
 chex == 0.1.5
 huggingface_hub==0.12.1
 progressbar2

From c16336f6467fe11a8644b551d5700986d2ef4bf6 Mon Sep 17 00:00:00 2001
From: somebody
Date: Thu, 11 May 2023 17:10:19 -0500
Subject: [PATCH 10/12] Add traceback to debug log on fallback

---
 modeling/inference_models/hf_torch.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index 990fabfc..14ddd7af 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -332,10 +332,13 @@ class HFTorchInferenceModel(HFInferenceModel):
                 raise

             logger.warning(f"Fell back to GPT2LMHeadModel due to {e}")
+            logger.debug(traceback.format_exc())
+
             try:
                 return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs)
             except Exception as e:
                 logger.warning(f"Fell back to GPTNeoForCausalLM due to {e}")
+                logger.debug(traceback.format_exc())
                 return GPTNeoForCausalLM.from_pretrained(location, **tf_kwargs)

     def get_hidden_size(self) -> int:

From 3065c1b40e758993565ea212ccf9f3b0db5c7f0e Mon Sep 17 00:00:00 2001
From: somebody
Date: Thu, 11 May 2023 17:10:43 -0500
Subject: [PATCH 11/12] Ignore missing keys in get_original_key

---
 modeling/inference_models/hf_torch.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index 14ddd7af..3f7c3967 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -465,19 +465,25 @@ class HFTorchInferenceModel(HFInferenceModel):
         device_map: Dict[str, Union[str, int]] = {}

         @functools.lru_cache(maxsize=None)
-        def get_original_key(key):
-            return max(
-                (
-                    original_key
-                    for original_key in utils.module_names
-                    if original_key.endswith(key)
-                ),
-                key=len,
-            )
+        def get_original_key(key) -> Optional[str]:
+            key_candidates = [
+                original_key
+                for original_key in utils.module_names
+                if original_key.endswith(key)
+            ]
+
+            if not key_candidates:
+                logger.debug(f"!!! No key candidates for {key}")
+                return None
+
+            return max(key_candidates, key=len)

         for key, value in model_dict.items():
             original_key = get_original_key(key)
+            if not original_key:
+                continue
+
             if isinstance(value, lazy_loader.LazyTensor) and not any(
                 original_key.startswith(n) for n in utils.layers_module_names
             ):

From 67df9b917f6a84445520e89a04080e8553356b15 Mon Sep 17 00:00:00 2001
From: Henk
Date: Fri, 12 May 2023 09:08:07 +0200
Subject: [PATCH 12/12] Reintroduce 4.29 Transformers

---
 aiserver.py                  | 2 +-
 environments/huggingface.yml | 2 +-
 environments/rocm.yml        | 2 +-
 requirements.txt             | 2 +-
 requirements_mtj.txt         | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 1abdd31e..b045ea71 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -221,7 +221,7 @@ model_menu = {
         MenuFolder("Untuned Fairseq Dense", "fsdlist"),
         MenuFolder("Untuned Bloom", "bloomlist"),
         MenuFolder("Untuned XGLM", "xglmlist"),
-        #MenuFolder("Official RWKV-4", "rwkvlist"),
+        MenuFolder("Official RWKV-4", "rwkvlist"),
         MenuFolder("Untuned GPT2", "gpt2list"),
         MenuFolder("Online Services", "apilist"),
         MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER),
diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index af16423e..3d0ca633 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -32,7 +32,7 @@ dependencies:
     - flask-ngrok
     - flask-cors
     - lupa==1.10
-    - transformers==4.28.*
+    - transformers==4.29.*
     - huggingface_hub==0.12.1
     - safetensors==0.3.1
     - accelerate==0.18.0
diff --git a/environments/rocm.yml b/environments/rocm.yml
index ffcacfb6..eb2927bd 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -30,7 +30,7 @@ dependencies:
     - flask-ngrok
     - flask-cors
     - lupa==1.10
-    - transformers==4.28.*
+    - transformers==4.29.*
     - huggingface_hub==0.12.1
     - safetensors==0.3.1
     - accelerate==0.18.0
diff --git a/requirements.txt b/requirements.txt
index c98b7252..28fdb28c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-transformers==4.28.*
+transformers==4.29.*
 huggingface_hub==0.12.1
 Flask==2.2.3
 Flask-SocketIO==5.3.2
diff --git a/requirements_mtj.txt b/requirements_mtj.txt
index b41b7ead..7fc866f0 100644
--- a/requirements_mtj.txt
+++ b/requirements_mtj.txt
@@ -5,7 +5,7 @@ requests
 dm-haiku==0.0.9
 jax==0.3.25
 jaxlib==0.3.25
-transformers==4.28.*
+transformers==4.29.*
 chex == 0.1.5
 huggingface_hub==0.12.1
 progressbar2