diff --git a/.gitmodules b/.gitmodules
index c6f4b308..4a1fb7c9 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -8,3 +8,7 @@
 	path = repos/gptq
 	url = https://github.com/0cc4m/GPTQ-for-LLaMa
 	branch = a8303654c200c25577130466e5f9bc1e70fc8a50
+[submodule "repos/hf_bleeding_edge"]
+	path = repos/hf_bleeding_edge
+	url = https://github.com/0cc4m/hf_bleeding_edge
+	branch = b5d0b80c6947605b9ccf080fc17b68a516ea5857
diff --git a/aiserver.py b/aiserver.py
index 80518450..bb6cc171 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1621,7 +1621,7 @@ def get_layer_count(model, directory=""):
         else:
             if(directory):
                 model = directory
-            from transformers import AutoConfig
+            from repos.hf_bleeding_edge import AutoConfig
             if(os.path.isdir(model.replace('/', '_'))):
                 model_config = AutoConfig.from_pretrained(model.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache")
             elif(is_model_downloaded(model)):
diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index e5fb939c..a179c468 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -49,3 +49,4 @@ dependencies:
     - diffusers
     - --find-links=https://0cc4m.github.io/KoboldAI/gptq-whl-links.html
     - quant_cuda
+    - einops
diff --git a/environments/rocm.yml b/environments/rocm.yml
index 9358575d..d0daf4f2 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -44,3 +44,4 @@ dependencies:
     - diffusers
     - --find-links=https://0cc4m.github.io/KoboldAI/gptq-whl-links.html
     - quant_rocm
+    - einops
diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py
index 9e30a7fd..61004db5 100644
--- a/modeling/inference_models/generic_hf_torch.py
+++ b/modeling/inference_models/generic_hf_torch.py
@@ -6,7 +6,8 @@ import torch
 import shutil
 from typing import Union
 
-from transformers import AutoModelForCausalLM, GPTNeoForCausalLM, GPT2LMHeadModel
+from transformers import GPTNeoForCausalLM, GPT2LMHeadModel
+from repos.hf_bleeding_edge import AutoModelForCausalLM
 
 import utils
 import modeling.lazy_loader as lazy_loader
diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py
index 99e55be4..8c797940 100644
--- a/modeling/inference_models/hf.py
+++ b/modeling/inference_models/hf.py
@@ -1,6 +1,6 @@
 import os
 from typing import Optional
-from transformers import AutoConfig
+from repos.hf_bleeding_edge import AutoConfig
 
 import utils
 import koboldai_settings
diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index 3cc28291..e0081c90 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -19,9 +19,9 @@ from transformers import (
     StoppingCriteria,
     GPTNeoForCausalLM,
     GPT2LMHeadModel,
-    AutoModelForCausalLM,
     LogitsProcessorList,
 )
+from repos.hf_bleeding_edge import AutoModelForCausalLM
 
 import utils
 import modeling.lazy_loader as lazy_loader
diff --git a/modeling/inference_models/hf_torch_4bit.py b/modeling/inference_models/hf_torch_4bit.py
index 5eb8d60c..75fb9ddf 100644
--- a/modeling/inference_models/hf_torch_4bit.py
+++ b/modeling/inference_models/hf_torch_4bit.py
@@ -9,7 +9,8 @@ import shutil
 import sys
 from typing import Union
 
-from transformers import AutoModelForCausalLM, GPTNeoForCausalLM, AutoTokenizer, LlamaTokenizer
+from transformers import GPTNeoForCausalLM, AutoTokenizer, LlamaTokenizer
+from repos.hf_bleeding_edge import AutoModelForCausalLM
 
 import utils
 import modeling.lazy_loader as lazy_loader
diff --git a/repos/__init__.py b/repos/__init__.py
new file mode 100644
index 00000000..af438273
--- /dev/null
+++ b/repos/__init__.py
@@ -0,0 +1 @@
+from . import hf_bleeding_edge
diff --git a/repos/hf_bleeding_edge b/repos/hf_bleeding_edge
new file mode 160000
index 00000000..b5d0b80c
--- /dev/null
+++ b/repos/hf_bleeding_edge
@@ -0,0 +1 @@
+Subproject commit b5d0b80c6947605b9ccf080fc17b68a516ea5857
diff --git a/utils.py b/utils.py
index 13ebb6a3..89b9fb4f 100644
--- a/utils.py
+++ b/utils.py
@@ -184,7 +184,7 @@ def decodenewlines(txt):
 # Returns number of layers given an HF model config
 #==================================================================#
 def num_layers(config):
-    return config["n_layer"] if isinstance(config, dict) else config.num_layers if hasattr(config, "num_layers") else config.n_layer if hasattr(config, "n_layer") else config.num_hidden_layers if hasattr(config, 'num_hidden_layers') else None
+    return config["n_layer"] if isinstance(config, dict) else config.num_layers if hasattr(config, "num_layers") else config.n_layer if hasattr(config, "n_layer") else config.num_hidden_layers if hasattr(config, 'num_hidden_layers') else config.n_layers if hasattr(config, "n_layers") else None
 
 #==================================================================#
 # Downloads huggingface checkpoints using aria2c if possible