Fix lazy-loading on 4-bit

This commit is contained in:
0cc4m
2023-04-17 07:21:18 +02:00
parent 4d34f9b7de
commit 1ef515f4c2
2 changed files with 11 additions and 10 deletions

View File

@@ -412,14 +412,17 @@ class HFTorchInferenceModel(HFInferenceModel):
@functools.lru_cache(maxsize=None)
def get_original_key(key):
    """Map a (possibly truncated) state-dict key to its full module name.

    Returns the longest entry in ``utils.module_names`` that ends with
    ``key``. If no entry matches, returns ``key`` unchanged — the bare
    ``max()`` over an empty generator used to raise ``ValueError`` here,
    which broke lazy-loading for checkpoints whose keys aren't present
    in the module-name list (e.g. 4-bit models).

    NOTE(review): assumes ``utils.module_names`` is an iterable of str
    populated before the lazy-load loop runs — confirm against caller.
    """
    # max(..., default=key) handles the no-match case directly instead
    # of catching ValueError; behavior is identical to the try/except
    # form but avoids exception-driven control flow in a per-key helper.
    return max(
        (
            original_key
            for original_key in utils.module_names
            if original_key.endswith(key)
        ),
        key=len,
        default=key,
    )
for key, value in model_dict.items():
original_key = get_original_key(key)

View File

@@ -104,8 +104,6 @@ class HFTorch4BitInferenceModel(HFTorchInferenceModel):
)
utils.koboldai_vars.model = self.model_name
self.lazy_load = False
self.init_model_config()
gpulayers = utils.args.breakmodel_gpulayers