Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00).
Commit: "Fix lazy-loading on 4-bit".
This commit is contained in:
@@ -412,14 +412,17 @@ class HFTorchInferenceModel(HFInferenceModel):
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def get_original_key(key, module_names=None):
    """Resolve a (possibly shortened) state-dict key to the longest
    matching original module name.

    Args:
        key: State-dict key to resolve.
        module_names: Optional hashable iterable of candidate original
            names (e.g. a tuple); must be hashable because results are
            memoized via lru_cache. Defaults to ``utils.module_names``.

    Returns:
        The longest candidate that ends with ``key``, or ``key`` itself
        when no candidate matches.
    """
    candidates = utils.module_names if module_names is None else module_names
    # Longest suffix match wins: a fully-qualified name is preferred over
    # a shorter partial one. `default=key` makes the no-match case return
    # the key unchanged instead of raising ValueError on an empty iterable.
    return max(
        (
            original_key
            for original_key in candidates
            if original_key.endswith(key)
        ),
        key=len,
        default=key,
    )
|
||||
|
||||
for key, value in model_dict.items():
|
||||
original_key = get_original_key(key)
|
||||
|
@@ -104,8 +104,6 @@ class HFTorch4BitInferenceModel(HFTorchInferenceModel):
|
||||
)
|
||||
utils.koboldai_vars.model = self.model_name
|
||||
|
||||
self.lazy_load = False
|
||||
|
||||
self.init_model_config()
|
||||
|
||||
gpulayers = utils.args.breakmodel_gpulayers
|
||||
|
Reference in New Issue
Block a user