mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-06-05 21:59:24 +02:00
Fix lazy-loading on 4-bit
This commit is contained in:
@@ -412,14 +412,17 @@ class HFTorchInferenceModel(HFInferenceModel):
|
|||||||
|
|
||||||
@functools.lru_cache(maxsize=None)
def get_original_key(key):
    """Resolve a (possibly shortened) state-dict key to its original module name.

    Picks the longest entry in ``utils.module_names`` that ends with ``key``,
    so the most specific (fully qualified) module path wins.

    Args:
        key: A state-dict key, possibly missing its leading module path.

    Returns:
        The longest matching original module name, or ``key`` unchanged
        when nothing in ``utils.module_names`` matches.
    """
    try:
        return max(
            (
                original_key
                for original_key in utils.module_names
                if original_key.endswith(key)
            ),
            key=len,
        )
    except ValueError:
        # max() raises ValueError on an empty sequence — i.e. no known
        # module name ends with this key (happens with 4-bit lazy-loading,
        # where some keys are absent from module_names). Fall back to the
        # key itself instead of crashing the load.
        return key
|
||||||
|
|
||||||
for key, value in model_dict.items():
|
for key, value in model_dict.items():
|
||||||
original_key = get_original_key(key)
|
original_key = get_original_key(key)
|
||||||
|
@@ -104,8 +104,6 @@ class HFTorch4BitInferenceModel(HFTorchInferenceModel):
|
|||||||
)
|
)
|
||||||
utils.koboldai_vars.model = self.model_name
|
utils.koboldai_vars.model = self.model_name
|
||||||
|
|
||||||
self.lazy_load = False
|
|
||||||
|
|
||||||
self.init_model_config()
|
self.init_model_config()
|
||||||
|
|
||||||
gpulayers = utils.args.breakmodel_gpulayers
|
gpulayers = utils.args.breakmodel_gpulayers
|
||||||
|
Reference in New Issue
Block a user