From 1ef515f4c22fc48241f0b825bb47004df17990f9 Mon Sep 17 00:00:00 2001
From: 0cc4m
Date: Mon, 17 Apr 2023 07:21:18 +0200
Subject: [PATCH] Fix lazy-loading on 4-bit

---
 modeling/inference_models/hf_torch.py      | 19 +++++++++++--------
 modeling/inference_models/hf_torch_4bit.py |  2 --
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index a2b2ff80..53b02e6d 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -412,14 +412,17 @@ class HFTorchInferenceModel(HFInferenceModel):
 
             @functools.lru_cache(maxsize=None)
             def get_original_key(key):
-                return max(
-                    (
-                        original_key
-                        for original_key in utils.module_names
-                        if original_key.endswith(key)
-                    ),
-                    key=len,
-                )
+                try:
+                    return max(
+                        (
+                            original_key
+                            for original_key in utils.module_names
+                            if original_key.endswith(key)
+                        ),
+                        key=len,
+                    )
+                except ValueError:
+                    return key
 
             for key, value in model_dict.items():
                 original_key = get_original_key(key)
diff --git a/modeling/inference_models/hf_torch_4bit.py b/modeling/inference_models/hf_torch_4bit.py
index 21f4ebfe..4b02d642 100644
--- a/modeling/inference_models/hf_torch_4bit.py
+++ b/modeling/inference_models/hf_torch_4bit.py
@@ -104,8 +104,6 @@ class HFTorch4BitInferenceModel(HFTorchInferenceModel):
         )
 
         utils.koboldai_vars.model = self.model_name
-        self.lazy_load = False
-
         self.init_model_config()
 
         gpulayers = utils.args.breakmodel_gpulayers
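
Note on the change (commentary, not part of the patch): removing `self.lazy_load = False` from the 4-bit model class re-enables lazy loading there, and the `get_original_key` helper must then tolerate checkpoint keys that match nothing in `utils.module_names`. Python's `max()` raises `ValueError: max() arg is an empty sequence` when given an empty iterable and no `default=`, which is presumably the failure mode this patch addresses; the handler falls back to returning the key itself. A minimal standalone sketch of the patched helper, using a hypothetical `module_names` list in place of `utils.module_names`:

# Standalone sketch of the patched get_original_key (the
# module_names list here is a hypothetical stand-in for
# utils.module_names).
import functools

module_names = [
    "model.layers.0.self_attn.q_proj.weight",
    "model.layers.0.self_attn.k_proj.weight",
]

@functools.lru_cache(maxsize=None)
def get_original_key(key):
    try:
        # Longest module name that ends with this checkpoint key;
        # max() raises ValueError if the generator yields nothing.
        return max(
            (name for name in module_names if name.endswith(key)),
            key=len,
        )
    except ValueError:
        # No module name matched: return the key unchanged instead
        # of crashing the lazy-load callback.
        return key

print(get_original_key("q_proj.weight"))  # -> model.layers.0.self_attn.q_proj.weight
print(get_original_key("qweight"))        # -> qweight (fallback path)

An equivalent formulation would pass `default=key` to `max()` and drop the try/except entirely; the patch instead wraps the original expression, which leaves the existing generator untouched.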