Merge pull request #402 from one-some/united

Patches: Make lazyload work with quantization
Authored by henk717 on 2023-07-17 23:53:14 +02:00, committed by GitHub
2 changed files with 2 additions and 3 deletions


@@ -78,7 +78,6 @@ class model_backend(HFTorchInferenceModel):
         }
 
         if self.use_4_bit:
-            self.lazy_load = False
             tf_kwargs.update({
                 "quantization_config":BitsAndBytesConfig(
                     load_in_4bit=True,
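Dropping `self.lazy_load = False` is the substance of the fix: requesting 4-bit no longer switches lazy loading off, so the quantization kwargs built here are applied while lazy tensor loading stays active. A hedged sketch of how a BitsAndBytesConfig like the one above reaches the model load, assuming transformers >= 4.30 with bitsandbytes installed (the model id and compute dtype are illustrative, not taken from this diff):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirrors the tf_kwargs assembled in the hunk above; concrete values here
# are illustrative stand-ins, not copied from the backend.
tf_kwargs = {
    "quantization_config": BitsAndBytesConfig(
        load_in_4bit=True,                     # quantize linear weights to 4-bit at load time
        bnb_4bit_compute_dtype=torch.float16,  # assumption: dtype not shown in this hunk
    ),
}

# With lazy_load no longer forced off, these kwargs coexist with the
# backend's lazy tensor loading.
model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m", **tf_kwargs)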

@@ -181,7 +181,7 @@ class LazyloadPatches:
 
         is_quantized = is_quantized or load_in_8bit
 
         if is_quantized:
-            from .utils.bitsandbytes import set_module_8bit_tensor_to_device
+            from transformers.utils.bitsandbytes import set_module_quantized_tensor_to_device
 
         error_msgs = []
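The import swap tracks two upstream changes at once: transformers renamed set_module_8bit_tensor_to_device to set_module_quantized_tensor_to_device when 4-bit support landed, and the relative `.utils.bitsandbytes` path only resolves from inside the transformers package itself, so this patched copy of the loader has to import it absolutely. A minimal sketch of the helper's call shape, assuming transformers around 4.30 (the toy module and tensor are illustrative; in later releases the module moved out of transformers.utils):

import torch
import torch.nn as nn
from transformers.utils.bitsandbytes import set_module_quantized_tensor_to_device

# A plain Linear stands in for the real model; the helper falls back to an
# ordinary parameter assignment when the target is not a bitsandbytes
# Int8/4-bit parameter, so this runs on CPU.
model = nn.Sequential(nn.Linear(4, 4))

# Materialize "0.weight" on the target device with a new value; this is the
# same call shape the patched loader uses below.
set_module_quantized_tensor_to_device(
    model, "0.weight", torch.device("cpu"), value=torch.zeros(4, 4)
)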
@@ -299,7 +299,7 @@ class LazyloadPatches:
                 fp16_statistics = None
 
             if "SCB" not in param_name:
-                set_module_8bit_tensor_to_device(
+                set_module_quantized_tensor_to_device(
                     model,
                     param_name,
                     param_device,
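For context on the `"SCB" not in param_name` guard: in the bitsandbytes 8-bit format, a quantized weight travels with a companion scale tensor whose key replaces "weight" with "SCB", and that tensor is handed over as fp16_statistics instead of being loaded as a parameter of its own. A hedged sketch of the surrounding loop, with a toy model and state dict standing in for the patch's real arguments:

import torch
import torch.nn as nn
from transformers.utils.bitsandbytes import set_module_quantized_tensor_to_device

model = nn.Sequential(nn.Linear(2, 2))
param_device = torch.device("cpu")

# Toy state dict; a real 8-bit checkpoint could also hold "0.SCB" entries,
# produced by replacing "weight" with "SCB" in the key.
state_dict = {"0.weight": torch.zeros(2, 2), "0.bias": torch.zeros(2)}

for param_name, param in state_dict.items():
    if "SCB" in param_name:
        continue  # scale tensors ride along with their weight, never loaded alone
    fp16_statistics = state_dict.get(param_name.replace("weight", "SCB"))
    set_module_quantized_tensor_to_device(
        model, param_name, param_device, value=param, fp16_statistics=fp16_statistics
    )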