Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)
Merge pull request #402 from one-some/united
Patches: Make lazyload work with quantization
@@ -78,7 +78,6 @@ class model_backend(HFTorchInferenceModel):
             }
 
         if self.use_4_bit:
-            self.lazy_load = False
             tf_kwargs.update({
                 "quantization_config":BitsAndBytesConfig(
                     load_in_4bit=True,
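The removed line is the whole fix in this file: 4-bit mode no longer forces lazy loading off. For context, a minimal sketch of the kind of 4-bit load that tf_kwargs configures above (not KoboldAI's code; the model id and compute dtype are placeholder assumptions):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Placeholder values; KoboldAI builds the equivalent config via tf_kwargs.
config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # matmuls run in fp16
)

model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-125m",  # placeholder model id
    quantization_config=config,
    device_map="auto",
)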
@@ -181,7 +181,7 @@ class LazyloadPatches:
         is_quantized = is_quantized or load_in_8bit
 
         if is_quantized:
-            from .utils.bitsandbytes import set_module_8bit_tensor_to_device
+            from transformers.utils.bitsandbytes import set_module_quantized_tensor_to_device
 
         error_msgs = []
 
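The import change tracks the helper rename in transformers (around v4.30, when the 8-bit-only code path was generalized to cover 4-bit). A version-tolerant import, shown as a sketch rather than what the patch does (the diff simply switches to the new name unconditionally):

try:
    # transformers >= ~4.30: one helper handles both 8-bit and 4-bit tensors
    from transformers.utils.bitsandbytes import set_module_quantized_tensor_to_device
except ImportError:
    # older transformers: fall back to the 8-bit-only predecessor
    from transformers.utils.bitsandbytes import (
        set_module_8bit_tensor_to_device as set_module_quantized_tensor_to_device,
    )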
@@ -299,7 +299,7 @@ class LazyloadPatches:
                 fp16_statistics = None
 
                 if "SCB" not in param_name:
-                    set_module_8bit_tensor_to_device(
+                    set_module_quantized_tensor_to_device(
                         model,
                         param_name,
                         param_device,
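The call site is unchanged apart from the name. Roughly what this step does during a lazy load, as a hedged sketch with illustrative names rather than the surrounding KoboldAI code: the model's copy of each weight still sits on the meta device, and the helper swaps in the tensor just read from the checkpoint, quantizing it for the target device.

import torch
from transformers.utils.bitsandbytes import set_module_quantized_tensor_to_device

def load_param(model, param_name: str, param: torch.Tensor, param_device) -> None:
    # Hypothetical wrapper. `param` was just read from the checkpoint;
    # the tensor at `param_name` inside `model` is still a meta tensor.
    # The helper replaces it with `param`, quantized to 8-bit or 4-bit
    # according to the model's quantization config, on `param_device`.
    set_module_quantized_tensor_to_device(
        model,
        param_name,
        param_device,
        value=param,
    )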