Patches: Make lazy loading work on quantized models

I want to watch YouTube while my model is loading without locking up my
system. >:(
This commit is contained in:
somebody
2023-07-17 16:47:31 -05:00
parent e8d84bb787
commit 23b95343bd
2 changed files with 2 additions and 3 deletions

View File

@@ -78,7 +78,6 @@ class model_backend(HFTorchInferenceModel):
}
if self.use_4_bit:
self.lazy_load = False
tf_kwargs.update({
"quantization_config":BitsAndBytesConfig(
load_in_4bit=True,