Permit CPU layers on 4-bit (Worse than GGML)

This commit is contained in:
Henk
2023-07-18 21:44:34 +02:00
parent 5f2600d338
commit 22e7baec52

View File

@@ -88,7 +88,8 @@ class model_backend(HFTorchInferenceModel):
         load_in_4bit=True,
         bnb_4bit_compute_dtype=torch.float16,
         bnb_4bit_use_double_quant=True,
-        bnb_4bit_quant_type='nf4'
+        bnb_4bit_quant_type='nf4',
+        llm_int8_enable_fp32_cpu_offload=True
     ),
 })