Mirror of https://github.com/KoboldAI/KoboldAI-Client.git
Permit CPU layers on 4-bit (Worse than GGML)
@@ -88,7 +88,8 @@ class model_backend(HFTorchInferenceModel):
                     load_in_4bit=True,
                     bnb_4bit_compute_dtype=torch.float16,
                     bnb_4bit_use_double_quant=True,
-                    bnb_4bit_quant_type='nf4'
+                    bnb_4bit_quant_type='nf4',
+                    llm_int8_enable_fp32_cpu_offload=True
                 ),
             })
 
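The diff adds `llm_int8_enable_fp32_cpu_offload=True` to the existing 4-bit `BitsAndBytesConfig`. With this flag set, any modules that the device map places on the CPU are kept in fp32 instead of being quantized, which is what allows a 4-bit model to be split across GPU and CPU RAM (and why the commit title notes it is slower than GGML for the CPU portion). Below is a minimal sketch of how such a config is normally passed to transformers' `from_pretrained`; the model name and `device_map` value are illustrative assumptions, not taken from the commit.

```python
# Sketch: load a model with the 4-bit config from this commit while
# permitting some layers to spill onto the CPU in fp32.
# The model name and device_map are examples, not specified by the commit.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    # Without this flag, loading fails if the device map assigns any module
    # to the CPU; with it, CPU-resident modules stay in fp32 (unquantized).
    llm_int8_enable_fp32_cpu_offload=True,
)

model = AutoModelForCausalLM.from_pretrained(
    "KoboldAI/OPT-6.7B-Nerybus-Mix",  # example model, not from the commit
    quantization_config=quant_config,
    device_map="auto",  # lets accelerate place overflow layers on the CPU
)
```

In this sketch, GPU-resident layers run in 4-bit NF4 with fp16 compute, while CPU-resident layers run as plain fp32, so generation speed drops sharply as more layers are offloaded.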