Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)
Merge pull request #398 from Alephrin/patch-1
Speeds up bitsandbytes (bnb) 4-bit mode with a custom BitsAndBytesConfig
This commit is contained in:
@@ -6,7 +6,7 @@ import torch
|
|||||||
import shutil
|
import shutil
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
from transformers import AutoModelForCausalLM, GPTNeoForCausalLM, GPT2LMHeadModel
|
from transformers import AutoModelForCausalLM, GPTNeoForCausalLM, GPT2LMHeadModel, BitsAndBytesConfig
|
||||||
|
|
||||||
import utils
|
import utils
|
||||||
import modeling.lazy_loader as lazy_loader
|
import modeling.lazy_loader as lazy_loader
|
||||||
@@ -80,7 +80,12 @@ class model_backend(HFTorchInferenceModel):
|
|||||||
if self.use_4_bit:
|
if self.use_4_bit:
|
||||||
self.lazy_load = False
|
self.lazy_load = False
|
||||||
tf_kwargs.update({
|
tf_kwargs.update({
|
||||||
"load_in_4bit": True,
|
"quantization_config":BitsAndBytesConfig(
|
||||||
|
load_in_4bit=True,
|
||||||
|
bnb_4bit_compute_dtype=torch.float16,
|
||||||
|
bnb_4bit_use_double_quant=True,
|
||||||
|
bnb_4bit_quant_type='nf4'
|
||||||
|
),
|
||||||
})
|
})
|
||||||
|
|
||||||
if self.model_type == "gpt2":
|
if self.model_type == "gpt2":
|
||||||
|
Reference in New Issue
Block a user