Merge pull request #398 from Alephrin/patch-1

Speeds up bnb 4bit with a custom BitsAndBytesConfig
This commit is contained in:
henk717
2023-07-17 13:22:44 +02:00
committed by GitHub

View File

@@ -6,7 +6,7 @@ import torch
 import shutil
 from typing import Union
-from transformers import AutoModelForCausalLM, GPTNeoForCausalLM, GPT2LMHeadModel
+from transformers import AutoModelForCausalLM, GPTNeoForCausalLM, GPT2LMHeadModel, BitsAndBytesConfig
 import utils
 import modeling.lazy_loader as lazy_loader
@@ -80,7 +80,12 @@ class model_backend(HFTorchInferenceModel):
         if self.use_4_bit:
             self.lazy_load = False
             tf_kwargs.update({
-                "load_in_4bit": True,
+                "quantization_config": BitsAndBytesConfig(
+                    load_in_4bit=True,
+                    bnb_4bit_compute_dtype=torch.float16,
+                    bnb_4bit_use_double_quant=True,
+                    bnb_4bit_quant_type='nf4'
+                ),
             })
         if self.model_type == "gpt2":