Preliminary HF GPTQ changes

Henk
2023-08-21 01:58:52 +02:00
parent 6f557befa9
commit 3dd0e91fbb
2 changed files with 32 additions and 30 deletions

@@ -57,18 +57,19 @@ class model_backend(HFTorchInferenceModel):
                         temp = json.load(f)
                 else:
                     temp = {}
-                requested_parameters.append({
-                    "uitype": "dropdown",
-                    "unit": "text",
-                    "label": "Quantization",
-                    "id": "quantization",
-                    "default": temp['quantization'] if 'quantization' in temp else '4bit' if dependency_exists else '16-bit',
-                    "tooltip": "Whether or not to use BnB's 4-bit or 8-bit mode",
-                    "menu_path": "Layers",
-                    "children": [{'text': '4-bit', 'value': '4bit'}, {'text': '8-bit', 'value': '8bit'}, {'text': '16-bit', 'value':'16-bit'}],
-                    "extra_classes": "",
-                    "refresh_model_inputs": False
-                })
+                if not hasattr(self.model_config, 'quantization_config'):
+                    requested_parameters.append({
+                        "uitype": "dropdown",
+                        "unit": "text",
+                        "label": "Quantization",
+                        "id": "quantization",
+                        "default": temp['quantization'] if 'quantization' in temp else '4bit' if dependency_exists else '16-bit',
+                        "tooltip": "Whether or not to use BnB's 4-bit or 8-bit mode",
+                        "menu_path": "Layers",
+                        "children": [{'text': '4-bit', 'value': '4bit'}, {'text': '8-bit', 'value': '8bit'}, {'text': '16-bit', 'value':'16-bit'}],
+                        "extra_classes": "",
+                        "refresh_model_inputs": False
+                    })
         else:
             logger.warning("Bitsandbytes is not installed, you can not use Quantization for Huggingface models")
         return requested_parameters
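
A quick sketch of the check this hunk adds: checkpoints that were quantized ahead of time (GPTQ among them) ship a quantization_config entry in their config.json, which transformers exposes as an attribute on the loaded config object, so its mere presence is enough to hide the bitsandbytes dropdown. The model id below is a placeholder, not from this commit:

    from transformers import AutoConfig

    # Placeholder id; any checkpoint whose config.json carries a
    # quantization_config entry (e.g. a GPTQ export) behaves the same.
    config = AutoConfig.from_pretrained("someuser/some-model-GPTQ")

    if hasattr(config, "quantization_config"):
        # Already quantized: a BnB 4-bit/8-bit dropdown would be misleading.
        print("pre-quantized:", config.quantization_config)
    else:
        print("full-precision checkpoint; BnB quantization can be offered")
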
@@ -105,24 +106,25 @@ class model_backend(HFTorchInferenceModel):
"low_cpu_mem_usage": True, "low_cpu_mem_usage": True,
} }
if self.quantization == "8bit": if not hasattr(self.model_config, 'quantization_config'):
tf_kwargs.update({ if self.quantization == "8bit":
"quantization_config":BitsAndBytesConfig( tf_kwargs.update({
load_in_8bit=True, "quantization_config":BitsAndBytesConfig(
llm_int8_enable_fp32_cpu_offload=True load_in_8bit=True,
), llm_int8_enable_fp32_cpu_offload=True
}) ),
})
if self.quantization == "4bit" or utils.koboldai_vars.colab_arg: if self.quantization == "4bit" or utils.koboldai_vars.colab_arg:
tf_kwargs.update({ tf_kwargs.update({
"quantization_config":BitsAndBytesConfig( "quantization_config":BitsAndBytesConfig(
load_in_4bit=True, load_in_4bit=True,
bnb_4bit_compute_dtype=torch.float16, bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_use_double_quant=True, bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type='nf4', bnb_4bit_quant_type='nf4',
llm_int8_enable_fp32_cpu_offload=True llm_int8_enable_fp32_cpu_offload=True
), ),
}) })
if self.model_type == "gpt2": if self.model_type == "gpt2":
# We must disable low_cpu_mem_usage and if using a GPT-2 model # We must disable low_cpu_mem_usage and if using a GPT-2 model
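
For context, a minimal sketch of where these tf_kwargs end up, assuming a stock transformers load; the model id and device map are placeholders, not taken from this commit:

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # Mirrors the 4-bit branch above; "facebook/opt-1.3b" is a stand-in.
    model = AutoModelForCausalLM.from_pretrained(
        "facebook/opt-1.3b",
        device_map="auto",
        low_cpu_mem_usage=True,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            llm_int8_enable_fp32_cpu_offload=True,
        ),
    )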

@@ -21,7 +21,7 @@ from pathlib import Path
 model_backend_type = "GPTQ"
-model_backend_name = "Huggingface GPTQ"
+model_backend_name = "Legacy GPTQ"
 
 def load_model_gptq_settings(path):
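
The rename to "Legacy GPTQ" lines up with the commit title: recent transformers releases (4.32+) can load GPTQ checkpoints natively when optimum and auto-gptq are installed, by reading the checkpoint's quantization_config, and that is presumably the "HF GPTQ" path being prepared here. A minimal sketch of that native path, with a placeholder model id:

    from transformers import AutoModelForCausalLM

    # With optimum and auto-gptq installed, transformers detects the
    # quantization_config in the checkpoint and loads the GPTQ weights
    # directly, no dedicated backend required.
    model = AutoModelForCausalLM.from_pretrained(
        "someuser/some-7B-GPTQ",  # placeholder GPTQ checkpoint
        device_map="auto",
    )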