AutoGPTQ fallback

Author: Henk
Date: 2023-08-10 17:18:53 +02:00
parent 1b253ce95f
commit 54addfc234


@@ -323,6 +323,7 @@ class model_backend(HFTorchInferenceModel):
             enable=self.lazy_load,
             dematerialized_modules=False,
         ):
+            try:
                 if model_type == "gptj":
                     model = load_quant_offload_device_map(gptj_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias)
                 elif model_type == "gpt_neox":
@@ -336,6 +337,8 @@ class model_backend(HFTorchInferenceModel):
                 elif model_type == "gpt_bigcode":
                     model = load_quant_offload_device_map(bigcode_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias).half()
                 else:
+                    raise RuntimeError("Model not supported by Occam's GPTQ")
+            except:
                 try:
                     import auto_gptq
                     from auto_gptq import AutoGPTQForCausalLM
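For context, the change makes the Occam's GPTQ loaders the first attempt and AutoGPTQ the catch-all: the existing if/elif loader chain is wrapped in a try, unsupported architectures raise, and the except branch hands the checkpoint to AutoGPTQ instead. A minimal sketch of that control flow follows (not the literal file contents): the occam_loaders mapping is a hypothetical stand-in for the load_quant_offload_device_map(...) calls in the diff, and the from_quantized() usage is an assumption, since the diff only shows the AutoGPTQForCausalLM import.

# Sketch of the fallback pattern this commit introduces, under the
# assumptions named above.
def load_gptq_model(model_type, location, device_map, occam_loaders):
    try:
        if model_type in occam_loaders:
            # First attempt: the architecture-specific Occam's GPTQ loader
            # (hypothetical stand-in for the calls shown in the diff).
            model = occam_loaders[model_type](location, device_map)
        else:
            # Same signal as the diff: unsupported architecture -> fall back.
            raise RuntimeError("Model not supported by Occam's GPTQ")
    except Exception:
        # Fallback: hand the quantized checkpoint to AutoGPTQ instead.
        # from_quantized() with a device_map is an assumed usage of auto_gptq.
        from auto_gptq import AutoGPTQForCausalLM
        model = AutoGPTQForCausalLM.from_quantized(location, device_map=device_map)
    return model

Note that the bare except: in the actual diff means any failure in the Occam path (not only an unsupported model type) reroutes loading to AutoGPTQ.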