From 554af7b1754fa2e574fbbcfa2a612b13969bda63 Mon Sep 17 00:00:00 2001
From: Llama <34464159+pi6am@users.noreply.github.com>
Date: Sun, 27 Aug 2023 23:56:02 -0700
Subject: [PATCH] Modify exllama to load unrenamed gptq quantized models

Read config.json and enable exllama loading if the model has a
`quantization_config` with a `quant_method` of `gptq`. Note that this
implementation is limited and only supports model.safetensors. That
said, this supports loading popular gptq quantized models without
renaming or symlinking the model file.
---
 modeling/inference_models/exllama/class.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/modeling/inference_models/exllama/class.py b/modeling/inference_models/exllama/class.py
index 737afa88..67f54073 100644
--- a/modeling/inference_models/exllama/class.py
+++ b/modeling/inference_models/exllama/class.py
@@ -49,9 +49,16 @@ def load_model_gptq_settings(path):
     gptq_model = False
     gptq_file = False
+    gptq_in_config = False
+
+    try:
+        if js['quantization_config']['quant_method'] == "gptq":
+            gptq_in_config = True
+    except (KeyError, TypeError):
+        pass
 
     gptq_legacy_files = glob.glob(os.path.join(path, "*4bit*.safetensors"))
-    if "gptq_bits" in js:
+    if "gptq_bits" in js or gptq_in_config:
         gptq_model = True
         gptq_file = os.path.join(path, "model.safetensors")
     elif gptq_legacy_files:
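
For reference, the check above targets the `quantization_config` block that the
Hugging Face GPTQ toolchain writes into a model's config.json. A minimal sketch
of the relevant portion is shown below; only `quant_method` is inspected by this
patch, and the other field values are illustrative of a typical 4-bit quantization:

    {
      "quantization_config": {
        "quant_method": "gptq",
        "bits": 4,
        "group_size": 128,
        "desc_act": false
      }
    }

If this block is present (or the legacy `gptq_bits` key is set), the loader
assumes the weights live in model.safetensors alongside config.json.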