From 554af7b1754fa2e574fbbcfa2a612b13969bda63 Mon Sep 17 00:00:00 2001
From: Llama <34464159+pi6am@users.noreply.github.com>
Date: Sun, 27 Aug 2023 23:56:02 -0700
Subject: [PATCH] Modify exllama to load unrenamed gptq quantized models

Read config.json and enable exllama loading if the model has a
`quantization_config` with a `quant_method` of `gptq`. Note that this
implementation is limited and only supports model.safetensors. That
said, this supports loading popular gptq quantized models without
renaming or symlinking the model file.
---
 modeling/inference_models/exllama/class.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/modeling/inference_models/exllama/class.py b/modeling/inference_models/exllama/class.py
index 737afa88..67f54073 100644
--- a/modeling/inference_models/exllama/class.py
+++ b/modeling/inference_models/exllama/class.py
@@ -49,9 +49,16 @@ def load_model_gptq_settings(path):
     gptq_model = False
     gptq_file = False
+    gptq_in_config = False
+
+    try:
+        if js['quantization_config']['quant_method'] == "gptq":
+            gptq_in_config = True
+    except (KeyError, TypeError):
+        pass
 
     gptq_legacy_files = glob.glob(os.path.join(path, "*4bit*.safetensors"))
-    if "gptq_bits" in js:
+    if "gptq_bits" in js or gptq_in_config:
         gptq_model = True
         gptq_file = os.path.join(path, "model.safetensors")
     elif gptq_legacy_files:
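
For reference, the check above targets the `quantization_config` block that the
Hugging Face GPTQ toolchain writes into a model's config.json. A minimal sketch
of the relevant portion is shown below; only `quant_method` is inspected by this
patch, and the other field values are illustrative of a typical 4-bit quantization:

    {
      "quantization_config": {
        "quant_method": "gptq",
        "bits": 4,
        "group_size": 128,
        "desc_act": false
      }
    }

If this block is present (or the legacy `gptq_bits` key is set), the loader
assumes the weights live in model.safetensors alongside config.json.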