Modify exllama to load unrenamed gptq quantized models
Read config.json and enable exllama loading if the model has a `quantization_config` with a `quant_method` of `gptq`. Note that this implementation is limited and only supports `model.safetensors`. That said, it allows loading popular gptq-quantized models without renaming or symlinking the model file.
@@ -49,9 +49,16 @@ def load_model_gptq_settings(path):
     gptq_model = False
     gptq_file = False
 
+    gptq_in_config = False
+
+    try:
+        if js['quantization_config']['quant_method'] == "gptq":
+            gptq_in_config = True
+    except:
+        pass
+
     gptq_legacy_files = glob.glob(os.path.join(path, "*4bit*.safetensors"))
-    if "gptq_bits" in js:
+    if "gptq_bits" in js or gptq_in_config:
         gptq_model = True
         gptq_file = os.path.join(path, "model.safetensors")
     elif gptq_legacy_files: