mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-06-05 21:59:24 +02:00
Preliminary HF GPTQ changes
This commit is contained in:
@@ -57,6 +57,7 @@ class model_backend(HFTorchInferenceModel):
|
||||
temp = json.load(f)
|
||||
else:
|
||||
temp = {}
|
||||
if not hasattr(self.model_config, 'quantization_config'):
|
||||
requested_parameters.append({
|
||||
"uitype": "dropdown",
|
||||
"unit": "text",
|
||||
@@ -105,6 +106,7 @@ class model_backend(HFTorchInferenceModel):
|
||||
"low_cpu_mem_usage": True,
|
||||
}
|
||||
|
||||
if not hasattr(self.model_config, 'quantization_config'):
|
||||
if self.quantization == "8bit":
|
||||
tf_kwargs.update({
|
||||
"quantization_config":BitsAndBytesConfig(
|
||||
|
@@ -21,7 +21,7 @@ from pathlib import Path
|
||||
|
||||
|
||||
model_backend_type = "GPTQ"
|
||||
model_backend_name = "Huggingface GPTQ"
|
||||
model_backend_name = "Legacy GPTQ"
|
||||
|
||||
|
||||
def load_model_gptq_settings(path):
|
||||
|
Reference in New Issue
Block a user