Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)
Don't disable exllama
@@ -389,7 +389,7 @@ class model_backend(HFTorchInferenceModel):
             except:
                 autogptq_failed = True  # Ugly hack to get it to free the VRAM of the last attempt like we do above, better suggestions welcome - Henk
             if autogptq_failed:
-                model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, disable_exllama=True)
+                model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, inject_fused_attention=False)
             # Patch in embeddings function
             def get_input_embeddings(self):
                 return self.model.get_input_embeddings()
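For context, here is a minimal sketch of the fallback pattern this hunk sits in: attempt a normal AutoGPTQ load, free the VRAM of the failed attempt, then retry. With this commit the retry keeps the exllama kernels enabled and only turns off fused-attention injection (inject_fused_attention=False) instead of passing disable_exllama=True. The first attempt's keyword arguments, the VRAM-freeing calls, and the location / gptq_file / device_map values below are illustrative assumptions, not copied from the KoboldAI source.

    # Sketch of the load-then-retry pattern; placeholder values are hypothetical.
    import gc
    from pathlib import Path

    import torch
    from auto_gptq import AutoGPTQForCausalLM

    location = "/path/to/model"        # hypothetical model directory
    gptq_file = "model.safetensors"    # hypothetical quantized checkpoint name
    device_map = "auto"                # hypothetical device map

    autogptq_failed = False
    try:
        # First attempt: default kwargs, exllama kernels stay enabled.
        model = AutoGPTQForCausalLM.from_quantized(
            location,
            model_basename=Path(gptq_file).stem,
            use_safetensors=gptq_file.endswith(".safetensors"),
            device_map=device_map,
        )
    except Exception:
        # Free the VRAM claimed by the failed attempt before retrying.
        autogptq_failed = True
        gc.collect()
        torch.cuda.empty_cache()

    if autogptq_failed:
        # Retry with fused attention disabled rather than disabling exllama,
        # which is the behaviour this commit switches to.
        model = AutoGPTQForCausalLM.from_quantized(
            location,
            model_basename=Path(gptq_file).stem,
            use_safetensors=gptq_file.endswith(".safetensors"),
            device_map=device_map,
            inject_fused_attention=False,
        )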