mirror of https://github.com/KoboldAI/KoboldAI-Client.git, synced 2025-06-05 21:59:24 +02:00
Fix GPT2
@@ -124,6 +124,7 @@ class model_backend(HFTorchInferenceModel):
             # We must disable low_cpu_mem_usage and if using a GPT-2 model
             # because GPT-2 is not compatible with this feature yet.
             tf_kwargs.pop("low_cpu_mem_usage", None)
+            tf_kwargs.pop("quantization_config", None)
 
             # Also, lazy loader doesn't support GPT-2 models
             self.lazy_load = False
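For context, a minimal sketch of what this hunk changes in the loading path, assuming Hugging Face transformers' AutoModelForCausalLM; the model id, the kwarg values, and the "gpt2" substring check are illustrative stand-ins, not KoboldAI's actual loading code:

import torch
from transformers import AutoModelForCausalLM

model_id = "gpt2"  # illustrative checkpoint id

# kwargs the backend might otherwise pass through (illustrative values)
tf_kwargs = {"low_cpu_mem_usage": True, "torch_dtype": torch.float32}

# Per the comment in the hunk above, GPT-2 is not compatible with these
# loading features yet, so strip them before calling from_pretrained.
if "gpt2" in model_id:  # stand-in for the backend's model-type check
    tf_kwargs.pop("low_cpu_mem_usage", None)
    tf_kwargs.pop("quantization_config", None)

model = AutoModelForCausalLM.from_pretrained(model_id, **tf_kwargs)

dict.pop(key, None) is a safe no-op when the key was never set, which is why the new quantization_config removal can run unconditionally on the GPT-2 branch.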
@@ -126,7 +126,12 @@ class HFTorchInferenceModel(HFInferenceModel):
         return ret
 
     def get_auxilary_device(self) -> Union[str, int, torch.device]:
-        return self.breakmodel_config.primary_device
+        if self.breakmodel:
+            return self.breakmodel_config.primary_device
+        if self.usegpu:
+            return "cuda:0"
+        else:
+            return "cpu"
 
     def _get_target_dtype(self) -> Union[torch.float16, torch.float32]:
         if self.breakmodel_config.primary_device == "cpu":
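The rewrite above makes get_auxilary_device fall back when breakmodel is off: previously the method always returned breakmodel_config.primary_device, even for plain single-GPU (usegpu) and CPU runs. A self-contained sketch of the new fallback order, with breakmodel, usegpu, and breakmodel_config replaced by hypothetical parameters that mirror the real attributes:

from typing import Union

import torch

class DeviceConfig:
    # Hypothetical stand-in for the backend's breakmodel_config.
    primary_device: Union[str, int] = "cpu"

def get_auxilary_device(
    breakmodel: bool, usegpu: bool, config: DeviceConfig
) -> Union[str, int, torch.device]:
    # Same fallback order as the patched method: breakmodel's primary
    # device wins; otherwise a single-GPU run uses cuda:0; otherwise CPU.
    if breakmodel:
        return config.primary_device
    if usegpu:
        return "cuda:0"
    return "cpu"

# e.g. a non-breakmodel run reports cuda:0 when a GPU is available
print(get_auxilary_device(False, torch.cuda.is_available(), DeviceConfig()))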