Mirror of https://github.com/KoboldAI/KoboldAI-Client.git
Fix GPT2
@@ -124,7 +124,8 @@ class model_backend(HFTorchInferenceModel):
         # We must disable low_cpu_mem_usage if using a GPT-2 model
         # because GPT-2 is not compatible with this feature yet.
         tf_kwargs.pop("low_cpu_mem_usage", None)
+        tf_kwargs.pop("quantization_config", None)
 
         # Also, lazy loader doesn't support GPT-2 models
         self.lazy_load = False
 
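In transformers, keyword arguments such as low_cpu_mem_usage and quantization_config are forwarded to from_pretrained(), so stripping them with dict.pop() before the call is enough to sidestep the incompatibility; pop() with a default also never raises if the key is absent. Below is a minimal sketch of that pattern, not the repository's actual loading code: the tf_kwargs contents and the model_type check are assumptions standing in for the surrounding code this diff does not show.

# Hypothetical sketch of how the popped kwargs relate to model loading.
from transformers import AutoModelForCausalLM

tf_kwargs = {
    "low_cpu_mem_usage": True,  # assumed default; unsupported for GPT-2 per the diff comment
    "revision": "main",
}
model_type = "gpt2"  # assumed to be detected earlier in the real code

if model_type == "gpt2":
    # Mirror the commit: drop both unsupported options before loading.
    tf_kwargs.pop("low_cpu_mem_usage", None)
    tf_kwargs.pop("quantization_config", None)  # no-op if the key was never set

model = AutoModelForCausalLM.from_pretrained("gpt2", **tf_kwargs)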
@@ -126,8 +126,13 @@ class HFTorchInferenceModel(HFInferenceModel):
         return ret
 
     def get_auxilary_device(self) -> Union[str, int, torch.device]:
-        return self.breakmodel_config.primary_device
+        if self.breakmodel:
+            return self.breakmodel_config.primary_device
+        if self.usegpu:
+            return "cuda:0"
+        else:
+            return "cpu"
 
     def _get_target_dtype(self) -> Union[torch.float16, torch.float32]:
         if self.breakmodel_config.primary_device == "cpu":
             return torch.float32
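The behavioral change in get_auxilary_device: before the fix it unconditionally returned breakmodel_config.primary_device, even when breakmodel was disabled; after the fix, the breakmodel config is consulted only when breakmodel is active, with a plain GPU/CPU fallback otherwise. A minimal standalone sketch of the patched logic follows; the wrapper class and constructor are simplified stand-ins, and only the breakmodel, usegpu, and primary_device names come from the diff itself.

# Hypothetical, self-contained version of the patched device selection.
from typing import Union
import torch

class DeviceSelector:
    def __init__(self, breakmodel: bool, usegpu: bool, primary_device: str = "cuda:0"):
        self.breakmodel = breakmodel
        self.usegpu = usegpu
        self.primary_device = primary_device  # stands in for breakmodel_config.primary_device

    def get_auxilary_device(self) -> Union[str, int, torch.device]:
        # Only trust the breakmodel config when breakmodel is actually on.
        if self.breakmodel:
            return self.primary_device
        if self.usegpu:
            return "cuda:0"
        return "cpu"

# Usage: the returned value is a valid argument to Tensor.to().
device = DeviceSelector(breakmodel=False, usegpu=False).get_auxilary_device()
x = torch.zeros(1).to(device)  # lands on CPU in this configuration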