mirror of https://github.com/KoboldAI/KoboldAI-Client.git, synced 2025-06-05 21:59:24 +02:00
Fix GPT2
@@ -124,6 +124,7 @@ class model_backend(HFTorchInferenceModel):
             # We must disable low_cpu_mem_usage and if using a GPT-2 model
             # because GPT-2 is not compatible with this feature yet.
             tf_kwargs.pop("low_cpu_mem_usage", None)
+            tf_kwargs.pop("quantization_config", None)
 
             # Also, lazy loader doesn't support GPT-2 models
             self.lazy_load = False
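For context, a minimal sketch of what this hunk changes in the loading path, assuming Hugging Face transformers' AutoModelForCausalLM; the model id, the kwarg values, and the "gpt2" substring check are illustrative stand-ins, not KoboldAI's actual loading code:

import torch
from transformers import AutoModelForCausalLM

model_id = "gpt2"  # illustrative checkpoint id

# kwargs the backend might otherwise pass through (illustrative values)
tf_kwargs = {"low_cpu_mem_usage": True, "torch_dtype": torch.float32}

# Per the comment in the hunk above, GPT-2 is not compatible with these
# loading features yet, so strip them before calling from_pretrained.
if "gpt2" in model_id:  # stand-in for the backend's model-type check
    tf_kwargs.pop("low_cpu_mem_usage", None)
    tf_kwargs.pop("quantization_config", None)

model = AutoModelForCausalLM.from_pretrained(model_id, **tf_kwargs)

dict.pop(key, None) is a safe no-op when the key was never set, which is why the new quantization_config removal can run unconditionally on the GPT-2 branch.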
@@ -126,7 +126,12 @@ class HFTorchInferenceModel(HFInferenceModel):
         return ret
 
     def get_auxilary_device(self) -> Union[str, int, torch.device]:
-        return self.breakmodel_config.primary_device
+        if self.breakmodel:
+            return self.breakmodel_config.primary_device
+        if self.usegpu:
+            return "cuda:0"
+        else:
+            return "cpu"
 
     def _get_target_dtype(self) -> Union[torch.float16, torch.float32]:
         if self.breakmodel_config.primary_device == "cpu":
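The rewrite above makes get_auxilary_device fall back when breakmodel is off: previously the method always returned breakmodel_config.primary_device, even for plain single-GPU (usegpu) and CPU runs. A self-contained sketch of the new fallback order, with breakmodel, usegpu, and breakmodel_config replaced by hypothetical parameters that mirror the real attributes:

from typing import Union

import torch

class DeviceConfig:
    # Hypothetical stand-in for the backend's breakmodel_config.
    primary_device: Union[str, int] = "cpu"

def get_auxilary_device(
    breakmodel: bool, usegpu: bool, config: DeviceConfig
) -> Union[str, int, torch.device]:
    # Same fallback order as the patched method: breakmodel's primary
    # device wins; otherwise a single-GPU run uses cuda:0; otherwise CPU.
    if breakmodel:
        return config.primary_device
    if usegpu:
        return "cuda:0"
    return "cpu"

# e.g. a non-breakmodel run reports cuda:0 when a GPU is available
print(get_auxilary_device(False, torch.cuda.is_available(), DeviceConfig()))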