Henk
2023-07-24 02:05:07 +02:00
parent 9fc9cb92f7
commit 30495cf8d8
2 changed files with 9 additions and 3 deletions

@@ -124,7 +124,8 @@ class model_backend(HFTorchInferenceModel):
             # We must disable low_cpu_mem_usage if using a GPT-2 model
             # because GPT-2 is not compatible with this feature yet.
             tf_kwargs.pop("low_cpu_mem_usage", None)
+            tf_kwargs.pop("quantization_config", None)
 
             # Also, lazy loader doesn't support GPT-2 models
             self.lazy_load = False
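
For orientation, here is a minimal, self-contained sketch of the pattern this hunk extends: loader options that GPT-2 cannot handle yet are stripped from the kwargs before they reach from_pretrained. The standalone framing, the dict contents, and the model_type variable are illustrative assumptions; in the backend itself tf_kwargs is assembled elsewhere in the class.

# A sketch under stated assumptions, not the backend's actual loading code.
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Hypothetical kwargs as they might arrive from the quantization settings.
tf_kwargs = {
    "low_cpu_mem_usage": True,
    "quantization_config": BitsAndBytesConfig(load_in_8bit=True),
}

model_type = "gpt2"  # assumed here; the backend derives this from the model config
if model_type == "gpt2":
    # GPT-2 supports neither option yet, so drop both before loading;
    # pop(key, None) is a no-op when the key was never set.
    tf_kwargs.pop("low_cpu_mem_usage", None)
    tf_kwargs.pop("quantization_config", None)

model = AutoModelForCausalLM.from_pretrained("gpt2", **tf_kwargs)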

@@ -126,8 +126,13 @@ class HFTorchInferenceModel(HFInferenceModel):
         return ret
 
     def get_auxilary_device(self) -> Union[str, int, torch.device]:
-        return self.breakmodel_config.primary_device
+        if self.breakmodel:
+            return self.breakmodel_config.primary_device
+        if self.usegpu:
+            return "cuda:0"
+        else:
+            return "cpu"
 
     def _get_target_dtype(self) -> Union[torch.float16, torch.float32]:
         if self.breakmodel_config.primary_device == "cpu":
             return torch.float32
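
Taken on its own, the new selection order is: breakmodel's primary device first, then a single GPU, then CPU. Below is a self-contained sketch of that logic with a stubbed config object; the _Cfg stub and the bare Model wrapper are illustrative assumptions, not the real class.

from typing import Union
import torch

class _Cfg:
    # Stand-in for breakmodel_config; an int here means a CUDA device index.
    primary_device: Union[str, int, torch.device] = 0

class Model:
    def __init__(self, breakmodel: bool, usegpu: bool):
        self.breakmodel = breakmodel
        self.usegpu = usegpu
        self.breakmodel_config = _Cfg()

    def get_auxilary_device(self) -> Union[str, int, torch.device]:
        # Consult the breakmodel config only when breakmodel is active;
        # before this commit its primary_device was returned unconditionally.
        if self.breakmodel:
            return self.breakmodel_config.primary_device
        if self.usegpu:
            return "cuda:0"
        return "cpu"

# Callers can hand the result straight to torch:
device = Model(breakmodel=False, usegpu=False).get_auxilary_device()
aux = torch.zeros(4, device=device)  # lands on "cpu" in this configuration

The CPU fallback is the behavioral point of the hunk: with neither breakmodel nor usegpu set, the old code still returned the breakmodel config's primary device.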