diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index fb9fe39e..1b411c95 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -412,7 +412,7 @@ class HFTorchInferenceModel(HFInferenceModel):
                     **tf_kwargs,
                 )
 
-            if not self.lazy_load:
+            if not self.lazy_load and not self.breakmodel:
                 # We need to move the model to the desired device
                 if (not self.usegpu) or torch.cuda.device_count() <= 0:
                     model = model.to("cpu")