diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index fb9fe39e..1b411c95 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -412,7 +412,7 @@ class HFTorchInferenceModel(HFInferenceModel): **tf_kwargs, ) - if not self.lazy_load: + if not self.lazy_load and not self.breakmodel: # We need to move the model to the desired device if (not self.usegpu) or torch.cuda.device_count() <= 0: model = model.to("cpu")