diff --git a/environments/ipex.yml b/environments/ipex.yml
index 5bbbc49c..55629d04 100644
--- a/environments/ipex.yml
+++ b/environments/ipex.yml
@@ -29,6 +29,10 @@ dependencies:
     - intel-extension-for-pytorch==2.1.10+xpu
     - mkl==2024.0.0
     - mkl-dpcpp==2024.0.0
+    - oneccl-bind-pt==2.1.100+xpu; sys_platform == 'linux'
+    - impi-devel==2021.11.0; sys_platform == 'linux'
+    - oneccl-devel==2021.11.1; sys_platform == 'linux'
+    - deepspeed; sys_platform == 'linux'
     - openvino
     - onnxruntime-openvino
     - flask-cloudflared==0.0.10
diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index fcdd9fb9..cda44e99 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -423,6 +423,10 @@ class HFTorchInferenceModel(HFInferenceModel):
                     torch_dtype=self._get_target_dtype(),
                     **tf_kwargs,
                 )
+
+                if hasattr(torch, "xpu") and torch.xpu.is_available() and os.environ.get('DISABLE_IPEX_OPTIMIZE', None) is None:
+                    import intel_extension_for_pytorch as ipex
+                    model = ipex.optimize_transformers(model.eval(), dtype=torch.float16, device="xpu", inplace=True)
         except Exception as e:
             # ...but fall back to stock HF if lazyloader fails.
             if utils.args.panic:
@@ -439,6 +443,10 @@ class HFTorchInferenceModel(HFInferenceModel):
                 **tf_kwargs,
             )
 
+            if hasattr(torch, "xpu") and torch.xpu.is_available() and os.environ.get('DISABLE_IPEX_OPTIMIZE', None) is None:
+                import intel_extension_for_pytorch as ipex
+                model = ipex.optimize_transformers(model.eval(), dtype=torch.float16, device="xpu", inplace=True)
+
         if not self.lazy_load and not self.breakmodel:
             # We need to move the model to the desired device
             if (not self.usegpu) or torch.cuda.device_count() <= 0:
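
Note: both hunks in hf_torch.py add the same guarded IPEX call (once for the lazyloader path, once for the stock HF fallback). A minimal standalone sketch of that guard is below; the helper name maybe_ipex_optimize is made up for illustration, while the DISABLE_IPEX_OPTIMIZE variable and the ipex.optimize_transformers arguments are taken verbatim from the patch.

    import os
    import torch

    def maybe_ipex_optimize(model):
        # Apply IPEX's transformer optimizations only when an XPU-enabled PyTorch
        # build is present, an XPU device is actually available, and the user has
        # not opted out by setting DISABLE_IPEX_OPTIMIZE in the environment.
        if (
            hasattr(torch, "xpu")
            and torch.xpu.is_available()
            and os.environ.get("DISABLE_IPEX_OPTIMIZE") is None
        ):
            import intel_extension_for_pytorch as ipex

            # inplace=True mutates the passed-in module instead of returning a copy.
            model = ipex.optimize_transformers(
                model.eval(), dtype=torch.float16, device="xpu", inplace=True
            )
        return model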