Revert ipex.optimize_transformers

2023-12-15 14:42:27 +03:00 · 2023-12-15 14:42:27 +03:00 · 9dbb556cc1
parent c1ae1d7341
commit 9dbb556cc1
2 changed files with 0 additions and 20 deletions
--- a/environments/ipex.yml
+++ b/environments/ipex.yml
@@ -27,8 +27,6 @@ dependencies:
    - --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
    - torch==2.1.0a0
    - intel-extension-for-pytorch==2.1.10+xpu
-    - oneccl-bind-pt==2.1.100+xpu; sys_platform == 'linux'
-    - deepspeed; sys_platform == 'linux'
    - openvino
    - onnxruntime-openvino
    - flask-cloudflared==0.0.10
@@ -57,8 +55,6 @@ dependencies:
    - einops
    - peft==0.3.0
    - scipy
-    - https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
-    - https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
    - windows-curses; sys_platform == 'win32'
    - pynvml
    - omegaconf
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -423,14 +423,6 @@ class HFTorchInferenceModel(HFInferenceModel):
                        torch_dtype=self._get_target_dtype(),
                        **tf_kwargs,
                    )
-
-                    if hasattr(torch, "xpu") and torch.xpu.is_available and os.environ.get('DISABLE_IPEX_OPTIMIZE', None) is None:
-                        import intel_extension_for_pytorch as ipex
-                        model = model.to(memory_format=torch.channels_last)
-                        if hasattr(ipex, "optimize_transformers"):
-                            model = ipex.optimize_transformers(model.eval(), dtype=torch.float16, device="xpu", inplace=True)
-                        else:
-                            model = ipex.optimize(model.eval(), dtype=torch.float16, inplace=True)
            except Exception as e:
                # ...but fall back to stock HF if lazyloader fails.
                if utils.args.panic:
@@ -447,14 +439,6 @@ class HFTorchInferenceModel(HFInferenceModel):
                    **tf_kwargs,
                )

-                if hasattr(torch, "xpu") and torch.xpu.is_available and os.environ.get('DISABLE_IPEX_OPTIMIZE', None) is None:
-                    import intel_extension_for_pytorch as ipex
-                    model = model.to(memory_format=torch.channels_last)
-                    if hasattr(ipex, "optimize_transformers"):
-                        model = ipex.optimize_transformers(model.eval(), dtype=torch.float16, device="xpu", inplace=True)
-                    else:
-                        model = ipex.optimize(model.eval(), dtype=torch.float16, inplace=True)
-
            if not self.lazy_load and not self.breakmodel:
                # We need to move the model to the desired device
                if (not self.usegpu) or torch.cuda.device_count() <= 0: