Revert ipex.optimize_transformers
This commit is contained in:
parent
c1ae1d7341
commit
9dbb556cc1
|
@@ -27,8 +27,6 @@ dependencies:
|
|||
- --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
- torch==2.1.0a0
|
||||
- intel-extension-for-pytorch==2.1.10+xpu
|
||||
- oneccl-bind-pt==2.1.100+xpu; sys_platform == 'linux'
|
||||
- deepspeed; sys_platform == 'linux'
|
||||
- openvino
|
||||
- onnxruntime-openvino
|
||||
- flask-cloudflared==0.0.10
|
||||
|
@@ -57,8 +55,6 @@ dependencies:
|
|||
- einops
|
||||
- peft==0.3.0
|
||||
- scipy
|
||||
- https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
|
||||
- https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
|
||||
- windows-curses; sys_platform == 'win32'
|
||||
- pynvml
|
||||
- omegaconf
|
|
@@ -423,14 +423,6 @@ class HFTorchInferenceModel(HFInferenceModel):
|
|||
torch_dtype=self._get_target_dtype(),
|
||||
**tf_kwargs,
|
||||
)
|
||||
|
||||
if hasattr(torch, "xpu") and torch.xpu.is_available and os.environ.get('DISABLE_IPEX_OPTIMIZE', None) is None:
|
||||
import intel_extension_for_pytorch as ipex
|
||||
model = model.to(memory_format=torch.channels_last)
|
||||
if hasattr(ipex, "optimize_transformers"):
|
||||
model = ipex.optimize_transformers(model.eval(), dtype=torch.float16, device="xpu", inplace=True)
|
||||
else:
|
||||
model = ipex.optimize(model.eval(), dtype=torch.float16, inplace=True)
|
||||
except Exception as e:
|
||||
# ...but fall back to stock HF if lazyloader fails.
|
||||
if utils.args.panic:
|
||||
|
@@ -447,14 +439,6 @@ class HFTorchInferenceModel(HFInferenceModel):
|
|||
**tf_kwargs,
|
||||
)
|
||||
|
||||
if hasattr(torch, "xpu") and torch.xpu.is_available and os.environ.get('DISABLE_IPEX_OPTIMIZE', None) is None:
|
||||
import intel_extension_for_pytorch as ipex
|
||||
model = model.to(memory_format=torch.channels_last)
|
||||
if hasattr(ipex, "optimize_transformers"):
|
||||
model = ipex.optimize_transformers(model.eval(), dtype=torch.float16, device="xpu", inplace=True)
|
||||
else:
|
||||
model = ipex.optimize(model.eval(), dtype=torch.float16, inplace=True)
|
||||
|
||||
if not self.lazy_load and not self.breakmodel:
|
||||
# We need to move the model to the desired device
|
||||
if (not self.usegpu) or torch.cuda.device_count() <= 0:
|
||||
|
|
Loading…
Reference in New Issue