Merge pull request #91 from VE-FORBRYDERNE/transformers-version-check
Put the XGLM embedding patch behind a version check
commit c20435855b

 aiserver.py | 41

aiserver.py
@@ -1015,27 +1015,28 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme
     import transformers.generation_utils
     from transformers import __version__ as transformers_version
 
-    # Temporary fix for XGLM positional embedding issues until
+    # Some versions of transformers 4.17.0.dev0 are affected by
     # https://github.com/huggingface/transformers/issues/15736
-    # is resolved
-    try:
-        from transformers.models.xglm.modeling_xglm import XGLMSinusoidalPositionalEmbedding
-    except ImportError:
-        pass
-    else:
-        @torch.no_grad()
-        def new_forward(self, input_ids: torch.Tensor = None, inputs_embeds: torch.Tensor = None, past_key_values_length: int = 0):
-            bsz, seq_len = inputs_embeds.size()[:-1]
-            input_shape = inputs_embeds.size()[:-1]
-            sequence_length = input_shape[1]
-            position_ids = torch.arange(
-                past_key_values_length + self.padding_idx + 1, past_key_values_length + sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device
-            ).unsqueeze(0).expand(input_shape).contiguous()
-            max_pos = self.padding_idx + 1 + seq_len + past_key_values_length
-            if max_pos > self.weights.size(0):
-                self.make_weights(max_pos + self.offset, self.embedding_dim, self.padding_idx)
-            return self.weights.index_select(0, position_ids.view(-1)).view(bsz, seq_len, -1).detach()
-        XGLMSinusoidalPositionalEmbedding.forward = new_forward
+    # This is a workaround for those versions of transformers.
+    if(transformers_version == "4.17.0.dev0"):
+        try:
+            from transformers.models.xglm.modeling_xglm import XGLMSinusoidalPositionalEmbedding
+        except ImportError:
+            pass
+        else:
+            @torch.no_grad()
+            def new_forward(self, input_ids: torch.Tensor = None, inputs_embeds: torch.Tensor = None, past_key_values_length: int = 0):
+                bsz, seq_len = inputs_embeds.size()[:-1]
+                input_shape = inputs_embeds.size()[:-1]
+                sequence_length = input_shape[1]
+                position_ids = torch.arange(
+                    past_key_values_length + self.padding_idx + 1, past_key_values_length + sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device
+                ).unsqueeze(0).expand(input_shape).contiguous()
+                max_pos = self.padding_idx + 1 + seq_len + past_key_values_length
+                if max_pos > self.weights.size(0):
+                    self.make_weights(max_pos + self.offset, self.embedding_dim, self.padding_idx)
+                return self.weights.index_select(0, position_ids.view(-1)).view(bsz, seq_len, -1).detach()
+            XGLMSinusoidalPositionalEmbedding.forward = new_forward
 
     # Patch transformers to use our soft prompt
     def patch_causallm(cls):
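Note on the pattern: the change above gates a monkey-patch on the installed transformers version, so the replacement forward is only installed when transformers_version matches the affected 4.17.0.dev0 build, and the guarded import leaves builds without the XGLM module untouched. Below is a minimal, self-contained sketch of that pattern; _original_forward and _patched_forward are illustrative placeholder names, not symbols from aiserver.py, and the placeholder body simply delegates to the original implementation rather than reproducing the actual fix.

# Minimal sketch of version-gated monkey-patching (placeholder names; the
# real replacement body is the new_forward shown in the diff above).
from transformers import __version__ as transformers_version

if transformers_version == "4.17.0.dev0":  # only the affected dev build
    try:
        # Guarded import: if this transformers build does not ship the XGLM
        # module, there is nothing to patch and the patch is silently skipped.
        from transformers.models.xglm.modeling_xglm import XGLMSinusoidalPositionalEmbedding
    except ImportError:
        pass
    else:
        _original_forward = XGLMSinusoidalPositionalEmbedding.forward

        def _patched_forward(self, *args, **kwargs):
            # Placeholder body: pass through to the original implementation.
            # The actual workaround recomputes position ids as in new_forward.
            return _original_forward(self, *args, **kwargs)

        XGLMSinusoidalPositionalEmbedding.forward = _patched_forward

The try/except/else layout keeps the patch from breaking on transformers releases that move or drop the XGLM module: the assignment to forward only runs when both the version check and the import succeed.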