From 40b4631f6ca426f8ecf7f2e16d5b6fcf58be224a Mon Sep 17 00:00:00 2001
From: Gnome Ann <>
Date: Thu, 28 Oct 2021 16:52:39 -0400
Subject: [PATCH] Clamp input_ids in place

Apparently transformers maintains an internal reference to input_ids
(to use for repetition penalty) so we have to clamp the internal
version, too, because otherwise transformers will throw an
out-of-bounds error upon attempting to access token IDs that are not
in the vocabulary.
---
 aiserver.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index d1088f22..3bd15e91 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -518,13 +518,15 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly"]):
                 input_ids = kwargs.get('input_ids').to(self.device)
                 assert input_ids is not None
                 kwargs['input_ids'] = None
-                inputs_embeds = self.transformer.wte(input_ids.clamp(max=self.config.vocab_size-1))
-                input_ids = input_ids - self.config.vocab_size  # Don't use the -= operator here, you'll get a cryptic error message
+                if(vars.sp is not None):
+                    shifted_input_ids = input_ids - self.config.vocab_size
+                input_ids.clamp_(max=self.config.vocab_size-1)
+                inputs_embeds = self.transformer.wte(input_ids)
                 if(vars.sp is not None):
                     vars.sp = vars.sp.to(inputs_embeds.dtype).to(inputs_embeds.device)
                     inputs_embeds = torch.where(
-                        (input_ids >= 0)[:, :, None],
-                        vars.sp[input_ids.clamp(min=0)],
+                        (shifted_input_ids >= 0)[:, :, None],
+                        vars.sp[shifted_input_ids.clamp(min=0)],
                         inputs_embeds,
                     )
                 kwargs['inputs_embeds'] = inputs_embeds
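
For reference, here is a minimal, self-contained PyTorch sketch of what the
patched code now does. It is an illustration only: vocab_size, soft_prompt,
and word_embeddings are made-up stand-ins for self.config.vocab_size, vars.sp,
and self.transformer.wte, the shapes are arbitrary, and the vars.sp-is-None
guard and dtype/device alignment in the real code are omitted. Token IDs at or
above the vocabulary size mark soft-prompt slots; the key point is that the
clamp happens in place, so any other code still holding a reference to the same
tensor (such as transformers' repetition-penalty handling) only ever sees valid
vocabulary IDs.

# Sketch of the in-place clamp + soft-prompt embedding substitution.
# All names and sizes below are hypothetical stand-ins, not the real
# aiserver.py objects.
import torch

vocab_size = 8            # hypothetical vocabulary size
soft_prompt_len = 3       # hypothetical number of soft-prompt tokens
embed_dim = 4

word_embeddings = torch.nn.Embedding(vocab_size, embed_dim)  # stand-in for self.transformer.wte
soft_prompt = torch.randn(soft_prompt_len, embed_dim)        # stand-in for vars.sp

# Batch of 1: three soft-prompt slots (IDs 8, 9, 10) followed by real tokens.
input_ids = torch.tensor([[8, 9, 10, 2, 5, 7]])

# Compute the shifted IDs *before* clamping; entries >= 0 index into the soft prompt.
shifted_input_ids = input_ids - vocab_size

# Clamp in place so the shared tensor only ever contains valid vocabulary IDs.
input_ids.clamp_(max=vocab_size - 1)

# Ordinary embedding lookup on the clamped IDs...
inputs_embeds = word_embeddings(input_ids)

# ...then overwrite the soft-prompt positions with the soft-prompt vectors.
inputs_embeds = torch.where(
    (shifted_input_ids >= 0)[:, :, None],
    soft_prompt[shifted_input_ids.clamp(min=0)],
    inputs_embeds,
)

print(inputs_embeds.shape)  # torch.Size([1, 6, 4])

Computing shifted_input_ids before the in-place clamp is what preserves the
record of which positions were soft-prompt slots; after clamp_() that
distinction is no longer recoverable from input_ids itself.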