Experiment

somebody
2023-04-27 20:28:04 -05:00
parent cd1eb97c2a
commit ffa7b22734

@@ -29,15 +29,16 @@ class GenericTokenizer:
         return ret.ids
 
     def decode(self, tokens: Union[int, List[int], torch.Tensor]) -> str:
+        return self.tokenizer.decode(tokens)
         if isinstance(tokens, torch.Tensor):
             tokens = tokens.cpu().tolist()
 
         if isinstance(tokens, int):
             tokens = [tokens]
 
-        for t in tokens:
-            if t not in self.valid_tokens:
-                print(f"WHAT ON EARTH IS {t}")
+        # Sometimes soft token placeholders aren't in the vocab, which causes
+        # errors on decode. Obviously we can't express these tokens as text so
+        # we can probably slice 'em out without too much issue
         tokens = [t for t in tokens if t in self.valid_tokens]
 
         return self.tokenizer.decode(tokens)
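
The removed loop was debug logging for out-of-vocab ids; the new comment documents the list-comprehension filter that replaces it, and the added early return short-circuits that filter entirely (presumably the "Experiment" of the commit title). Below is a minimal sketch of the filtering idea, assuming a Hugging Face tokenizer whose get_vocab() returns a token-to-id mapping; the wrapper class name and constructor are hypothetical, and only decode()'s body mirrors the diff.

from typing import List, Union

import torch
from transformers import AutoTokenizer


class FilteringTokenizer:
    """Hypothetical stand-in for GenericTokenizer's filtering decode."""

    def __init__(self, tokenizer) -> None:
        self.tokenizer = tokenizer
        # Every token id the wrapped tokenizer can express as text.
        # (Assumption: built from get_vocab(); the diff doesn't show this.)
        self.valid_tokens = set(tokenizer.get_vocab().values())

    def decode(self, tokens: Union[int, List[int], torch.Tensor]) -> str:
        # Normalize tensors and bare ints to a plain list of ids.
        if isinstance(tokens, torch.Tensor):
            tokens = tokens.cpu().tolist()
        if isinstance(tokens, int):
            tokens = [tokens]
        # Drop ids the vocab can't express (e.g. soft-token placeholders)
        # so the underlying decode doesn't error on them.
        tokens = [t for t in tokens if t in self.valid_tokens]
        return self.tokenizer.decode(tokens)


tok = FilteringTokenizer(AutoTokenizer.from_pretrained("gpt2"))
# 15496 and 995 decode to "Hello world" under GPT-2; 99999999 is out of
# vocab and gets sliced out rather than erroring on decode.
print(tok.decode([15496, 995, 99999999]))

Storing valid_tokens as a set keeps the membership check O(1) per id, which matters when filtering long generations token by token.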