mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-06-05 21:59:24 +02:00
Experiment
This commit is contained in:
@@ -29,15 +29,16 @@ class GenericTokenizer:
|
|||||||
return ret.ids
|
return ret.ids
|
||||||
|
|
||||||
def decode(self, tokens: Union[int, List[int], torch.Tensor]) -> str:
    """Decode token ids back into text.

    Accepts a single token id, a list of ids, or a ``torch.Tensor`` of ids.

    Sometimes soft token placeholders aren't in the vocab, which causes
    errors on decode. We can't express those tokens as text, so ids not in
    ``self.valid_tokens`` are sliced out before decoding.

    Args:
        tokens: a token id, list of token ids, or tensor of token ids.

    Returns:
        The decoded string for the valid tokens.
    """
    # Normalize every accepted input form to a plain Python list of ints.
    if isinstance(tokens, torch.Tensor):
        tokens = tokens.cpu().tolist()
    if isinstance(tokens, int):
        tokens = [tokens]

    # Single filtering pass. (The original did a separate scan loop that
    # printed `WHAT ON EARTH IS {t}` for each unknown id before filtering —
    # leftover experiment/debug noise, removed here; it also iterated the
    # list twice for no benefit.)
    tokens = [t for t in tokens if t in self.valid_tokens]

    return self.tokenizer.decode(tokens)
|
Reference in New Issue
Block a user