mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-06-05 21:59:24 +02:00
Debuuuug
This commit is contained in:
@@ -9,6 +9,7 @@ class GenericTokenizer:
|
|||||||
|
|
||||||
def __init__(self, tokenizer: Union[Tokenizer, PreTrainedTokenizer]) -> None:
|
def __init__(self, tokenizer: Union[Tokenizer, PreTrainedTokenizer]) -> None:
|
||||||
self.tokenizer = tokenizer
|
self.tokenizer = tokenizer
|
||||||
|
self.valid_tokens = set(self.tokenizer.vocab.values())
|
||||||
|
|
||||||
def __getattr__(self, name: str) -> Any:
|
def __getattr__(self, name: str) -> Any:
|
||||||
# Fall back to tokenizer for non-generic stuff
|
# Fall back to tokenizer for non-generic stuff
|
||||||
@@ -34,4 +35,9 @@ class GenericTokenizer:
|
|||||||
if isinstance(tokens, int):
|
if isinstance(tokens, int):
|
||||||
tokens = [tokens]
|
tokens = [tokens]
|
||||||
|
|
||||||
return self.tokenizer.decode(tokens, skip_special_tokens=True)
|
for t in tokens:
|
||||||
|
if t not in self.valid_tokens:
|
||||||
|
print(f"WHAT ON EARTH IS {t}")
|
||||||
|
tokens = [t for t in tokens if t in self.valid_tokens]
|
||||||
|
|
||||||
|
return self.tokenizer.decode(tokens)
|
||||||
|
Reference in New Issue
Block a user