mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-06-05 21:59:24 +02:00
24 lines
735 B
Python
24 lines
735 B
Python
from typing import List, Union
|
|
from tokenizers import Tokenizer
|
|
import torch
|
|
from transformers import PreTrainedTokenizer
|
|
|
|
|
|
class GenericTokenizer:
    """Uniform ``encode``/``decode`` facade over a wrapped tokenizer.

    The wrapped object may be either a ``tokenizers.Tokenizer`` or a
    Transformers ``PreTrainedTokenizer``. Their ``encode`` methods return
    different things (an encoding object exposing ``.ids`` versus the ids
    themselves), so this class normalizes both directions to plain Python
    lists of token ids.
    """

    def __init__(self, tokenizer: Union[Tokenizer, PreTrainedTokenizer]) -> None:
        """Store the tokenizer that all calls are delegated to.

        Args:
            tokenizer: The underlying tokenizer; it must provide ``encode``
                and ``decode`` methods.
        """
        self.tokenizer = tokenizer

        # TODO: Get rid of this
        self._koboldai_header = []

    def encode(self, text: str) -> List[int]:
        """Tokenize *text* and return its token ids.

        Args:
            text: The string to tokenize.

        Returns:
            The token ids produced by the wrapped tokenizer.
        """
        encoded = self.tokenizer.encode(text)
        # A Tokenizers-style result carries the ids on `.ids`; a
        # Transformers-style result already is the ids, so fall back to the
        # result itself instead of raising AttributeError.
        return getattr(encoded, "ids", encoded)

    def decode(self, tokens: Union[int, List[int], torch.Tensor]) -> str:
        """Turn one or more token ids back into text.

        Args:
            tokens: A single token id, a list of token ids, or a tensor of
                token ids (0-dim or 1-dim).

        Returns:
            The string produced by the wrapped tokenizer's ``decode``.
        """
        # Convert tensors to plain Python values first: a 0-dim tensor
        # becomes an int (handled below) and a 1-dim tensor becomes a list
        # of ints, which is what both tokenizer flavours accept.
        if isinstance(tokens, torch.Tensor):
            tokens = tokens.tolist()

        # The wrapped decoders expect a sequence, so wrap a lone id.
        if isinstance(tokens, int):
            tokens = [tokens]

        return self.tokenizer.decode(tokens)
|