GenericTokenizer: Fall back to defined tokenizer
Shouldn't be relied on for model-agnostic code, but for loading processes where you know which tokenizer class is in use, it should be okie dokie
@@ -137,8 +137,8 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel):
                 # Use save_pretrained to convert fp32 models to fp16,
                 # unless we are using disk cache because save_pretrained
                 # is not supported in that case
-                model = model.half()
-                model.save_pretrained(
+                self.model = self.model.half()
+                self.model.save_pretrained(
                     self.get_local_model_path(ignore_existance=True),
                     max_shard_size="500MiB",
                 )
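For context, the hunk above casts the freshly loaded fp32 weights to fp16 and re-saves them as a sharded checkpoint, now going through self.model rather than the local model variable. A minimal sketch of the same save_pretrained pattern in plain transformers, with a placeholder model id and output directory that are not taken from this commit:

from transformers import AutoModelForCausalLM

# Load an fp32 checkpoint (placeholder model id, assumed for illustration).
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-125m")

# Cast the weights to fp16 and write a sharded copy, mirroring the hunk above.
model = model.half()
model.save_pretrained(
    "./local_model_fp16",     # assumed output directory
    max_shard_size="500MiB",  # keep individual weight shards around 500 MiB
)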
@@ -1,4 +1,4 @@
-from typing import List, Union
+from typing import Any, List, Union
 from tokenizers import Tokenizer
 import torch
 from transformers import PreTrainedTokenizer
@@ -10,10 +10,16 @@ class GenericTokenizer:
     def __init__(self, tokenizer: Union[Tokenizer, PreTrainedTokenizer]) -> None:
         self.tokenizer = tokenizer
 
-        # TODO: Get rid of this
-        self._koboldai_header = []
-
-        self.get_vocab = tokenizer.get_vocab
+    def __getattr__(self, name: str) -> Any:
+        # Fall back to tokenizer for non-generic stuff
+        return getattr(self.tokenizer, name)
+
+    def __setattr__(self, name: str, value: Any) -> None:
+        # To prevent infinite recursion on __init__ setting
+        if name == "tokenizer":
+            super().__setattr__(name, value)
+            return
+        setattr(self.tokenizer, name, value)
 
     def encode(self, text: str) -> list:
         if isinstance(self.tokenizer, PreTrainedTokenizer):
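The new __getattr__/__setattr__ pair makes GenericTokenizer transparently delegate anything it does not define itself to the wrapped tokenizer object. A standalone sketch of that delegation pattern (illustrative names only, not KoboldAI code):

from typing import Any

class Wrapper:
    """Forwards unknown attribute reads and writes to a wrapped object."""

    def __init__(self, inner: Any) -> None:
        self.inner = inner

    def __getattr__(self, name: str) -> Any:
        # Only invoked when normal lookup fails, so wrapper-level
        # attributes such as "inner" are never routed through here.
        return getattr(self.inner, name)

    def __setattr__(self, name: str, value: Any) -> None:
        # Guard against infinite recursion while __init__ assigns "inner".
        if name == "inner":
            super().__setattr__(name, value)
            return
        setattr(self.inner, name, value)

class Dummy:
    padding_side = "left"

w = Wrapper(Dummy())
print(w.padding_side)        # "left"  -- reads fall through to the wrapped object
w.padding_side = "right"
print(w.inner.padding_side)  # "right" -- writes fall through as well

Because writes are also forwarded, assigning an attribute on the wrapper mutates the underlying tokenizer, which is what lets loading code that knows the concrete tokenizer class treat GenericTokenizer as if it were the tokenizer itself.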