Mirror of https://github.com/KoboldAI/KoboldAI-Client.git
GenericTokenizer: Fall back to defined tokenizer
Shouldn't be relied on for model-agnostic code, but for loading processes where you know the tokenizer class used it should be okie dokie
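In practice the fallback means loading code that knows the concrete tokenizer can reach class-specific members through the wrapper, while model-agnostic code should stick to the wrapper's own interface. A rough usage sketch under that assumption (the import path and model id below are examples, not taken from this commit):

# Sketch of the intended usage, assuming GenericTokenizer from this commit
# is importable; the module path and model id are placeholders.
from transformers import AutoTokenizer
from modeling.tokenizer import GenericTokenizer  # assumed import path

# Wrap a Hugging Face tokenizer; "EleutherAI/gpt-neo-125m" is only an example id.
hf_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125m")
tokenizer = GenericTokenizer(hf_tokenizer)

# Wrapper's own interface: fine for model-agnostic code.
ids = tokenizer.encode("Hello world")

# Class-specific attribute resolved through the __getattr__ fallback:
# acceptable in loading code that knows a transformers tokenizer was
# wrapped, but not something model-agnostic code should depend on.
vocab = tokenizer.get_vocab()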
@@ -137,8 +137,8 @@ class GenericHFTorchInferenceModel(HFTorchInferenceModel):
     # Use save_pretrained to convert fp32 models to fp16,
     # unless we are using disk cache because save_pretrained
     # is not supported in that case
-    model = model.half()
-    model.save_pretrained(
+    self.model = self.model.half()
+    self.model.save_pretrained(
         self.get_local_model_path(ignore_existance=True),
         max_shard_size="500MiB",
     )
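The hunk above only switches from local variables to attributes on self; the underlying conversion trick is unchanged: cast the fp32 weights to half precision in memory, then re-save them as small shards. A standalone sketch of the same idea with plain transformers (model id and output directory are placeholder values):

from transformers import AutoModelForCausalLM

# Load an fp32 checkpoint; "EleutherAI/gpt-neo-125m" is only an example id.
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-125m")

# Cast the weights to fp16, then write them back out as 500 MiB shards
# ("./gpt-neo-125m-fp16" is a placeholder output path).
model = model.half()
model.save_pretrained(
    "./gpt-neo-125m-fp16",
    max_shard_size="500MiB",
)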
@@ -1,4 +1,4 @@
-from typing import List, Union
+from typing import Any, List, Union
 from tokenizers import Tokenizer
 import torch
 from transformers import PreTrainedTokenizer
@@ -10,10 +10,16 @@ class GenericTokenizer:
     def __init__(self, tokenizer: Union[Tokenizer, PreTrainedTokenizer]) -> None:
         self.tokenizer = tokenizer
 
+        # TODO: Get rid of this
+        self._koboldai_header = []
+
+    def __getattr__(self, name: str) -> Any:
+        # Fall back to tokenizer for non-generic stuff
+        return getattr(self.tokenizer, name)
 
-        self.get_vocab = tokenizer.get_vocab
+    def __setattr__(self, name: str, value: Any) -> None:
+        # To prevent infinite recursion on __init__ setting
+        if name == "tokenizer":
+            super().__setattr__(name, value)
+            return
+        setattr(self.tokenizer, name, value)
 
     def encode(self, text: str) -> list:
         if isinstance(self.tokenizer, PreTrainedTokenizer):
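One detail worth spelling out from the hunk above: __setattr__ forwards every assignment to the wrapped tokenizer, so without the name == "tokenizer" branch the very first assignment in __init__ would try to read self.tokenizer before it exists, bounce into __getattr__, and recurse until a RecursionError. A minimal, self-contained sketch of the same guard (the class and attribute names here are made up for illustration, not the repository code):

from types import SimpleNamespace
from typing import Any


class DelegatingWrapper:
    """Illustrative stand-in for the delegation pattern, not the repository class."""

    def __init__(self, target: Any) -> None:
        self.target = target  # routed through __setattr__ below

    def __getattr__(self, name: str) -> Any:
        # Called only when normal lookup fails: forward to the wrapped object.
        return getattr(self.target, name)

    def __setattr__(self, name: str, value: Any) -> None:
        if name == "target":
            # Store the wrapped object on the wrapper itself; without this
            # branch the assignment in __init__ would need self.target
            # before it exists and recurse through __getattr__.
            super().__setattr__(name, value)
            return
        # Everything else is applied to the wrapped object.
        setattr(self.target, name, value)


inner = SimpleNamespace(pad_token_id=0)
wrapper = DelegatingWrapper(inner)
print(wrapper.pad_token_id)  # 0, resolved via __getattr__
wrapper.pad_token_id = 2     # forwarded by __setattr__
print(inner.pad_token_id)    # 2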