Mirror of https://github.com/KoboldAI/KoboldAI-Client.git, synced 2025-06-05 21:59:24 +02:00
Fix llama 4-bit loading error
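Drops the unused quiesce_logger import and has the 4-bit backend return its tokenizer wrapped in GenericTokenizer instead of the raw Transformers object.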
@@ -14,7 +14,7 @@ from transformers import AutoModelForCausalLM, GPTNeoForCausalLM, AutoTokenizer,
 import utils
 import modeling.lazy_loader as lazy_loader
 import koboldai_settings
-from logger import logger, set_logger_verbosity, quiesce_logger
+from logger import logger, set_logger_verbosity

 try:
     import breakmodel
@@ -24,6 +24,7 @@ except ModuleNotFoundError as e:
     raise e

 from modeling.inference_models.hf_torch import HFTorchInferenceModel
+from modeling.tokenizer import GenericTokenizer

 # 4-bit dependencies
 from pathlib import Path
@@ -362,4 +363,4 @@ class HFTorch4BitInferenceModel(HFTorchInferenceModel):
         else:
             tokenizer = AutoTokenizer.from_pretrained(utils.koboldai_vars.custmodpth)

-        return tokenizer
+        return GenericTokenizer(tokenizer)
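The last hunk is the actual fix: the 4-bit backend now returns the tokenizer wrapped in GenericTokenizer (imported from modeling.tokenizer in the second hunk) rather than the raw Transformers object. Below is a minimal sketch of that wrapper-delegation pattern, assuming the wrapper stores the raw tokenizer and falls through to it for anything it does not override; GenericTokenizerSketch, its method bodies, and the model name are illustrative assumptions, not KoboldAI's actual modeling/tokenizer.py.

    # A minimal sketch of the wrapper pattern, not KoboldAI's actual
    # modeling/tokenizer.py: the wrapper stores the raw tokenizer and
    # delegates any attribute it does not define back to it, so callers
    # see one uniform interface over different tokenizer classes.
    from transformers import AutoTokenizer

    class GenericTokenizerSketch:
        def __init__(self, tokenizer):
            self.tokenizer = tokenizer

        def __getattr__(self, name):
            # Only called for attributes not found on the wrapper itself;
            # fall through to the wrapped Transformers tokenizer.
            return getattr(self.tokenizer, name)

        def decode(self, tokens):
            # Accept a lone token id as well as a sequence of ids.
            if isinstance(tokens, int):
                tokens = [tokens]
            return self.tokenizer.decode(tokens)

    raw = AutoTokenizer.from_pretrained("gpt2")  # hypothetical model choice
    tok = GenericTokenizerSketch(raw)
    print(tok.decode(tok.encode("Hello")))  # encode is delegated, decode is wrapped

With a wrapper of this shape, a call site such as tokenizer.decode(token_id) behaves the same regardless of which tokenizer class the loader produced, which is the kind of interface mismatch a 4-bit loading path can otherwise trip over.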