Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)
Delete basic 4bit
And add code to handle dangling __pycache__s
aiserver.py
@@ -633,34 +633,49 @@ model_backend_type_crosswalk = {}
 PRIORITIZED_BACKEND_MODULES = ["generic_hf_torch"]
 
 for module in os.listdir("./modeling/inference_models"):
-    if not os.path.isfile(os.path.join("./modeling/inference_models",module)) and module != '__pycache__':
-        try:
-            backend_code = importlib.import_module('modeling.inference_models.{}.class'.format(module))
-            backend_name = backend_code.model_backend_name
-            backend_type = backend_code.model_backend_type
-            backend_object = backend_code.model_backend()
-            if "disable" in vars(backend_object) and backend_object.disable:
-                continue
-            model_backends[backend_name] = backend_object
-            model_backend_code[module] = backend_code
-            if backend_name in model_backend_module_names:
-                raise RuntimeError(f"{module} cannot make backend '{backend_name}'; it already exists!")
-            model_backend_module_names[backend_name] = module
-            if backend_type in model_backend_type_crosswalk:
-                if module in PRIORITIZED_BACKEND_MODULES:
-                    model_backend_type_crosswalk[backend_type].insert(0, backend_name)
-                else:
-                    model_backend_type_crosswalk[backend_type].append(backend_name)
-            else:
-                model_backend_type_crosswalk[backend_type] = [backend_name]
-        except Exception:
-            logger.error("Model Backend {} failed to load".format(module))
-            logger.error(traceback.format_exc())
+    if module == '__pycache__':
+        continue
+
+    module_path = os.path.join("modeling/inference_models", module)
+    if not os.path.isdir(module_path):
+        # Drop-in modules must be folders
+        continue
+
+    if os.listdir(module_path) == ["__pycache__"]:
+        # Delete backends which have been deleted upstream. As __pycache__
+        # folders aren't tracked, they'll stick around until we zap em'
+        assert len(os.listdir(module_path)) == 1
+        logger.info(f"Deleting old backend {module}")
+        shutil.rmtree(module_path)
+        continue
+
+    try:
+        backend_code = importlib.import_module('modeling.inference_models.{}.class'.format(module))
+        backend_name = backend_code.model_backend_name
+        backend_type = backend_code.model_backend_type
+        backend_object = backend_code.model_backend()
+
+        if "disable" in vars(backend_object) and backend_object.disable:
+            continue
+
+        model_backends[backend_name] = backend_object
+        model_backend_code[module] = backend_code
+
+        if backend_name in model_backend_module_names:
+            raise RuntimeError(f"{module} cannot make backend '{backend_name}'; it already exists!")
+        model_backend_module_names[backend_name] = module
+
+        if backend_type in model_backend_type_crosswalk:
+            if module in PRIORITIZED_BACKEND_MODULES:
+                model_backend_type_crosswalk[backend_type].insert(0, backend_name)
+            else:
+                model_backend_type_crosswalk[backend_type].append(backend_name)
+        else:
+            model_backend_type_crosswalk[backend_type] = [backend_name]
+
+    except Exception:
+        logger.error("Model Backend {} failed to load".format(module))
+        logger.error(traceback.format_exc())
 
 logger.info("We loaded the following model backends: \n{}".format("\n".join([x for x in model_backends])))
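For illustration, here is the __pycache__ handling added above as a standalone sketch. The helper name and the print call are placeholders rather than anything from this commit; it assumes the same modeling/inference_models layout.

# Hypothetical standalone version of the cleanup added in the hunk above.
import os
import shutil

def remove_dangling_backends(root: str = "modeling/inference_models") -> None:
    for module in os.listdir(root):
        module_path = os.path.join(root, module)
        if not os.path.isdir(module_path):
            continue
        if os.listdir(module_path) == ["__pycache__"]:
            # Only an untracked __pycache__ folder is left, so the backend
            # itself was deleted upstream and the folder can be removed.
            print(f"Deleting old backend {module}")
            shutil.rmtree(module_path)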
Deleted file: the "Basic Huggingface 4-bit" model backend (removed in full)
@@ -1,344 +0,0 @@
from __future__ import annotations

import gc
import os
import shutil
import time
import warnings
from typing import List, Optional, Union

import torch
import transformers
from transformers import AutoConfig, AutoModelForCausalLM, LogitsProcessorList

import utils
from logger import logger
import koboldai_settings
from modeling import warpers
from modeling.inference_model import (
    GenerationResult,
    GenerationSettings,
    InferenceModel,
    use_core_manipulations,
)

model_backend_name = "Basic Huggingface 4-bit"
model_backend_type = "Huggingface"


class model_backend(InferenceModel):
    # Model backends must inherit from InferenceModel.

    def __init__(self) -> None:
        super().__init__()
        import importlib
        dependency_exists = importlib.util.find_spec("bitsandbytes")
        if dependency_exists:
            self.model_name = "Basic Huggingface"
            self.path = None
        else:
            logger.warning("Bitsandbytes is not installed, you can not use Huggingface models in 4-bit")
            self.disable = True

    def is_valid(self, model_name, model_path, menu_path):
        try:
            if model_path is not None and os.path.exists(model_path):
                self.model_config = AutoConfig.from_pretrained(model_path)
            elif os.path.exists("models/{}".format(model_name.replace("/", "_"))):
                self.model_config = AutoConfig.from_pretrained(
                    "models/{}".format(model_name.replace("/", "_")),
                    revision=utils.koboldai_vars.revision,
                    cache_dir="cache",
                )
            else:
                self.model_config = AutoConfig.from_pretrained(
                    model_name, revision=utils.koboldai_vars.revision, cache_dir="cache"
                )
            return True
        except:
            return False

    def get_requested_parameters(
        self, model_name: str, model_path: str, menu_path: str, parameters: dict = {}
    ):
        requested_parameters = []

        if model_name == "customhuggingface":
            requested_parameters.append(
                {
                    "uitype": "text",
                    "unit": "text",
                    "label": "Huggingface Model Name",
                    "id": "custom_model_name",
                    "default": parameters.get("custom_model_name", ""),
                    "check": {"value": "", "check": "!="},
                    "tooltip": "Model name from https://huggingface.co/",
                    "menu_path": "",
                    "refresh_model_inputs": True,
                    "extra_classes": "",
                }
            )

        if model_name != "customhuggingface" or "custom_model_name" in parameters:
            model_name = parameters.get("custom_model_name", None) or model_name
            alt_model_path = self.get_local_model_path()

            if model_path and os.path.exists(model_path):
                # Use passed model path
                self.model_config = AutoConfig.from_pretrained(model_path)
            elif alt_model_path:
                # Use known model path
                self.model_config = AutoConfig.from_pretrained(
                    alt_model_path,
                    revision=utils.koboldai_vars.revision,
                    cache_dir="cache",
                )
            else:
                # No model path locally, we'll probably have to download
                self.model_config = AutoConfig.from_pretrained(
                    model_name, revision=utils.koboldai_vars.revision, cache_dir="cache"
                )

        return requested_parameters

    def set_input_parameters(self, parameters: dict):
        self.model_name = parameters.get("custom_model_name", parameters["id"])
        self.path = parameters.get("path", None)
        logger.info(parameters)

    def unload(self):
        if hasattr(self, "model"):
            self.model = None

        if hasattr(self, "tokenizer"):
            self.tokenizer = None

        if hasattr(self, "model_config"):
            self.model_config = None

        with torch.no_grad():
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", message="torch.distributed.reduce_op is deprecated"
                )
                for tensor in gc.get_objects():
                    try:
                        if torch.is_tensor(tensor):
                            tensor.set_(
                                torch.tensor(
                                    (), device=tensor.device, dtype=tensor.dtype
                                )
                            )
                    except:
                        pass
        gc.collect()

        try:
            with torch.no_grad():
                torch.cuda.empty_cache()
        except:
            pass

    def _load(self, save_model: bool, initial_load: bool) -> None:
        utils.koboldai_vars.allowsp = False

        if self.model_name == "NeoCustom":
            self.model_name = os.path.basename(os.path.normpath(self.path))
        utils.koboldai_vars.model = self.model_name

        # If we specify a model and it's in the root directory, we need to move
        # it to the models directory (legacy folder structure to new)
        if self.get_local_model_path(legacy=True):
            shutil.move(
                self.get_local_model_path(legacy=True, ignore_existance=True),
                self.get_local_model_path(ignore_existance=True),
            )

        self.init_model_config()

        self.model = AutoModelForCausalLM.from_pretrained(
            self.get_local_model_path(), low_cpu_mem_usage=True, device_map="auto", load_in_4bit=True
        )

        self.tokenizer = self._get_tokenizer(self.get_local_model_path())
        self.model.kai_model = self
        self.badwordsids = koboldai_settings.badwordsids_default
        utils.koboldai_vars.modeldim = self.model.get_input_embeddings().embedding_dim

        # Patch Huggingface stuff to use our samplers
        class KoboldLogitsWarperList(LogitsProcessorList):
            def __call__(
                _self,  # Unused
                input_ids: torch.LongTensor,
                scores: torch.FloatTensor,
                *args,
                **kwargs,
            ):
                # Kobold sampling is done here.
                scores = self._apply_warpers(scores=scores, input_ids=input_ids)

                # Things like Lua integration, phrase bias, and probability visualization are done here.
                for processor in self.logits_processors:
                    scores = processor(self, scores=scores, input_ids=input_ids)
                    assert (
                        scores is not None
                    ), f"Scores are None; processor '{processor}' is to blame"
                return scores

        def new_sample(self, *args, **kwargs):
            assert kwargs.pop("logits_warper", None) is not None
            kwargs["logits_warper"] = KoboldLogitsWarperList()

            if utils.koboldai_vars.newlinemode in ["s", "ns"]:
                kwargs["eos_token_id"] = -1
                kwargs.setdefault("pad_token_id", 2)

            return new_sample.old_sample(self, *args, **kwargs)

        new_sample.old_sample = transformers.GenerationMixin.sample
        use_core_manipulations.sample = new_sample

    def _apply_warpers(
        self, scores: torch.Tensor, input_ids: torch.Tensor
    ) -> torch.Tensor:
        """Applies samplers/warpers to the given scores, returning the altered scores.

        Args:
            scores (torch.Tensor): The original scores.
            input_ids (torch.Tensor): The input token sequence.

        Returns:
            torch.Tensor: The altered scores.
        """
        warpers.update_settings()

        for sid in utils.koboldai_vars.sampler_order:
            warper = warpers.Warper.from_id(sid)

            if not warper.value_is_valid():
                continue

            if warper == warpers.RepetitionPenalty:
                # Rep pen needs access to input tokens to decide what to penalize
                scores = warper.torch(scores, input_ids=input_ids)
            else:
                scores = warper.torch(scores)

            assert scores is not None, f"Scores are None; warper '{warper}' is to blame"
        return scores

    def _raw_generate(
        self,
        prompt_tokens: Union[List[int], torch.Tensor],
        max_new: int,
        gen_settings: GenerationSettings,
        single_line: bool = False,
        batch_count: int = 1,
        seed: Optional[int] = None,
        **kwargs,
    ) -> GenerationResult:
        if not isinstance(prompt_tokens, torch.Tensor):
            gen_in = torch.tensor(prompt_tokens, dtype=torch.long)[None]
        else:
            gen_in = prompt_tokens

        device = self.get_auxilary_device()
        gen_in = gen_in.to(device)

        additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else []

        if seed is not None:
            torch.manual_seed(seed)

        with torch.no_grad():
            start_time = time.time()
            genout = self.model.generate(
                gen_in,
                do_sample=True,
                max_length=min(
                    len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
                ),
                repetition_penalty=1.0,
                bad_words_ids=self.badwordsids + additional_bad_words_ids,
                use_cache=True,
                num_return_sequences=batch_count,
            )

        logger.debug(
            "torch_raw_generate: run generator {}s".format(time.time() - start_time)
        )

        return GenerationResult(
            self,
            out_batches=genout,
            prompt=prompt_tokens,
            is_whole_generation=False,
            output_includes_prompt=True,
        )

    def get_local_model_path(
        self, legacy: bool = False, ignore_existance: bool = False
    ) -> Optional[str]:
        """
        Returns a string of the model's path locally, or None if it is not downloaded.
        If ignore_existance is true, it will always return a path.
        """
        if self.path is not None:
            if os.path.exists(self.path):
                return self.path

        if self.model_name in [
            "NeoCustom",
            "GPT2Custom",
            "TPUMeshTransformerGPTJ",
            "TPUMeshTransformerGPTNeoX",
        ]:
            model_path = self.path
            assert model_path

            # Path can be absolute or relative to models directory
            if os.path.exists(model_path):
                return model_path

            model_path = os.path.join("models", model_path)

            try:
                assert os.path.exists(model_path)
            except AssertionError:
                logger.error(
                    f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'."
                )
                raise

            return model_path

        basename = self.model_name.replace("/", "_")
        if legacy:
            ret = basename
        else:
            ret = os.path.join("models", basename)

        if os.path.isdir(ret) or ignore_existance:
            return ret
        return None

    def init_model_config(self) -> None:
        # Get the model_type from the config or assume a model type if it isn't present
        try:
            self.model_config = AutoConfig.from_pretrained(
                self.get_local_model_path() or self.model_name,
                revision=utils.koboldai_vars.revision,
                cache_dir="cache",
            )
            self.model_type = self.model_config.model_type
        except ValueError:
            self.model_type = {
                "NeoCustom": "gpt_neo",
                "GPT2Custom": "gpt2",
            }.get(self.model)

            if not self.model_type:
                logger.warning(
                    "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)"
                )
                self.model_type = "gpt_neo"
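For reference, the core of the deleted backend was a single AutoModelForCausalLM.from_pretrained(..., load_in_4bit=True, device_map="auto") call. Below is a minimal sketch of the same 4-bit load outside KoboldAI, assuming a transformers version with bitsandbytes 4-bit support installed; the model id is a placeholder, not something from this commit.

# Minimal 4-bit loading sketch (assumes transformers with bitsandbytes support).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "EleutherAI/gpt-neo-1.3B"  # placeholder model id

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # same flag the deleted backend passed to from_pretrained
    bnb_4bit_compute_dtype=torch.float16,  # compute dtype for the dequantized matmuls
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",        # spread weights across available devices
    low_cpu_mem_usage=True,
    quantization_config=quant_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

inputs = tokenizer("Hello,", return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=20, do_sample=True)
print(tokenizer.decode(output[0]))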