Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)
Merge pull request #391 from one-some/basic-hf-backend
Basic HF backend
aiserver.py (37 changed lines)
@@ -627,19 +627,36 @@ from modeling.patches import patch_transformers
 import importlib
 model_backend_code = {}
 model_backends = {}
+model_backend_module_names = {}
 model_backend_type_crosswalk = {}
+
+PRIORITIZED_BACKEND_MODULES = ["generic_hf_torch"]
+
 for module in os.listdir("./modeling/inference_models"):
     if not os.path.isfile(os.path.join("./modeling/inference_models",module)) and module != '__pycache__':
         try:
-            model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module))
-            model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend()
-            if 'disable' in vars(model_backends[model_backend_code[module].model_backend_name]) and model_backends[model_backend_code[module].model_backend_name].disable:
-                del model_backends[model_backend_code[module].model_backend_name]
-
-            else:
-                if model_backend_code[module].model_backend_type in model_backend_type_crosswalk:
-                    model_backend_type_crosswalk[model_backend_code[module].model_backend_type].append(model_backend_code[module].model_backend_name)
-                else:
-                    model_backend_type_crosswalk[model_backend_code[module].model_backend_type] = [model_backend_code[module].model_backend_name]
+            backend_code = importlib.import_module('modeling.inference_models.{}.class'.format(module))
+            backend_name = backend_code.model_backend_name
+            backend_type = backend_code.model_backend_type
+            backend_object = backend_code.model_backend()
+
+            if "disable" in vars(backend_object) and backend_object.disable:
+                continue
+
+            model_backends[backend_name] = backend_object
+            model_backend_code[module] = backend_code
+
+            if backend_name in model_backend_module_names:
+                raise RuntimeError(f"{module} cannot make backend '{backend_name}'; it already exists!")
+            model_backend_module_names[backend_name] = module
+
+            if backend_type in model_backend_type_crosswalk:
+                if module in PRIORITIZED_BACKEND_MODULES:
+                    model_backend_type_crosswalk[backend_type].insert(0, backend_name)
+                else:
+                    model_backend_type_crosswalk[backend_type].append(backend_name)
+            else:
+                model_backend_type_crosswalk[backend_type] = [backend_name]
+
         except Exception:
             logger.error("Model Backend {} failed to load".format(module))
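The effect of PRIORITIZED_BACKEND_MODULES is that a prioritized module's backend goes to the front of its type's crosswalk list. Below is a minimal, dependency-free sketch of that ordering logic; "Basic Huggingface" and the "Huggingface" type come from the new backend added in this PR, while the name given to the generic_hf_torch backend is a placeholder.

# Dependency-free sketch of the crosswalk ordering above; names other than
# "Basic Huggingface"/"Huggingface" are placeholders.
PRIORITIZED_BACKEND_MODULES = ["generic_hf_torch"]

discovered = [
    # (module directory, model_backend_name, model_backend_type), hypothetical discovery order
    ("basic_hf", "Basic Huggingface", "Huggingface"),
    ("generic_hf_torch", "Torch Huggingface (placeholder name)", "Huggingface"),
]

model_backend_type_crosswalk = {}
for module, backend_name, backend_type in discovered:
    if backend_type in model_backend_type_crosswalk:
        if module in PRIORITIZED_BACKEND_MODULES:
            # Prioritized modules go to the front of their type's list.
            model_backend_type_crosswalk[backend_type].insert(0, backend_name)
        else:
            model_backend_type_crosswalk[backend_type].append(backend_name)
    else:
        model_backend_type_crosswalk[backend_type] = [backend_name]

print(model_backend_type_crosswalk)
# {'Huggingface': ['Torch Huggingface (placeholder name)', 'Basic Huggingface']}
# The prioritized module's backend ends up first even though it was discovered second.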
@@ -6240,7 +6257,11 @@ def UI_2_select_model(data):
     else:
         #Here we have a model that's not in our menu structure (either a custom model or a custom path
         #so we'll just go through all the possible loaders
-        for model_backend in model_backends:
+        for model_backend in sorted(
+            model_backends,
+            key=lambda x: model_backend_module_names[x] in PRIORITIZED_BACKEND_MODULES,
+            reverse=True,
+        ):
             if model_backends[model_backend].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]):
                 valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"])
     emit("selected_model_info", {"model_backends": valid_loaders})
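The sorted() call relies on False < True; with reverse=True, any backend whose module is in PRIORITIZED_BACKEND_MODULES is visited first, while the relative order of the rest is preserved because sorted() is stable. A standalone sketch with placeholder backend names:

# Standalone sketch of the prioritized iteration order; backend and module
# names are placeholders except basic_hf / generic_hf_torch.
PRIORITIZED_BACKEND_MODULES = ["generic_hf_torch"]
model_backend_module_names = {
    "Basic Huggingface": "basic_hf",
    "Torch Huggingface (placeholder name)": "generic_hf_torch",
    "Read Only (placeholder name)": "readonly",
}

ordered = sorted(
    model_backend_module_names,
    key=lambda x: model_backend_module_names[x] in PRIORITIZED_BACKEND_MODULES,
    reverse=True,
)
print(ordered)
# ['Torch Huggingface (placeholder name)', 'Basic Huggingface', 'Read Only (placeholder name)']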
modeling/inference_models/basic_hf/class.py (new file, 338 lines)
@@ -0,0 +1,338 @@
from __future__ import annotations

import gc
import os
import shutil
import time
import warnings
from typing import List, Optional, Union

import torch
import transformers
from transformers import AutoConfig, AutoModelForCausalLM, LogitsProcessorList

import utils
from logger import logger
import koboldai_settings
from modeling import warpers
from modeling.inference_model import (
    GenerationResult,
    GenerationSettings,
    InferenceModel,
    use_core_manipulations,
)

model_backend_name = "Basic Huggingface"
model_backend_type = "Huggingface"

class model_backend(InferenceModel):
    # Model backends must inherit from InferenceModel.

    def __init__(self) -> None:
        super().__init__()
        self.model_name = "Basic Huggingface"
        self.path = None

    def is_valid(self, model_name, model_path, menu_path):
        try:
            if model_path is not None and os.path.exists(model_path):
                self.model_config = AutoConfig.from_pretrained(model_path)
            elif os.path.exists("models/{}".format(model_name.replace("/", "_"))):
                self.model_config = AutoConfig.from_pretrained(
                    "models/{}".format(model_name.replace("/", "_")),
                    revision=utils.koboldai_vars.revision,
                    cache_dir="cache",
                )
            else:
                self.model_config = AutoConfig.from_pretrained(
                    model_name, revision=utils.koboldai_vars.revision, cache_dir="cache"
                )
            return True
        except:
            return False

    def get_requested_parameters(
        self, model_name: str, model_path: str, menu_path: str, parameters: dict = {}
    ):
        requested_parameters = []

        if model_name == "customhuggingface":
            requested_parameters.append(
                {
                    "uitype": "text",
                    "unit": "text",
                    "label": "Huggingface Model Name",
                    "id": "custom_model_name",
                    "default": parameters.get("custom_model_name", ""),
                    "check": {"value": "", "check": "!="},
                    "tooltip": "Model name from https://huggingface.co/",
                    "menu_path": "",
                    "refresh_model_inputs": True,
                    "extra_classes": "",
                }
            )

        if model_name != "customhuggingface" or "custom_model_name" in parameters:
            model_name = parameters.get("custom_model_name", None) or model_name
            alt_model_path = self.get_local_model_path()

            if model_path and os.path.exists(model_path):
                # Use passed model path
                self.model_config = AutoConfig.from_pretrained(model_path)
            elif alt_model_path:
                # Use known model path
                self.model_config = AutoConfig.from_pretrained(
                    alt_model_path,
                    revision=utils.koboldai_vars.revision,
                    cache_dir="cache",
                )
            else:
                # No model path locally, we'll probably have to download
                self.model_config = AutoConfig.from_pretrained(
                    model_name, revision=utils.koboldai_vars.revision, cache_dir="cache"
                )

        return requested_parameters

    def set_input_parameters(self, parameters: dict):
        self.model_name = parameters.get("custom_model_name", parameters["id"])
        self.path = parameters.get("path", None)
        logger.info(parameters)

    def unload(self):
        if hasattr(self, "model"):
            self.model = None

        if hasattr(self, "tokenizer"):
            self.tokenizer = None

        if hasattr(self, "model_config"):
            self.model_config = None

        with torch.no_grad():
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", message="torch.distributed.reduce_op is deprecated"
                )
                for tensor in gc.get_objects():
                    try:
                        if torch.is_tensor(tensor):
                            tensor.set_(
                                torch.tensor(
                                    (), device=tensor.device, dtype=tensor.dtype
                                )
                            )
                    except:
                        pass
        gc.collect()

        try:
            with torch.no_grad():
                torch.cuda.empty_cache()
        except:
            pass

    def _load(self, save_model: bool, initial_load: bool) -> None:
        utils.koboldai_vars.allowsp = False

        if self.model_name == "NeoCustom":
            self.model_name = os.path.basename(os.path.normpath(self.path))
        utils.koboldai_vars.model = self.model_name

        # If we specify a model and it's in the root directory, we need to move
        # it to the models directory (legacy folder structure to new)
        if self.get_local_model_path(legacy=True):
            shutil.move(
                self.get_local_model_path(legacy=True, ignore_existance=True),
                self.get_local_model_path(ignore_existance=True),
            )

        self.init_model_config()

        self.model = AutoModelForCausalLM.from_pretrained(
            self.get_local_model_path(), low_cpu_mem_usage=True, device_map="auto"
        )

        self.tokenizer = self._get_tokenizer(self.get_local_model_path())

        self.model.kai_model = self
        self.badwordsids = koboldai_settings.badwordsids_default
        utils.koboldai_vars.modeldim = self.model.get_input_embeddings().embedding_dim

        # Patch Huggingface stuff to use our samplers
        class KoboldLogitsWarperList(LogitsProcessorList):
            def __call__(
                _self,  # Unused
                input_ids: torch.LongTensor,
                scores: torch.FloatTensor,
                *args,
                **kwargs,
            ):
                # Kobold sampling is done here.
                scores = self._apply_warpers(scores=scores, input_ids=input_ids)

                # Things like Lua integration, phrase bias, and probability visualization are done here.
                for processor in self.logits_processors:
                    scores = processor(self, scores=scores, input_ids=input_ids)
                    assert (
                        scores is not None
                    ), f"Scores are None; processor '{processor}' is to blame"
                return scores

        def new_sample(self, *args, **kwargs):
            assert kwargs.pop("logits_warper", None) is not None
            kwargs["logits_warper"] = KoboldLogitsWarperList()

            if utils.koboldai_vars.newlinemode in ["s", "ns"]:
                kwargs["eos_token_id"] = -1
                kwargs.setdefault("pad_token_id", 2)

            return new_sample.old_sample(self, *args, **kwargs)

        new_sample.old_sample = transformers.GenerationMixin.sample
        use_core_manipulations.sample = new_sample

    def _apply_warpers(
        self, scores: torch.Tensor, input_ids: torch.Tensor
    ) -> torch.Tensor:
        """Applies samplers/warpers to the given scores, returning the altered scores.

        Args:
            scores (torch.Tensor): The original scores.
            input_ids (torch.Tensor): The input token sequence.

        Returns:
            torch.Tensor: The altered scores.
        """
        warpers.update_settings()

        for sid in utils.koboldai_vars.sampler_order:
            warper = warpers.Warper.from_id(sid)

            if not warper.value_is_valid():
                continue

            if warper == warpers.RepetitionPenalty:
                # Rep pen needs access to input tokens to decide what to penalize
                scores = warper.torch(scores, input_ids=input_ids)
            else:
                scores = warper.torch(scores)

            assert scores is not None, f"Scores are None; warper '{warper}' is to blame"
        return scores
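_apply_warpers funnels the scores through each enabled warper in the order given by utils.koboldai_vars.sampler_order, skipping any sampler whose value is not valid. A dependency-free sketch of that chaining, separate from class.py, with stand-in transforms in place of the real modeling.warpers objects:

# Stand-in sketch (not part of class.py): scores flow through warpers in the
# order given by sampler_order; anything invalid/disabled is simply skipped.
warpers_by_id = {
    0: lambda scores: [s / 0.8 for s in scores],      # stand-in for e.g. temperature
    1: lambda scores: [max(s, 0.0) for s in scores],  # arbitrary stand-in transform
    2: None,                                          # pretend this sampler is disabled
}
sampler_order = [1, 2, 0]

scores = [2.0, -1.0, 0.5]
for sid in sampler_order:
    warper = warpers_by_id[sid]
    if warper is None:  # mirrors the value_is_valid() skip
        continue
    scores = warper(scores)
    assert scores is not None, f"Scores are None; warper '{sid}' is to blame"

print(scores)  # [2.5, 0.0, 0.625]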

    def _raw_generate(
        self,
        prompt_tokens: Union[List[int], torch.Tensor],
        max_new: int,
        gen_settings: GenerationSettings,
        single_line: bool = False,
        batch_count: int = 1,
        seed: Optional[int] = None,
        **kwargs,
    ) -> GenerationResult:
        if not isinstance(prompt_tokens, torch.Tensor):
            gen_in = torch.tensor(prompt_tokens, dtype=torch.long)[None]
        else:
            gen_in = prompt_tokens

        device = self.get_auxilary_device()
        gen_in = gen_in.to(device)

        additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else []

        if seed is not None:
            torch.manual_seed(seed)

        with torch.no_grad():
            start_time = time.time()
            genout = self.model.generate(
                gen_in,
                do_sample=True,
                max_length=min(
                    len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
                ),
                repetition_penalty=1.0,
                bad_words_ids=self.badwordsids + additional_bad_words_ids,
                use_cache=True,
                num_return_sequences=batch_count,
            )

        logger.debug(
            "torch_raw_generate: run generator {}s".format(time.time() - start_time)
        )

        return GenerationResult(
            self,
            out_batches=genout,
            prompt=prompt_tokens,
            is_whole_generation=False,
            output_includes_prompt=True,
        )
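_raw_generate accepts either a plain token list or a tensor; the [None] index adds the batch dimension, and the max_length passed to generate() counts the prompt plus the requested new tokens, capped by the context limit. A small sketch of those two details, separate from class.py (arbitrary numbers, torch only):

import torch

# Sketch (not part of class.py) of the prompt handling in _raw_generate: a flat
# token list becomes a (1, seq_len) batch, and the generation budget is capped
# by the context limit.
prompt_tokens = [318, 257, 1332]   # arbitrary token ids
max_new = 80                       # new tokens requested
max_length = 2048                  # stand-in for utils.koboldai_vars.max_length

gen_in = torch.tensor(prompt_tokens, dtype=torch.long)[None]
print(gen_in.shape)  # torch.Size([1, 3])

# max_length passed to model.generate() counts the prompt plus new tokens.
print(min(len(prompt_tokens) + max_new, max_length))  # 83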

    def get_local_model_path(
        self, legacy: bool = False, ignore_existance: bool = False
    ) -> Optional[str]:
        """
        Returns a string of the model's path locally, or None if it is not downloaded.
        If ignore_existance is true, it will always return a path.
        """
        if self.path is not None:
            if os.path.exists(self.path):
                return self.path

        if self.model_name in [
            "NeoCustom",
            "GPT2Custom",
            "TPUMeshTransformerGPTJ",
            "TPUMeshTransformerGPTNeoX",
        ]:
            model_path = self.path
            assert model_path

            # Path can be absolute or relative to models directory
            if os.path.exists(model_path):
                return model_path

            model_path = os.path.join("models", model_path)

            try:
                assert os.path.exists(model_path)
            except AssertionError:
                logger.error(
                    f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'."
                )
                raise

            return model_path

        basename = self.model_name.replace("/", "_")
        if legacy:
            ret = basename
        else:
            ret = os.path.join("models", basename)

        if os.path.isdir(ret) or ignore_existance:
            return ret
        return None
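For ordinary Hugging Face model IDs, get_local_model_path maps the model name onto the models/ directory by replacing "/" with "_"; the legacy flag points at the old root-level layout instead. A quick illustration, separate from class.py, with a hypothetical model name:

import os

# Sketch (not part of class.py) of the name-to-path mapping get_local_model_path
# uses for regular Hugging Face model ids. The model name is hypothetical.
model_name = "EleutherAI/gpt-neo-1.3B"
basename = model_name.replace("/", "_")

legacy_path = basename                          # old layout: folder in the repo root
current_path = os.path.join("models", basename)

print(legacy_path)   # EleutherAI_gpt-neo-1.3B
print(current_path)  # models/EleutherAI_gpt-neo-1.3B
# The real method only returns the path when os.path.isdir(...) succeeds,
# unless ignore_existance=True.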

    def init_model_config(self) -> None:
        # Get the model_type from the config or assume a model type if it isn't present
        try:
            self.model_config = AutoConfig.from_pretrained(
                self.get_local_model_path() or self.model_name,
                revision=utils.koboldai_vars.revision,
                cache_dir="cache",
            )
            self.model_type = self.model_config.model_type
        except ValueError:
            self.model_type = {
                "NeoCustom": "gpt_neo",
                "GPT2Custom": "gpt2",
            }.get(self.model)

            if not self.model_type:
                logger.warning(
                    "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)"
                )
                self.model_type = "gpt_neo"
@@ -17,6 +17,7 @@ class HFInferenceModel(InferenceModel):
         self.model_config = None
         #self.model_name = model_name
 
+        self.hf_torch = False
         self.model = None
         self.tokenizer = None
         self.badwordsids = koboldai_settings.badwordsids_default