Make basic hf independent from hf

Author: somebody
Date: 2023-07-12 18:36:30 -05:00
Parent: d17ce8461d
Commit: f67cb7fa05


@@ -1,41 +1,119 @@
 from __future__ import annotations

+import gc
 import os
 import shutil
 import time
+import warnings
 from typing import List, Optional, Union

 import torch
 import transformers
-from transformers import LogitsProcessorList
-from transformers.models.auto.modeling_auto import AutoModelForCausalLM
+from transformers import AutoConfig, AutoModelForCausalLM, LogitsProcessorList

 import utils
 from logger import logger
+import koboldai_settings
 from modeling import warpers
 from modeling.inference_model import (
     GenerationResult,
     GenerationSettings,
+    InferenceModel,
     use_core_manipulations,
 )
-from modeling.inference_models.hf import HFInferenceModel

 model_backend_name = "Basic Huggingface"
 model_backend_type = "Huggingface"


-class model_backend(HFInferenceModel):
-    # Model backends must inherit from InferenceModel. We inherit from HFInferenceModel here,
-    # as it provides some helpers for handling Huggingface configs.
+class model_backend(InferenceModel):
+    # Model backends must inherit from InferenceModel.

     def __init__(self) -> None:
         super().__init__()
         self.model_name = "Basic Huggingface"
+        self.path = None

-        # TODO: These feel weird to be in HFInferenceModel, maybe we could implement
-        # them in subclasses?
-        self.hf_torch = True
-        self.nobreakmodel = True
+    def get_requested_parameters(
+        self, model_name: str, model_path: str, menu_path: str, parameters: dict = {}
+    ):
+        requested_parameters = []
+        if model_name == "customhuggingface":
+            requested_parameters.append(
+                {
+                    "uitype": "text",
+                    "unit": "text",
+                    "label": "Huggingface Model Name",
+                    "id": "custom_model_name",
+                    "default": parameters.get("custom_model_name", ""),
+                    "check": {"value": "", "check": "!="},
+                    "tooltip": "Model name from https://huggingface.co/",
+                    "menu_path": "",
+                    "refresh_model_inputs": True,
+                    "extra_classes": "",
+                }
+            )
+
+        if model_name != "customhuggingface" or "custom_model_name" in parameters:
+            model_name = parameters.get("custom_model_name", None) or model_name
+            alt_model_path = self.get_local_model_path()
+
+            if model_path and os.path.exists(model_path):
+                # Use passed model path
+                self.model_config = AutoConfig.from_pretrained(model_path)
+            elif alt_model_path:
+                # Use known model path
+                self.model_config = AutoConfig.from_pretrained(
+                    alt_model_path,
+                    revision=utils.koboldai_vars.revision,
+                    cache_dir="cache",
+                )
+            else:
+                # No model path locally, we'll probably have to download
+                self.model_config = AutoConfig.from_pretrained(
+                    model_name, revision=utils.koboldai_vars.revision, cache_dir="cache"
+                )
+
+        return requested_parameters
+
+    def set_input_parameters(self, parameters: dict):
+        self.model_name = parameters.get("custom_model_name", parameters["id"])
+        self.path = parameters.get("path", None)
+        logger.info(parameters)
+
+    def unload(self):
+        if hasattr(self, "model"):
+            self.model = None
+
+        if hasattr(self, "tokenizer"):
+            self.tokenizer = None
+
+        if hasattr(self, "model_config"):
+            self.model_config = None
+
+        with torch.no_grad():
+            with warnings.catch_warnings():
+                warnings.filterwarnings(
+                    "ignore", message="torch.distributed.reduce_op is deprecated"
+                )
+                for tensor in gc.get_objects():
+                    try:
+                        if torch.is_tensor(tensor):
+                            tensor.set_(
+                                torch.tensor(
+                                    (), device=tensor.device, dtype=tensor.dtype
+                                )
+                            )
+                    except:
+                        pass
+
+        gc.collect()
+        try:
+            with torch.no_grad():
+                torch.cuda.empty_cache()
+        except:
+            pass

     def _load(self, save_model: bool, initial_load: bool) -> None:
         utils.koboldai_vars.allowsp = False
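
Note: in the new get_requested_parameters, the model config is resolved in order of preference: an explicitly passed path, then a known local download, then the Hugging Face Hub. Below is a rough standalone sketch of that order; the resolve_config name and the placeholder arguments are illustrative and not part of this commit.

import os

from transformers import AutoConfig


def resolve_config(model_name: str, model_path: str = None, local_path: str = None):
    # 1) An explicitly passed path wins.
    if model_path and os.path.exists(model_path):
        return AutoConfig.from_pretrained(model_path)
    # 2) Otherwise fall back to a previously downloaded local copy.
    if local_path:
        return AutoConfig.from_pretrained(local_path, cache_dir="cache")
    # 3) Otherwise let transformers fetch the config from the Hub.
    return AutoConfig.from_pretrained(model_name, cache_dir="cache")
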
@@ -58,12 +136,9 @@ class model_backend(HFInferenceModel):
             self.get_local_model_path(), low_cpu_mem_usage=True, device_map="auto"
         )

-        if self.usegpu:
-            self.model = self.model.to("cuda")
-
         self.tokenizer = self._get_tokenizer(self.get_local_model_path())

         self.model.kai_model = self
+        self.badwordsids = koboldai_settings.badwordsids_default
         utils.koboldai_vars.modeldim = self.model.get_input_embeddings().embedding_dim

         # Patch Huggingface stuff to use our samplers
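
Note: the usegpu branch is dropped because the model is already placed by device_map="auto" at load time. A minimal sketch of that loading path, assuming the accelerate package is installed (it is required for device_map="auto"); "gpt2" is only a placeholder model id.

from transformers import AutoModelForCausalLM, AutoTokenizer

# device_map="auto" asks accelerate to place the weights across the available
# devices at load time, so no manual model.to("cuda") call is needed afterwards.
model = AutoModelForCausalLM.from_pretrained(
    "gpt2", low_cpu_mem_usage=True, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("gpt2")
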
@@ -143,11 +218,8 @@ class model_backend(HFInferenceModel):
         else:
             gen_in = prompt_tokens

-        if not self.usegpu:
-            gen_in = gen_in.to("cpu")
-        else:
-            device = self.get_auxilary_device()
-            gen_in = gen_in.to(device)
+        device = self.get_auxilary_device()
+        gen_in = gen_in.to(device)

         additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else []
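
Note: prompt tokens are now always moved to the backend's auxiliary device instead of branching on usegpu. A hedged, self-contained sketch of the idea; "gpt2" is a placeholder and model.device stands in for self.get_auxilary_device().

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Encode a prompt and move it to wherever the model expects its inputs.
prompt_tokens = tokenizer("Hello", return_tensors="pt").input_ids
device = model.device  # stand-in for self.get_auxilary_device()
gen_in = prompt_tokens.to(device)
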
@@ -179,3 +251,70 @@ class model_backend(HFInferenceModel):
             is_whole_generation=False,
             output_includes_prompt=True,
         )
+
+    def get_local_model_path(
+        self, legacy: bool = False, ignore_existance: bool = False
+    ) -> Optional[str]:
+        """
+        Returns a string of the model's path locally, or None if it is not downloaded.
+        If ignore_existance is true, it will always return a path.
+        """
+        if self.path is not None:
+            if os.path.exists(self.path):
+                return self.path
+
+        if self.model_name in [
+            "NeoCustom",
+            "GPT2Custom",
+            "TPUMeshTransformerGPTJ",
+            "TPUMeshTransformerGPTNeoX",
+        ]:
+            model_path = self.path
+            assert model_path
+
+            # Path can be absolute or relative to models directory
+            if os.path.exists(model_path):
+                return model_path
+
+            model_path = os.path.join("models", model_path)
+
+            try:
+                assert os.path.exists(model_path)
+            except AssertionError:
+                logger.error(
+                    f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'."
+                )
+                raise
+
+            return model_path
+
+        basename = self.model_name.replace("/", "_")
+        if legacy:
+            ret = basename
+        else:
+            ret = os.path.join("models", basename)
+
+        if os.path.isdir(ret) or ignore_existance:
+            return ret
+
+        return None
+
+    def init_model_config(self) -> None:
+        # Get the model_type from the config or assume a model type if it isn't present
+        try:
+            self.model_config = AutoConfig.from_pretrained(
+                self.get_local_model_path() or self.model_name,
+                revision=utils.koboldai_vars.revision,
+                cache_dir="cache",
+            )
+            self.model_type = self.model_config.model_type
+        except ValueError:
+            self.model_type = {
+                "NeoCustom": "gpt_neo",
+                "GPT2Custom": "gpt2",
+            }.get(self.model)
+
+            if not self.model_type:
+                logger.warning(
+                    "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)"
+                )
+                self.model_type = "gpt_neo"
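
Note on get_local_model_path: for ordinary Hub models, the local directory is derived by replacing "/" with "_" in the model name and nesting it under models/. A tiny illustrative check of that convention; local_dir_for is a hypothetical helper, not part of the commit.

import os


def local_dir_for(model_name: str) -> str:
    # Mirrors the non-legacy branch of get_local_model_path().
    return os.path.join("models", model_name.replace("/", "_"))


print(local_dir_for("EleutherAI/gpt-neo-1.3B"))  # -> models/EleutherAI_gpt-neo-1.3B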