diff --git a/modeling/inference_models/basic_hf/class.py b/modeling/inference_models/basic_hf/class.py
index 9d4b643b..74bfcd17 100644
--- a/modeling/inference_models/basic_hf/class.py
+++ b/modeling/inference_models/basic_hf/class.py
@@ -1,41 +1,119 @@
 from __future__ import annotations
 
+import gc
 import os
 import shutil
 import time
+import warnings
 from typing import List, Optional, Union
 
 import torch
 import transformers
-from transformers import LogitsProcessorList
-from transformers.models.auto.modeling_auto import AutoModelForCausalLM
+from transformers import AutoConfig, AutoModelForCausalLM, LogitsProcessorList
 
 import utils
 from logger import logger
+import koboldai_settings
 from modeling import warpers
 from modeling.inference_model import (
     GenerationResult,
     GenerationSettings,
+    InferenceModel,
     use_core_manipulations,
 )
-from modeling.inference_models.hf import HFInferenceModel
 
 model_backend_name = "Basic Huggingface"
 model_backend_type = "Huggingface"
 
 
-class model_backend(HFInferenceModel):
-    # Model backends must inherit from InferenceModel. We inherit from HFInferenceModel here,
-    # as it provides some helpers for handling Huggingface configs.
+class model_backend(InferenceModel):
+    # Model backends must inherit from InferenceModel.
     def __init__(self) -> None:
         super().__init__()
         self.model_name = "Basic Huggingface"
+        self.path = None
 
-        # TODO: These feel weird to be in HFInferenceModel, maybe we could implement
-        # them in subclasses?
-        self.hf_torch = True
-        self.nobreakmodel = True
+    def get_requested_parameters(
+        self, model_name: str, model_path: str, menu_path: str, parameters: dict = {}
+    ):
+        requested_parameters = []
+
+        if model_name == "customhuggingface":
+            requested_parameters.append(
+                {
+                    "uitype": "text",
+                    "unit": "text",
+                    "label": "Huggingface Model Name",
+                    "id": "custom_model_name",
+                    "default": parameters.get("custom_model_name", ""),
+                    "check": {"value": "", "check": "!="},
+                    "tooltip": "Model name from https://huggingface.co/",
+                    "menu_path": "",
+                    "refresh_model_inputs": True,
+                    "extra_classes": "",
+                }
+            )
+
+        if model_name != "customhuggingface" or "custom_model_name" in parameters:
+            model_name = parameters.get("custom_model_name", None) or model_name
+            alt_model_path = self.get_local_model_path()
+
+            if model_path and os.path.exists(model_path):
+                # Use passed model path
+                self.model_config = AutoConfig.from_pretrained(model_path)
+            elif alt_model_path:
+                # Use known model path
+                self.model_config = AutoConfig.from_pretrained(
+                    alt_model_path,
+                    revision=utils.koboldai_vars.revision,
+                    cache_dir="cache",
+                )
+            else:
+                # No model path locally, we'll probably have to download
+                self.model_config = AutoConfig.from_pretrained(
+                    model_name, revision=utils.koboldai_vars.revision, cache_dir="cache"
+                )
+
+        return requested_parameters
+
+    def set_input_parameters(self, parameters: dict):
+        self.model_name = parameters.get("custom_model_name", parameters["id"])
+        self.path = parameters.get("path", None)
+        logger.info(parameters)
+
+    def unload(self):
+        if hasattr(self, "model"):
+            self.model = None
+
+        if hasattr(self, "tokenizer"):
+            self.tokenizer = None
+
+        if hasattr(self, "model_config"):
+            self.model_config = None
+
+        with torch.no_grad():
+            with warnings.catch_warnings():
+                warnings.filterwarnings(
+                    "ignore", message="torch.distributed.reduce_op is deprecated"
+                )
+                for tensor in gc.get_objects():
+                    try:
+                        if torch.is_tensor(tensor):
+                            tensor.set_(
+                                torch.tensor(
+                                    (), device=tensor.device, dtype=tensor.dtype
+                                )
+                            )
+                    except:
+                        pass
+
+        gc.collect()
+
+        try:
+            with torch.no_grad():
+                torch.cuda.empty_cache()
+        except:
+            pass
 
     def _load(self, save_model: bool, initial_load: bool) -> None:
         utils.koboldai_vars.allowsp = False
@@ -58,12 +136,9 @@ class model_backend(HFInferenceModel):
             self.get_local_model_path(), low_cpu_mem_usage=True, device_map="auto"
         )
 
-        if self.usegpu:
-            self.model = self.model.to("cuda")
-
         self.tokenizer = self._get_tokenizer(self.get_local_model_path())
 
-        self.model.kai_model = self
+        self.badwordsids = koboldai_settings.badwordsids_default
         utils.koboldai_vars.modeldim = self.model.get_input_embeddings().embedding_dim
 
         # Patch Huggingface stuff to use our samplers
@@ -143,11 +218,8 @@ class model_backend(HFInferenceModel):
         else:
            gen_in = prompt_tokens
 
-        if not self.usegpu:
-            gen_in = gen_in.to("cpu")
-        else:
-            device = self.get_auxilary_device()
-            gen_in = gen_in.to(device)
+        device = self.get_auxilary_device()
+        gen_in = gen_in.to(device)
 
         additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else []
 
@@ -179,3 +251,70 @@
             is_whole_generation=False,
             output_includes_prompt=True,
         )
+
+    def get_local_model_path(
+        self, legacy: bool = False, ignore_existance: bool = False
+    ) -> Optional[str]:
+        """
+        Returns a string of the model's path locally, or None if it is not downloaded.
+        If ignore_existance is true, it will always return a path.
+        """
+        if self.path is not None:
+            if os.path.exists(self.path):
+                return self.path
+
+        if self.model_name in [
+            "NeoCustom",
+            "GPT2Custom",
+            "TPUMeshTransformerGPTJ",
+            "TPUMeshTransformerGPTNeoX",
+        ]:
+            model_path = self.path
+            assert model_path
+
+            # Path can be absolute or relative to models directory
+            if os.path.exists(model_path):
+                return model_path
+
+            model_path = os.path.join("models", model_path)
+
+            try:
+                assert os.path.exists(model_path)
+            except AssertionError:
+                logger.error(
+                    f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'."
+                )
+                raise
+
+            return model_path
+
+        basename = self.model_name.replace("/", "_")
+        if legacy:
+            ret = basename
+        else:
+            ret = os.path.join("models", basename)
+
+        if os.path.isdir(ret) or ignore_existance:
+            return ret
+        return None
+
+    def init_model_config(self) -> None:
+        # Get the model_type from the config or assume a model type if it isn't present
+        try:
+            self.model_config = AutoConfig.from_pretrained(
+                self.get_local_model_path() or self.model_name,
+                revision=utils.koboldai_vars.revision,
+                cache_dir="cache",
+            )
+            self.model_type = self.model_config.model_type
+        except ValueError:
+            self.model_type = {
+                "NeoCustom": "gpt_neo",
+                "GPT2Custom": "gpt2",
+            }.get(self.model)
+
+            if not self.model_type:
+                logger.warning(
+                    "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)"
+                )
+                self.model_type = "gpt_neo"