Make basic hf independent from hf

Author: somebody
Date: 2023-07-12 18:36:30 -05:00
Parent: d17ce8461d
Commit: f67cb7fa05


@@ -1,41 +1,119 @@
 from __future__ import annotations

+import gc
 import os
 import shutil
 import time
+import warnings
 from typing import List, Optional, Union

 import torch
 import transformers
-from transformers import LogitsProcessorList
-from transformers.models.auto.modeling_auto import AutoModelForCausalLM
+from transformers import AutoConfig, AutoModelForCausalLM, LogitsProcessorList

 import utils
 from logger import logger
+import koboldai_settings
 from modeling import warpers
 from modeling.inference_model import (
     GenerationResult,
     GenerationSettings,
+    InferenceModel,
     use_core_manipulations,
 )
-from modeling.inference_models.hf import HFInferenceModel

 model_backend_name = "Basic Huggingface"
 model_backend_type = "Huggingface"


-class model_backend(HFInferenceModel):
-    # Model backends must inherit from InferenceModel. We inherit from HFInferenceModel here,
-    # as it provides some helpers for handling Huggingface configs.
+class model_backend(InferenceModel):
+    # Model backends must inherit from InferenceModel.

     def __init__(self) -> None:
         super().__init__()
         self.model_name = "Basic Huggingface"
+        self.path = None

-        # TODO: These feel weird to be in HFInferenceModel, maybe we could implement
-        # them in subclasses?
-        self.hf_torch = True
-        self.nobreakmodel = True
+    def get_requested_parameters(
+        self, model_name: str, model_path: str, menu_path: str, parameters: dict = {}
+    ):
+        requested_parameters = []
+        if model_name == "customhuggingface":
+            requested_parameters.append(
+                {
+                    "uitype": "text",
+                    "unit": "text",
+                    "label": "Huggingface Model Name",
+                    "id": "custom_model_name",
+                    "default": parameters.get("custom_model_name", ""),
+                    "check": {"value": "", "check": "!="},
+                    "tooltip": "Model name from https://huggingface.co/",
+                    "menu_path": "",
+                    "refresh_model_inputs": True,
+                    "extra_classes": "",
+                }
+            )
+
+        if model_name != "customhuggingface" or "custom_model_name" in parameters:
+            model_name = parameters.get("custom_model_name", None) or model_name
+            alt_model_path = self.get_local_model_path()
+
+            if model_path and os.path.exists(model_path):
+                # Use passed model path
+                self.model_config = AutoConfig.from_pretrained(model_path)
+            elif alt_model_path:
+                # Use known model path
+                self.model_config = AutoConfig.from_pretrained(
+                    alt_model_path,
+                    revision=utils.koboldai_vars.revision,
+                    cache_dir="cache",
+                )
+            else:
+                # No model path locally, we'll probably have to download
+                self.model_config = AutoConfig.from_pretrained(
+                    model_name, revision=utils.koboldai_vars.revision, cache_dir="cache"
+                )
+
+        return requested_parameters
+
+    def set_input_parameters(self, parameters: dict):
+        self.model_name = parameters.get("custom_model_name", parameters["id"])
+        self.path = parameters.get("path", None)
+        logger.info(parameters)
+
+    def unload(self):
+        if hasattr(self, "model"):
+            self.model = None
+
+        if hasattr(self, "tokenizer"):
+            self.tokenizer = None
+
+        if hasattr(self, "model_config"):
+            self.model_config = None
+
+        with torch.no_grad():
+            with warnings.catch_warnings():
+                warnings.filterwarnings(
+                    "ignore", message="torch.distributed.reduce_op is deprecated"
+                )
+                for tensor in gc.get_objects():
+                    try:
+                        if torch.is_tensor(tensor):
+                            tensor.set_(
+                                torch.tensor(
+                                    (), device=tensor.device, dtype=tensor.dtype
+                                )
+                            )
+                    except:
+                        pass
+
+        gc.collect()
+        try:
+            with torch.no_grad():
+                torch.cuda.empty_cache()
+        except:
+            pass

     def _load(self, save_model: bool, initial_load: bool) -> None:
         utils.koboldai_vars.allowsp = False
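
Note: in the new get_requested_parameters, the model config is resolved in order of preference: an explicitly passed path, then a known local download, then the Hugging Face Hub. Below is a rough standalone sketch of that order; the resolve_config name and the placeholder arguments are illustrative and not part of this commit.

import os

from transformers import AutoConfig


def resolve_config(model_name: str, model_path: str = None, local_path: str = None):
    # 1) An explicitly passed path wins.
    if model_path and os.path.exists(model_path):
        return AutoConfig.from_pretrained(model_path)
    # 2) Otherwise fall back to a previously downloaded local copy.
    if local_path:
        return AutoConfig.from_pretrained(local_path, cache_dir="cache")
    # 3) Otherwise let transformers fetch the config from the Hub.
    return AutoConfig.from_pretrained(model_name, cache_dir="cache")
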
@@ -58,12 +136,9 @@ class model_backend(HFInferenceModel):
             self.get_local_model_path(), low_cpu_mem_usage=True, device_map="auto"
         )

-        if self.usegpu:
-            self.model = self.model.to("cuda")
-
         self.tokenizer = self._get_tokenizer(self.get_local_model_path())

         self.model.kai_model = self
+        self.badwordsids = koboldai_settings.badwordsids_default
         utils.koboldai_vars.modeldim = self.model.get_input_embeddings().embedding_dim

         # Patch Huggingface stuff to use our samplers
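
Note: the usegpu branch is dropped because the model is already placed by device_map="auto" at load time. A minimal sketch of that loading path, assuming the accelerate package is installed (it is required for device_map="auto"); "gpt2" is only a placeholder model id.

from transformers import AutoModelForCausalLM, AutoTokenizer

# device_map="auto" asks accelerate to place the weights across the available
# devices at load time, so no manual model.to("cuda") call is needed afterwards.
model = AutoModelForCausalLM.from_pretrained(
    "gpt2", low_cpu_mem_usage=True, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("gpt2")
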
@@ -143,11 +218,8 @@ class model_backend(HFInferenceModel):
         else:
             gen_in = prompt_tokens

-        if not self.usegpu:
-            gen_in = gen_in.to("cpu")
-        else:
-            device = self.get_auxilary_device()
-            gen_in = gen_in.to(device)
+        device = self.get_auxilary_device()
+        gen_in = gen_in.to(device)

         additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else []
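
Note: prompt tokens are now always moved to the backend's auxiliary device instead of branching on usegpu. A hedged, self-contained sketch of the idea; "gpt2" is a placeholder and model.device stands in for self.get_auxilary_device().

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Encode a prompt and move it to wherever the model expects its inputs.
prompt_tokens = tokenizer("Hello", return_tensors="pt").input_ids
device = model.device  # stand-in for self.get_auxilary_device()
gen_in = prompt_tokens.to(device)
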
@@ -179,3 +251,70 @@ class model_backend(HFInferenceModel):
             is_whole_generation=False,
             output_includes_prompt=True,
         )
+
+    def get_local_model_path(
+        self, legacy: bool = False, ignore_existance: bool = False
+    ) -> Optional[str]:
+        """
+        Returns a string of the model's path locally, or None if it is not downloaded.
+        If ignore_existance is true, it will always return a path.
+        """
+        if self.path is not None:
+            if os.path.exists(self.path):
+                return self.path
+
+        if self.model_name in [
+            "NeoCustom",
+            "GPT2Custom",
+            "TPUMeshTransformerGPTJ",
+            "TPUMeshTransformerGPTNeoX",
+        ]:
+            model_path = self.path
+            assert model_path
+
+            # Path can be absolute or relative to models directory
+            if os.path.exists(model_path):
+                return model_path
+
+            model_path = os.path.join("models", model_path)
+
+            try:
+                assert os.path.exists(model_path)
+            except AssertionError:
+                logger.error(
+                    f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'."
+                )
+                raise
+
+            return model_path
+
+        basename = self.model_name.replace("/", "_")
+        if legacy:
+            ret = basename
+        else:
+            ret = os.path.join("models", basename)
+
+        if os.path.isdir(ret) or ignore_existance:
+            return ret
+
+        return None
+
+    def init_model_config(self) -> None:
+        # Get the model_type from the config or assume a model type if it isn't present
+        try:
+            self.model_config = AutoConfig.from_pretrained(
+                self.get_local_model_path() or self.model_name,
+                revision=utils.koboldai_vars.revision,
+                cache_dir="cache",
+            )
+            self.model_type = self.model_config.model_type
+        except ValueError:
+            self.model_type = {
+                "NeoCustom": "gpt_neo",
+                "GPT2Custom": "gpt2",
+            }.get(self.model)
+
+            if not self.model_type:
+                logger.warning(
+                    "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)"
+                )
+                self.model_type = "gpt_neo"
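
Note on get_local_model_path: for ordinary Hub models, the local directory is derived by replacing "/" with "_" in the model name and nesting it under models/. A tiny illustrative check of that convention; local_dir_for is a hypothetical helper, not part of the commit.

import os


def local_dir_for(model_name: str) -> str:
    # Mirrors the non-legacy branch of get_local_model_path().
    return os.path.join("models", model_name.replace("/", "_"))


print(local_dir_for("EleutherAI/gpt-neo-1.3B"))  # -> models/EleutherAI_gpt-neo-1.3B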