Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)
Merge pull request #391 from one-some/basic-hf-backend
Basic HF backend
aiserver.py (37 changed lines)
@@ -627,19 +627,36 @@ from modeling.patches import patch_transformers
 import importlib
 model_backend_code = {}
 model_backends = {}
+model_backend_module_names = {}
 model_backend_type_crosswalk = {}
+
+PRIORITIZED_BACKEND_MODULES = ["generic_hf_torch"]
+
 for module in os.listdir("./modeling/inference_models"):
     if not os.path.isfile(os.path.join("./modeling/inference_models",module)) and module != '__pycache__':
         try:
-            model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module))
-            model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend()
-            if 'disable' in vars(model_backends[model_backend_code[module].model_backend_name]) and model_backends[model_backend_code[module].model_backend_name].disable:
-                del model_backends[model_backend_code[module].model_backend_name]
-
-            else:
-                if model_backend_code[module].model_backend_type in model_backend_type_crosswalk:
-                    model_backend_type_crosswalk[model_backend_code[module].model_backend_type].append(model_backend_code[module].model_backend_name)
-                else:
-                    model_backend_type_crosswalk[model_backend_code[module].model_backend_type] = [model_backend_code[module].model_backend_name]
+            backend_code = importlib.import_module('modeling.inference_models.{}.class'.format(module))
+            backend_name = backend_code.model_backend_name
+            backend_type = backend_code.model_backend_type
+            backend_object = backend_code.model_backend()
+
+            if "disable" in vars(backend_object) and backend_object.disable:
+                continue
+
+            model_backends[backend_name] = backend_object
+            model_backend_code[module] = backend_code
+
+            if backend_name in model_backend_module_names:
+                raise RuntimeError(f"{module} cannot make backend '{backend_name}'; it already exists!")
+            model_backend_module_names[backend_name] = module
+
+            if backend_type in model_backend_type_crosswalk:
+                if module in PRIORITIZED_BACKEND_MODULES:
+                    model_backend_type_crosswalk[backend_type].insert(0, backend_name)
+                else:
+                    model_backend_type_crosswalk[backend_type].append(backend_name)
+            else:
+                model_backend_type_crosswalk[backend_type] = [backend_name]
+
         except Exception:
             logger.error("Model Backend {} failed to load".format(module))
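The effect of PRIORITIZED_BACKEND_MODULES is that a prioritized module's backend goes to the front of its type's crosswalk list. Below is a minimal, dependency-free sketch of that ordering logic; "Basic Huggingface" and the "Huggingface" type come from the new backend added in this PR, while the name given to the generic_hf_torch backend is a placeholder.

# Dependency-free sketch of the crosswalk ordering above; names other than
# "Basic Huggingface"/"Huggingface" are placeholders.
PRIORITIZED_BACKEND_MODULES = ["generic_hf_torch"]

discovered = [
    # (module directory, model_backend_name, model_backend_type), hypothetical discovery order
    ("basic_hf", "Basic Huggingface", "Huggingface"),
    ("generic_hf_torch", "Torch Huggingface (placeholder name)", "Huggingface"),
]

model_backend_type_crosswalk = {}
for module, backend_name, backend_type in discovered:
    if backend_type in model_backend_type_crosswalk:
        if module in PRIORITIZED_BACKEND_MODULES:
            # Prioritized modules go to the front of their type's list.
            model_backend_type_crosswalk[backend_type].insert(0, backend_name)
        else:
            model_backend_type_crosswalk[backend_type].append(backend_name)
    else:
        model_backend_type_crosswalk[backend_type] = [backend_name]

print(model_backend_type_crosswalk)
# {'Huggingface': ['Torch Huggingface (placeholder name)', 'Basic Huggingface']}
# The prioritized module's backend ends up first even though it was discovered second.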
@@ -6240,7 +6257,11 @@ def UI_2_select_model(data):
     else:
         #Here we have a model that's not in our menu structure (either a custom model or a custom path
         #so we'll just go through all the possible loaders
-        for model_backend in model_backends:
+        for model_backend in sorted(
+            model_backends,
+            key=lambda x: model_backend_module_names[x] in PRIORITIZED_BACKEND_MODULES,
+            reverse=True,
+        ):
             if model_backends[model_backend].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]):
                 valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"])
     emit("selected_model_info", {"model_backends": valid_loaders})
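The sorted() call relies on False < True; with reverse=True, any backend whose module is in PRIORITIZED_BACKEND_MODULES is visited first, while the relative order of the rest is preserved because sorted() is stable. A standalone sketch with placeholder backend names:

# Standalone sketch of the prioritized iteration order; backend and module
# names are placeholders except basic_hf / generic_hf_torch.
PRIORITIZED_BACKEND_MODULES = ["generic_hf_torch"]
model_backend_module_names = {
    "Basic Huggingface": "basic_hf",
    "Torch Huggingface (placeholder name)": "generic_hf_torch",
    "Read Only (placeholder name)": "readonly",
}

ordered = sorted(
    model_backend_module_names,
    key=lambda x: model_backend_module_names[x] in PRIORITIZED_BACKEND_MODULES,
    reverse=True,
)
print(ordered)
# ['Torch Huggingface (placeholder name)', 'Basic Huggingface', 'Read Only (placeholder name)']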
modeling/inference_models/basic_hf/class.py (new file, 338 lines)
@@ -0,0 +1,338 @@
from __future__ import annotations

import gc
import os
import shutil
import time
import warnings
from typing import List, Optional, Union

import torch
import transformers
from transformers import AutoConfig, AutoModelForCausalLM, LogitsProcessorList

import utils
from logger import logger
import koboldai_settings
from modeling import warpers
from modeling.inference_model import (
    GenerationResult,
    GenerationSettings,
    InferenceModel,
    use_core_manipulations,
)

model_backend_name = "Basic Huggingface"
model_backend_type = "Huggingface"

class model_backend(InferenceModel):
    # Model backends must inherit from InferenceModel.

    def __init__(self) -> None:
        super().__init__()
        self.model_name = "Basic Huggingface"
        self.path = None

    def is_valid(self, model_name, model_path, menu_path):
        try:
            if model_path is not None and os.path.exists(model_path):
                self.model_config = AutoConfig.from_pretrained(model_path)
            elif os.path.exists("models/{}".format(model_name.replace("/", "_"))):
                self.model_config = AutoConfig.from_pretrained(
                    "models/{}".format(model_name.replace("/", "_")),
                    revision=utils.koboldai_vars.revision,
                    cache_dir="cache",
                )
            else:
                self.model_config = AutoConfig.from_pretrained(
                    model_name, revision=utils.koboldai_vars.revision, cache_dir="cache"
                )
            return True
        except:
            return False

    def get_requested_parameters(
        self, model_name: str, model_path: str, menu_path: str, parameters: dict = {}
    ):
        requested_parameters = []

        if model_name == "customhuggingface":
            requested_parameters.append(
                {
                    "uitype": "text",
                    "unit": "text",
                    "label": "Huggingface Model Name",
                    "id": "custom_model_name",
                    "default": parameters.get("custom_model_name", ""),
                    "check": {"value": "", "check": "!="},
                    "tooltip": "Model name from https://huggingface.co/",
                    "menu_path": "",
                    "refresh_model_inputs": True,
                    "extra_classes": "",
                }
            )

        if model_name != "customhuggingface" or "custom_model_name" in parameters:
            model_name = parameters.get("custom_model_name", None) or model_name
            alt_model_path = self.get_local_model_path()

            if model_path and os.path.exists(model_path):
                # Use passed model path
                self.model_config = AutoConfig.from_pretrained(model_path)
            elif alt_model_path:
                # Use known model path
                self.model_config = AutoConfig.from_pretrained(
                    alt_model_path,
                    revision=utils.koboldai_vars.revision,
                    cache_dir="cache",
                )
            else:
                # No model path locally, we'll probably have to download
                self.model_config = AutoConfig.from_pretrained(
                    model_name, revision=utils.koboldai_vars.revision, cache_dir="cache"
                )

        return requested_parameters

    def set_input_parameters(self, parameters: dict):
        self.model_name = parameters.get("custom_model_name", parameters["id"])
        self.path = parameters.get("path", None)
        logger.info(parameters)

    def unload(self):
        if hasattr(self, "model"):
            self.model = None

        if hasattr(self, "tokenizer"):
            self.tokenizer = None

        if hasattr(self, "model_config"):
            self.model_config = None

        with torch.no_grad():
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", message="torch.distributed.reduce_op is deprecated"
                )
                for tensor in gc.get_objects():
                    try:
                        if torch.is_tensor(tensor):
                            tensor.set_(
                                torch.tensor(
                                    (), device=tensor.device, dtype=tensor.dtype
                                )
                            )
                    except:
                        pass
        gc.collect()

        try:
            with torch.no_grad():
                torch.cuda.empty_cache()
        except:
            pass

    def _load(self, save_model: bool, initial_load: bool) -> None:
        utils.koboldai_vars.allowsp = False

        if self.model_name == "NeoCustom":
            self.model_name = os.path.basename(os.path.normpath(self.path))
        utils.koboldai_vars.model = self.model_name

        # If we specify a model and it's in the root directory, we need to move
        # it to the models directory (legacy folder structure to new)
        if self.get_local_model_path(legacy=True):
            shutil.move(
                self.get_local_model_path(legacy=True, ignore_existance=True),
                self.get_local_model_path(ignore_existance=True),
            )

        self.init_model_config()

        self.model = AutoModelForCausalLM.from_pretrained(
            self.get_local_model_path(), low_cpu_mem_usage=True, device_map="auto"
        )

        self.tokenizer = self._get_tokenizer(self.get_local_model_path())

        self.model.kai_model = self
        self.badwordsids = koboldai_settings.badwordsids_default
        utils.koboldai_vars.modeldim = self.model.get_input_embeddings().embedding_dim

        # Patch Huggingface stuff to use our samplers
        class KoboldLogitsWarperList(LogitsProcessorList):
            def __call__(
                _self,  # Unused
                input_ids: torch.LongTensor,
                scores: torch.FloatTensor,
                *args,
                **kwargs,
            ):
                # Kobold sampling is done here.
                scores = self._apply_warpers(scores=scores, input_ids=input_ids)

                # Things like Lua integration, phrase bias, and probability visualization are done here.
                for processor in self.logits_processors:
                    scores = processor(self, scores=scores, input_ids=input_ids)
                    assert (
                        scores is not None
                    ), f"Scores are None; processor '{processor}' is to blame"
                return scores

        def new_sample(self, *args, **kwargs):
            assert kwargs.pop("logits_warper", None) is not None
            kwargs["logits_warper"] = KoboldLogitsWarperList()

            if utils.koboldai_vars.newlinemode in ["s", "ns"]:
                kwargs["eos_token_id"] = -1
                kwargs.setdefault("pad_token_id", 2)

            return new_sample.old_sample(self, *args, **kwargs)

        new_sample.old_sample = transformers.GenerationMixin.sample
        use_core_manipulations.sample = new_sample

    def _apply_warpers(
        self, scores: torch.Tensor, input_ids: torch.Tensor
    ) -> torch.Tensor:
        """Applies samplers/warpers to the given scores, returning the altered scores.

        Args:
            scores (torch.Tensor): The original scores.
            input_ids (torch.Tensor): The input token sequence.

        Returns:
            torch.Tensor: The altered scores.
        """
        warpers.update_settings()

        for sid in utils.koboldai_vars.sampler_order:
            warper = warpers.Warper.from_id(sid)

            if not warper.value_is_valid():
                continue

            if warper == warpers.RepetitionPenalty:
                # Rep pen needs access to input tokens to decide what to penalize
                scores = warper.torch(scores, input_ids=input_ids)
            else:
                scores = warper.torch(scores)

            assert scores is not None, f"Scores are None; warper '{warper}' is to blame"
        return scores
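_apply_warpers funnels the scores through each enabled warper in the order given by utils.koboldai_vars.sampler_order, skipping any sampler whose value is not valid. A dependency-free sketch of that chaining, separate from class.py, with stand-in transforms in place of the real modeling.warpers objects:

# Stand-in sketch (not part of class.py): scores flow through warpers in the
# order given by sampler_order; anything invalid/disabled is simply skipped.
warpers_by_id = {
    0: lambda scores: [s / 0.8 for s in scores],      # stand-in for e.g. temperature
    1: lambda scores: [max(s, 0.0) for s in scores],  # arbitrary stand-in transform
    2: None,                                          # pretend this sampler is disabled
}
sampler_order = [1, 2, 0]

scores = [2.0, -1.0, 0.5]
for sid in sampler_order:
    warper = warpers_by_id[sid]
    if warper is None:  # mirrors the value_is_valid() skip
        continue
    scores = warper(scores)
    assert scores is not None, f"Scores are None; warper '{sid}' is to blame"

print(scores)  # [2.5, 0.0, 0.625]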

    def _raw_generate(
        self,
        prompt_tokens: Union[List[int], torch.Tensor],
        max_new: int,
        gen_settings: GenerationSettings,
        single_line: bool = False,
        batch_count: int = 1,
        seed: Optional[int] = None,
        **kwargs,
    ) -> GenerationResult:
        if not isinstance(prompt_tokens, torch.Tensor):
            gen_in = torch.tensor(prompt_tokens, dtype=torch.long)[None]
        else:
            gen_in = prompt_tokens

        device = self.get_auxilary_device()
        gen_in = gen_in.to(device)

        additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else []

        if seed is not None:
            torch.manual_seed(seed)

        with torch.no_grad():
            start_time = time.time()
            genout = self.model.generate(
                gen_in,
                do_sample=True,
                max_length=min(
                    len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
                ),
                repetition_penalty=1.0,
                bad_words_ids=self.badwordsids + additional_bad_words_ids,
                use_cache=True,
                num_return_sequences=batch_count,
            )

        logger.debug(
            "torch_raw_generate: run generator {}s".format(time.time() - start_time)
        )

        return GenerationResult(
            self,
            out_batches=genout,
            prompt=prompt_tokens,
            is_whole_generation=False,
            output_includes_prompt=True,
        )
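_raw_generate accepts either a plain token list or a tensor; the [None] index adds the batch dimension, and the max_length passed to generate() counts the prompt plus the requested new tokens, capped by the context limit. A small sketch of those two details, separate from class.py (arbitrary numbers, torch only):

import torch

# Sketch (not part of class.py) of the prompt handling in _raw_generate: a flat
# token list becomes a (1, seq_len) batch, and the generation budget is capped
# by the context limit.
prompt_tokens = [318, 257, 1332]   # arbitrary token ids
max_new = 80                       # new tokens requested
max_length = 2048                  # stand-in for utils.koboldai_vars.max_length

gen_in = torch.tensor(prompt_tokens, dtype=torch.long)[None]
print(gen_in.shape)  # torch.Size([1, 3])

# max_length passed to model.generate() counts the prompt plus new tokens.
print(min(len(prompt_tokens) + max_new, max_length))  # 83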

    def get_local_model_path(
        self, legacy: bool = False, ignore_existance: bool = False
    ) -> Optional[str]:
        """
        Returns a string of the model's path locally, or None if it is not downloaded.
        If ignore_existance is true, it will always return a path.
        """
        if self.path is not None:
            if os.path.exists(self.path):
                return self.path

        if self.model_name in [
            "NeoCustom",
            "GPT2Custom",
            "TPUMeshTransformerGPTJ",
            "TPUMeshTransformerGPTNeoX",
        ]:
            model_path = self.path
            assert model_path

            # Path can be absolute or relative to models directory
            if os.path.exists(model_path):
                return model_path

            model_path = os.path.join("models", model_path)

            try:
                assert os.path.exists(model_path)
            except AssertionError:
                logger.error(
                    f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'."
                )
                raise

            return model_path

        basename = self.model_name.replace("/", "_")
        if legacy:
            ret = basename
        else:
            ret = os.path.join("models", basename)

        if os.path.isdir(ret) or ignore_existance:
            return ret
        return None
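For ordinary Hugging Face model IDs, get_local_model_path maps the model name onto the models/ directory by replacing "/" with "_"; the legacy flag points at the old root-level layout instead. A quick illustration, separate from class.py, with a hypothetical model name:

import os

# Sketch (not part of class.py) of the name-to-path mapping get_local_model_path
# uses for regular Hugging Face model ids. The model name is hypothetical.
model_name = "EleutherAI/gpt-neo-1.3B"
basename = model_name.replace("/", "_")

legacy_path = basename                          # old layout: folder in the repo root
current_path = os.path.join("models", basename)

print(legacy_path)   # EleutherAI_gpt-neo-1.3B
print(current_path)  # models/EleutherAI_gpt-neo-1.3B
# The real method only returns the path when os.path.isdir(...) succeeds,
# unless ignore_existance=True.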

    def init_model_config(self) -> None:
        # Get the model_type from the config or assume a model type if it isn't present
        try:
            self.model_config = AutoConfig.from_pretrained(
                self.get_local_model_path() or self.model_name,
                revision=utils.koboldai_vars.revision,
                cache_dir="cache",
            )
            self.model_type = self.model_config.model_type
        except ValueError:
            self.model_type = {
                "NeoCustom": "gpt_neo",
                "GPT2Custom": "gpt2",
            }.get(self.model)

            if not self.model_type:
                logger.warning(
                    "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)"
                )
                self.model_type = "gpt_neo"
@@ -17,6 +17,7 @@ class HFInferenceModel(InferenceModel):
         self.model_config = None
         #self.model_name = model_name
 
+        self.hf_torch = False
         self.model = None
         self.tokenizer = None
         self.badwordsids = koboldai_settings.badwordsids_default