Delete basic 4bit

And add code to handle dangling __pycache__s
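
For context, the cleanup this commit adds amounts to deleting any backend folder whose only remaining entry is a stale __pycache__ directory; a minimal standalone sketch of the same idea (paths mirror the diff below, but this is an illustration rather than the committed code):

import os
import shutil

backends_dir = "modeling/inference_models"  # directory scanned in the diff below
for module in os.listdir(backends_dir):
    module_path = os.path.join(backends_dir, module)
    if os.path.isdir(module_path) and os.listdir(module_path) == ["__pycache__"]:
        # Only a compiled-bytecode cache is left, so the backend itself was
        # removed upstream; delete the leftover folder.
        shutil.rmtree(module_path)
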
This commit is contained in:
somebody
2023-07-17 18:16:03 -05:00
parent 5c3a8e295a
commit 1637760fa1
2 changed files with 38 additions and 367 deletions


@@ -633,7 +633,22 @@ model_backend_type_crosswalk = {}
PRIORITIZED_BACKEND_MODULES = ["generic_hf_torch"]
for module in os.listdir("./modeling/inference_models"):
    if not os.path.isfile(os.path.join("./modeling/inference_models",module)) and module != '__pycache__':
    if module == '__pycache__':
        continue

    module_path = os.path.join("modeling/inference_models", module)

    if not os.path.isdir(module_path):
        # Drop-in modules must be folders
        continue

    if os.listdir(module_path) == ["__pycache__"]:
        # Delete backends which have been deleted upstream. As __pycache__
        # folders aren't tracked, they'll stick around until we zap 'em
        assert len(os.listdir(module_path)) == 1
        logger.info(f"Deleting old backend {module}")
        shutil.rmtree(module_path)
        continue

    try:
        backend_code = importlib.import_module('modeling.inference_models.{}.class'.format(module))
        backend_name = backend_code.model_backend_name
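
For reference, the surviving loop above discovers drop-in backends by importing each folder's class module and reading its model_backend_name; a minimal sketch of that pattern, assuming the modeling/inference_models/<name>/class.py layout used in this repo:

import importlib
import os

BACKENDS_DIR = "modeling/inference_models"

for module in os.listdir(BACKENDS_DIR):
    if not os.path.isdir(os.path.join(BACKENDS_DIR, module)):
        continue
    try:
        # Each backend folder exposes a `class` module with a model_backend_name attribute.
        backend_code = importlib.import_module(f"modeling.inference_models.{module}.class")
        print(module, "->", backend_code.model_backend_name)
    except Exception as exc:
        # A broken backend is skipped instead of aborting discovery.
        print(f"Skipping {module}: {exc}")
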


@@ -1,344 +0,0 @@
from __future__ import annotations

import gc
import os
import shutil
import time
import warnings
from typing import List, Optional, Union

import torch
import transformers
from transformers import AutoConfig, AutoModelForCausalLM, LogitsProcessorList

import utils
from logger import logger
import koboldai_settings
from modeling import warpers
from modeling.inference_model import (
    GenerationResult,
    GenerationSettings,
    InferenceModel,
    use_core_manipulations,
)

model_backend_name = "Basic Huggingface 4-bit"
model_backend_type = "Huggingface"
class model_backend(InferenceModel):
    # Model backends must inherit from InferenceModel.

    def __init__(self) -> None:
        super().__init__()

        import importlib
        dependency_exists = importlib.util.find_spec("bitsandbytes")
        if dependency_exists:
            self.model_name = "Basic Huggingface"
            self.path = None
        else:
            logger.warning("Bitsandbytes is not installed, you can not use Huggingface models in 4-bit")
            self.disable = True
    def is_valid(self, model_name, model_path, menu_path):
        try:
            if model_path is not None and os.path.exists(model_path):
                self.model_config = AutoConfig.from_pretrained(model_path)
            elif os.path.exists("models/{}".format(model_name.replace("/", "_"))):
                self.model_config = AutoConfig.from_pretrained(
                    "models/{}".format(model_name.replace("/", "_")),
                    revision=utils.koboldai_vars.revision,
                    cache_dir="cache",
                )
            else:
                self.model_config = AutoConfig.from_pretrained(
                    model_name, revision=utils.koboldai_vars.revision, cache_dir="cache"
                )
            return True
        except:
            return False
    def get_requested_parameters(
        self, model_name: str, model_path: str, menu_path: str, parameters: dict = {}
    ):
        requested_parameters = []

        if model_name == "customhuggingface":
            requested_parameters.append(
                {
                    "uitype": "text",
                    "unit": "text",
                    "label": "Huggingface Model Name",
                    "id": "custom_model_name",
                    "default": parameters.get("custom_model_name", ""),
                    "check": {"value": "", "check": "!="},
                    "tooltip": "Model name from https://huggingface.co/",
                    "menu_path": "",
                    "refresh_model_inputs": True,
                    "extra_classes": "",
                }
            )

        if model_name != "customhuggingface" or "custom_model_name" in parameters:
            model_name = parameters.get("custom_model_name", None) or model_name
            alt_model_path = self.get_local_model_path()

            if model_path and os.path.exists(model_path):
                # Use passed model path
                self.model_config = AutoConfig.from_pretrained(model_path)
            elif alt_model_path:
                # Use known model path
                self.model_config = AutoConfig.from_pretrained(
                    alt_model_path,
                    revision=utils.koboldai_vars.revision,
                    cache_dir="cache",
                )
            else:
                # No model path locally, we'll probably have to download
                self.model_config = AutoConfig.from_pretrained(
                    model_name, revision=utils.koboldai_vars.revision, cache_dir="cache"
                )

        return requested_parameters
    def set_input_parameters(self, parameters: dict):
        self.model_name = parameters.get("custom_model_name", parameters["id"])
        self.path = parameters.get("path", None)
        logger.info(parameters)
    def unload(self):
        if hasattr(self, "model"):
            self.model = None
        if hasattr(self, "tokenizer"):
            self.tokenizer = None
        if hasattr(self, "model_config"):
            self.model_config = None

        with torch.no_grad():
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", message="torch.distributed.reduce_op is deprecated"
                )
                for tensor in gc.get_objects():
                    try:
                        if torch.is_tensor(tensor):
                            tensor.set_(
                                torch.tensor(
                                    (), device=tensor.device, dtype=tensor.dtype
                                )
                            )
                    except:
                        pass

        gc.collect()
        try:
            with torch.no_grad():
                torch.cuda.empty_cache()
        except:
            pass
    def _load(self, save_model: bool, initial_load: bool) -> None:
        utils.koboldai_vars.allowsp = False

        if self.model_name == "NeoCustom":
            self.model_name = os.path.basename(os.path.normpath(self.path))
        utils.koboldai_vars.model = self.model_name

        # If we specify a model and it's in the root directory, we need to move
        # it to the models directory (legacy folder structure to new)
        if self.get_local_model_path(legacy=True):
            shutil.move(
                self.get_local_model_path(legacy=True, ignore_existance=True),
                self.get_local_model_path(ignore_existance=True),
            )

        self.init_model_config()

        self.model = AutoModelForCausalLM.from_pretrained(
            self.get_local_model_path(), low_cpu_mem_usage=True, device_map="auto", load_in_4bit=True
        )
        self.tokenizer = self._get_tokenizer(self.get_local_model_path())

        self.model.kai_model = self
        self.badwordsids = koboldai_settings.badwordsids_default
        utils.koboldai_vars.modeldim = self.model.get_input_embeddings().embedding_dim

        # Patch Huggingface stuff to use our samplers
        class KoboldLogitsWarperList(LogitsProcessorList):
            def __call__(
                _self,  # Unused
                input_ids: torch.LongTensor,
                scores: torch.FloatTensor,
                *args,
                **kwargs,
            ):
                # Kobold sampling is done here.
                scores = self._apply_warpers(scores=scores, input_ids=input_ids)

                # Things like Lua integration, phrase bias, and probability visualization are done here.
                for processor in self.logits_processors:
                    scores = processor(self, scores=scores, input_ids=input_ids)
                    assert (
                        scores is not None
                    ), f"Scores are None; processor '{processor}' is to blame"
                return scores

        def new_sample(self, *args, **kwargs):
            assert kwargs.pop("logits_warper", None) is not None
            kwargs["logits_warper"] = KoboldLogitsWarperList()

            if utils.koboldai_vars.newlinemode in ["s", "ns"]:
                kwargs["eos_token_id"] = -1
                kwargs.setdefault("pad_token_id", 2)

            return new_sample.old_sample(self, *args, **kwargs)

        new_sample.old_sample = transformers.GenerationMixin.sample
        use_core_manipulations.sample = new_sample
    def _apply_warpers(
        self, scores: torch.Tensor, input_ids: torch.Tensor
    ) -> torch.Tensor:
        """Applies samplers/warpers to the given scores, returning the altered scores.

        Args:
            scores (torch.Tensor): The original scores.
            input_ids (torch.Tensor): The input token sequence.

        Returns:
            torch.Tensor: The altered scores.
        """
        warpers.update_settings()

        for sid in utils.koboldai_vars.sampler_order:
            warper = warpers.Warper.from_id(sid)

            if not warper.value_is_valid():
                continue

            if warper == warpers.RepetitionPenalty:
                # Rep pen needs access to input tokens to decide what to penalize
                scores = warper.torch(scores, input_ids=input_ids)
            else:
                scores = warper.torch(scores)

            assert scores is not None, f"Scores are None; warper '{warper}' is to blame"

        return scores
    def _raw_generate(
        self,
        prompt_tokens: Union[List[int], torch.Tensor],
        max_new: int,
        gen_settings: GenerationSettings,
        single_line: bool = False,
        batch_count: int = 1,
        seed: Optional[int] = None,
        **kwargs,
    ) -> GenerationResult:
        if not isinstance(prompt_tokens, torch.Tensor):
            gen_in = torch.tensor(prompt_tokens, dtype=torch.long)[None]
        else:
            gen_in = prompt_tokens

        device = self.get_auxilary_device()
        gen_in = gen_in.to(device)

        additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else []

        if seed is not None:
            torch.manual_seed(seed)

        with torch.no_grad():
            start_time = time.time()
            genout = self.model.generate(
                gen_in,
                do_sample=True,
                max_length=min(
                    len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
                ),
                repetition_penalty=1.0,
                bad_words_ids=self.badwordsids + additional_bad_words_ids,
                use_cache=True,
                num_return_sequences=batch_count,
            )

        logger.debug(
            "torch_raw_generate: run generator {}s".format(time.time() - start_time)
        )

        return GenerationResult(
            self,
            out_batches=genout,
            prompt=prompt_tokens,
            is_whole_generation=False,
            output_includes_prompt=True,
        )
    def get_local_model_path(
        self, legacy: bool = False, ignore_existance: bool = False
    ) -> Optional[str]:
        """
        Returns a string of the model's path locally, or None if it is not downloaded.
        If ignore_existance is true, it will always return a path.
        """
        if self.path is not None:
            if os.path.exists(self.path):
                return self.path

        if self.model_name in [
            "NeoCustom",
            "GPT2Custom",
            "TPUMeshTransformerGPTJ",
            "TPUMeshTransformerGPTNeoX",
        ]:
            model_path = self.path
            assert model_path

            # Path can be absolute or relative to models directory
            if os.path.exists(model_path):
                return model_path

            model_path = os.path.join("models", model_path)

            try:
                assert os.path.exists(model_path)
            except AssertionError:
                logger.error(
                    f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'."
                )
                raise

            return model_path

        basename = self.model_name.replace("/", "_")
        if legacy:
            ret = basename
        else:
            ret = os.path.join("models", basename)

        if os.path.isdir(ret) or ignore_existance:
            return ret

        return None
    def init_model_config(self) -> None:
        # Get the model_type from the config or assume a model type if it isn't present
        try:
            self.model_config = AutoConfig.from_pretrained(
                self.get_local_model_path() or self.model_name,
                revision=utils.koboldai_vars.revision,
                cache_dir="cache",
            )
            self.model_type = self.model_config.model_type
        except ValueError:
            self.model_type = {
                "NeoCustom": "gpt_neo",
                "GPT2Custom": "gpt2",
            }.get(self.model)

            if not self.model_type:
                logger.warning(
                    "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)"
                )
                self.model_type = "gpt_neo"
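
The defining piece of the backend removed here was the AutoModelForCausalLM.from_pretrained(..., load_in_4bit=True) call in _load above, which relies on the bitsandbytes integration in transformers. A minimal standalone sketch of that loading path (the model id is a hypothetical placeholder, not something used by this repo):

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "EleutherAI/gpt-neo-125m"  # hypothetical example model

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",       # let accelerate place layers on available devices
    low_cpu_mem_usage=True,  # avoid materializing a full extra copy in host RAM
    load_in_4bit=True,       # quantize weights to 4-bit via bitsandbytes
)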