diff --git a/modeling/inference_models/basic_hf/class.py b/modeling/inference_models/basic_hf/class.py
deleted file mode 100644
index e80c645d..00000000
--- a/modeling/inference_models/basic_hf/class.py
+++ /dev/null
@@ -1,152 +0,0 @@
-from __future__ import annotations
-
-import os, time
-import json
-import torch
-from torch.nn import Embedding
-import shutil
-from typing import Union
-import transformers
-from transformers import (
-    StoppingCriteria,
-    GPTNeoForCausalLM,
-    GPT2LMHeadModel,
-    AutoModelForCausalLM,
-    AutoConfig,
-    LogitsProcessorList,
-)
-from modeling.inference_model import (
-    GenerationResult,
-    GenerationSettings,
-    ModelCapabilities,
-    use_core_manipulations,
-)
-
-from modeling.stoppers import Stoppers
-
-import utils
-import koboldai_settings
-from logger import logger
-
-
-from modeling.inference_model import InferenceModel
-
-model_backend_name = "Very Basic Huggingface"
-model_backend_type = "Huggingface" #This should be a generic name in case multiple model backends are compatible (think Hugging Face Custom and Basic Hugging Face)
-
-LOG_SAMPLER_NO_EFFECT = False
-
-class model_backend(InferenceModel):
-
-    def __init__(self) -> None:
-        super().__init__()
-        self.model_config = None
-        #self.model_name = model_name
-
-        self.model = None
-        self.tokenizer = None
-        self.badwordsids = koboldai_settings.badwordsids_default
-        self.usegpu = False
-
-    def is_valid(self, model_name, model_path, menu_path):
-        try:
-            if model_path is not None and os.path.exists(model_path):
-                self.model_config = AutoConfig.from_pretrained(model_path)
-            elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))):
-                self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache")
-            else:
-                self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache")
-            return True
-        except:
-            return False
-
-    def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
-        requested_parameters = []
-        requested_parameters.append({
-            "uitype": "toggle",
-            "unit": "bool",
-            "label": "Use GPU",
-            "id": "use_gpu",
-            "default": True,
-            "tooltip": "Whether or not to use the GPU",
-            "menu_path": "Layers",
-            "extra_classes": "",
-            "refresh_model_inputs": False
-        })
-        return requested_parameters
-
-    def set_input_parameters(self, parameters):
-        self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None
-        self.model_name = parameters['id']
-        self.path = parameters['path'] if 'path' in parameters else None
-
-    def _load(self, save_model: bool, initial_load: bool) -> None:
-        self.model_config = AutoConfig.from_pretrained(self.model_name if self.path is None else self.path)
-        self.model = AutoModelForCausalLM.from_config(self.model_config)
-        self.tokenizer = self._get_tokenizer(self.model_name if self.path is None else self.path)
-
-        if save_model and self.path is None:
-            model_path = "models/{}".format(self.model_name.replace("/", "_"))
-            if not os.path.exists(model_path):
-                self.tokenizer.save_pretrained(model_path)
-                self.model.save_pretrained(model_path)
-
-
-        if self.usegpu:
-            # Use just VRAM
-            self.torch_device = utils.koboldai_vars.gpu_device
-            self.model = self.model.half().to(self.torch_device)
-        else:
-            self.torch_device = "cpu"
-            self.model = self.model.to(self.torch_device).float()
-
-        utils.koboldai_vars.modeldim = self.model.get_input_embeddings().embedding_dim
-
-
-    def _raw_generate(
-        self,
-        prompt_tokens: Union[List[int], torch.Tensor],
-        max_new: int,
-        gen_settings: GenerationSettings,
-        single_line: bool = False,
-        batch_count: int = 1,
-        seed: Optional[int] = None,
-        **kwargs,
-    ) -> GenerationResult:
-        if not isinstance(prompt_tokens, torch.Tensor):
-            gen_in = torch.tensor(prompt_tokens, dtype=torch.long)[None]
-        else:
-            gen_in = prompt_tokens
-
-        gen_in = gen_in.to(self.torch_device)
-
-        additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else []
-
-        if seed is not None:
-            torch.manual_seed(seed)
-
-        with torch.no_grad():
-            start_time = time.time()
-            genout = self.model.generate(
-                gen_in,
-                do_sample=True,
-                max_length=min(
-                    len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
-                ),
-                repetition_penalty=1.0,
-                bad_words_ids=self.badwordsids
-                + additional_bad_words_ids,
-                use_cache=True,
-                num_return_sequences=batch_count,
-            )
-            logger.debug(
-                "torch_raw_generate: run generator {}s".format(time.time() - start_time)
-            )
-
-        return GenerationResult(
-            self,
-            out_batches=genout,
-            prompt=prompt_tokens,
-            is_whole_generation=False,
-            output_includes_prompt=True,
-        )
\ No newline at end of file