From 6b26cbbd0a50f7deb3b4c5551aee87cc81ea575a Mon Sep 17 00:00:00 2001
From: somebody
Date: Sat, 22 Jul 2023 17:20:40 -0500
Subject: [PATCH] Backends: Fix ReadOnly

Since somewhere in the pipeline ReadOnly is ignored, the bug wasn't
actually apparent unless using things like the Robot Button in WI
cards.
---
 modeling/inference_model.py                 |  7 ++-
 modeling/inference_models/readonly/class.py | 49 ++++++++++++---------
 2 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/modeling/inference_model.py b/modeling/inference_model.py
index a2d4fa63..28d96473 100644
--- a/modeling/inference_model.py
+++ b/modeling/inference_model.py
@@ -597,7 +597,12 @@ class InferenceModel:
         )
 
         time_end = round(time.time() - time_start, 2)
-        tokens_per_second = round(len(result.encoded[0]) / time_end, 2)
+
+        try:
+            tokens_per_second = round(len(result.encoded[0]) / time_end, 2)
+        except ZeroDivisionError:
+            # Introducing KoboldAI's fastest model: ReadOnly!
+            tokens_per_second = 0
 
         if not utils.koboldai_vars.quiet:
             logger.info(
diff --git a/modeling/inference_models/readonly/class.py b/modeling/inference_models/readonly/class.py
index 13c38baf..cbdb298d 100644
--- a/modeling/inference_models/readonly/class.py
+++ b/modeling/inference_models/readonly/class.py
@@ -1,12 +1,10 @@
 from __future__ import annotations
 
 import torch
-import requests
 import numpy as np
 from typing import List, Optional, Union
 
 import utils
-from logger import logger
 from modeling.inference_model import (
     GenerationResult,
     GenerationSettings,
@@ -15,29 +13,46 @@ from modeling.inference_model import (
 )
 
 model_backend_name = "Read Only"
-model_backend_type = "Read Only" #This should be a generic name in case multiple model backends are compatible (think Hugging Face Custom and Basic Hugging Face)
+model_backend_type = "Read Only"  # This should be a generic name in case multiple model backends are compatible (think Hugging Face Custom and Basic Hugging Face)
 
-class BasicAPIException(Exception):
-    """To be used for errors when using the Basic API as an interface."""
+
+
+class DummyHFTokenizerOut:
+    input_ids = np.array([[]])
+
+
+class FacadeTokenizer:
+    def __init__(self):
+        self._koboldai_header = []
+
+    def decode(self, _input):
+        return ""
+
+    def encode(self, input_text):
+        return []
+
+    def __call__(self, *args, **kwargs) -> DummyHFTokenizerOut:
+        return DummyHFTokenizerOut()
 
 
 class model_backend(InferenceModel):
     def __init__(self) -> None:
         super().__init__()
-        # Do not allow API to be served over the API
+        # Do not allow ReadOnly to be served over the API
         self.capabilties = ModelCapabilities(api_host=False)
-        self.tokenizer = self._tokenizer()
+        self.tokenizer: FacadeTokenizer = None
         self.model = None
         self.model_name = "Read Only"
-    
+
     def is_valid(self, model_name, model_path, menu_path):
         return model_name == "ReadOnly"
-    
-    def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
+
+    def get_requested_parameters(
+        self, model_name, model_path, menu_path, parameters={}
+    ):
         requested_parameters = []
         return requested_parameters
-        
+
     def set_input_parameters(self, parameters):
         return
@@ -46,17 +61,9 @@ class model_backend(InferenceModel):
     def _initialize_model():
         return
 
-
-    class _tokenizer():
-        def __init__(self):
-            self._koboldai_header = []
-        def decode(self, _input):
-            return ""
-        def encode(self, input_text):
-            return []
 
     def _load(self, save_model: bool = False, initial_load: bool = False) -> None:
-        self.tokenizer = self.tokenizer
+        self.tokenizer = FacadeTokenizer()
         self.model = None
 
         utils.koboldai_vars.noai = True
@@ -72,7 +79,7 @@ class model_backend(InferenceModel):
     ):
         return GenerationResult(
             model=self,
-            out_batches=np.array([]),
+            out_batches=np.array([[]]),
             prompt=prompt_tokens,
             is_whole_generation=True,
             single_line=single_line,
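
A minimal standalone sketch of why the two changes above work together; the
tokens_per_second helper here is hypothetical, standing in for the logic that
the patch inlines in modeling/inference_model.py:

    import numpy as np

    # Shape (1, 0): one batch entry containing zero tokens, so downstream
    # code that indexes the batch dimension (e.g. result.encoded[0]) still
    # finds an element -- it is just empty. This is what the change from
    # np.array([]) to np.array([[]]) provides.
    empty_batch = np.array([[]])
    assert empty_batch.shape == (1, 0)

    # Shape (0,): no batch entries at all; encoded[0] would raise an
    # IndexError before any throughput math ever runs.
    no_batch = np.array([])
    assert no_batch.shape == (0,)

    # A zero-token generation can finish in a rounded elapsed time of 0.0
    # seconds, so the division needs the ZeroDivisionError guard.
    def tokens_per_second(encoded, time_end: float) -> float:
        try:
            return round(len(encoded[0]) / time_end, 2)
        except ZeroDivisionError:
            return 0.0

    tokens_per_second(empty_batch, 0.0)  # 0.0 instead of an uncaught crash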