Mirror of https://github.com/KoboldAI/KoboldAI-Client.git
Model: Refuse to serve certain models over the API
aiserver.py (12 changed lines):
@@ -3555,16 +3555,8 @@ def apiactionsubmit_tpumtjgenerate(txt, minimum, maximum):
     return genout
 
 
 def apiactionsubmit(data, use_memory=False, use_world_info=False, use_story=False, use_authors_note=False):
-    if(koboldai_vars.model == "Colab"):
-        raise NotImplementedError("API generation is not supported in old Colab API mode.")
-    elif(koboldai_vars.model == "API"):
-        raise NotImplementedError("API generation is not supported in API mode.")
-    elif(koboldai_vars.model == "CLUSTER"):
-        raise NotImplementedError("API generation is not supported in API mode.")
-    elif(koboldai_vars.model == "OAI"):
-        raise NotImplementedError("API generation is not supported in OpenAI/GooseAI mode.")
-    elif(koboldai_vars.model == "ReadOnly"):
-        raise NotImplementedError("API generation is not supported in read-only mode; please load a model and then try again.")
+    if not model or not model.capabilties.api_host:
+        raise NotImplementedError(f"API generation isn't allowed on model '{koboldai_vars.model}'")
 
     data = applyinputformatting(data)
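The five per-model special cases collapse into one capability check. A minimal, self-contained sketch of the pattern, with SimpleNamespace standing in for the real model objects (illustration only, not KoboldAI's actual classes; note the attribute really is spelled "capabilties" in this codebase):

from types import SimpleNamespace

def can_serve_over_api(model) -> bool:
    # Generation over the API is allowed only if a model is loaded at all
    # and its capability flags permit API hosting.
    return model is not None and model.capabilties.api_host

local_model = SimpleNamespace(capabilties=SimpleNamespace(api_host=True))
remote_backed = SimpleNamespace(capabilties=SimpleNamespace(api_host=False))

assert can_serve_over_api(local_model)
assert not can_serve_over_api(remote_backed)  # API/Colab/Horde-style backends
assert not can_serve_over_api(None)           # nothing loaded, e.g. ReadOnly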
modeling/inference_model.py (path inferred from the import statements below):
@@ -156,6 +156,9 @@ class ModelCapabilities:
     # TODO: Support non-live probabilities from APIs
     post_token_probs: bool = False
 
+    # Some models cannot be hosted over the API, namely the API itself.
+    api_host: bool = True
+
 
 class InferenceModel:
     """Root class for all models."""
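The field syntax above suggests ModelCapabilities is a dataclass of feature flags, so every existing model keeps api_host=True by default and only models that must opt out say so explicitly. A sketch of the relevant slice (other fields elided; the dataclass reading is an assumption consistent with the keyword construction seen later in the diff):

from dataclasses import dataclass

@dataclass
class ModelCapabilities:
    # TODO: Support non-live probabilities from APIs
    post_token_probs: bool = False

    # Some models cannot be hosted over the API, namely the API itself.
    api_host: bool = True

# Default construction keeps locally loaded models servable:
assert ModelCapabilities().api_host
# Remote-backed models construct an opted-out capability set:
assert not ModelCapabilities(api_host=False).api_host

Defaulting to True means the commit only has to touch the handful of models that proxy to another service; everything else stays API-hostable without modification.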
In the module defining APIInferenceModel:
@@ -14,6 +14,7 @@ from modeling.inference_model import (
     GenerationResult,
     GenerationSettings,
     InferenceModel,
+    ModelCapabilities,
 )
 
 
@@ -26,8 +27,12 @@ class APIInferenceModel(InferenceModel):
         tokenizer_id = requests.get(
             utils.koboldai_vars.colaburl[:-8] + "/api/v1/model",
         ).json()["result"]
 
         self.tokenizer = self._get_tokenizer(tokenizer_id)
 
+        # Do not allow API to be served over the API
+        self.capabilties = ModelCapabilities(api_host=False)
+
     def _raw_generate(
         self,
         prompt_tokens: Union[List[int], torch.Tensor],
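The opt-out itself is two lines in the constructor, and the identical pattern recurs in the Colab and Horde models below. A stand-in sketch, reusing the ModelCapabilities sketch above (this base class is an assumption, not the real InferenceModel):

class InferenceModel:
    """Stand-in base; the real class lives in modeling/inference_model.py."""
    def __init__(self) -> None:
        # Assumed default: models start out API-hostable.
        self.capabilties = ModelCapabilities()

class APIInferenceModel(InferenceModel):
    def __init__(self) -> None:
        super().__init__()
        # Do not allow API to be served over the API
        self.capabilties = ModelCapabilities(api_host=False)

Note that the override constructs a fresh ModelCapabilities rather than flipping the one flag, so any non-default flags set earlier would revert to their defaults; for these remote-backed models the defaults are evidently what is wanted.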
In the module defining ColabInferenceModel:
@@ -11,6 +11,7 @@ from modeling.inference_model import (
     GenerationResult,
     GenerationSettings,
     InferenceModel,
+    ModelCapabilities,
 )
 
 
@@ -19,6 +20,12 @@ class ColabException(Exception):
 
 
 class ColabInferenceModel(InferenceModel):
+    def __init__(self) -> None:
+        super().__init__()
+
+        # Do not allow API to be served over the API
+        self.capabilties = ModelCapabilities(api_host=False)
+
     def _load(self, save_model: bool, initial_load: bool) -> None:
         self.tokenizer = self._get_tokenizer("EleutherAI/gpt-neo-2.7B")
In the module defining HordeInferenceModel:
@@ -13,6 +13,7 @@ from modeling.inference_model import (
     GenerationResult,
     GenerationSettings,
     InferenceModel,
+    ModelCapabilities,
 )
 
 
@@ -21,6 +22,12 @@ class HordeException(Exception):
 
 
 class HordeInferenceModel(InferenceModel):
+    def __init__(self) -> None:
+        super().__init__()
+
+        # Do not allow API to be served over the API
+        self.capabilties = ModelCapabilities(api_host=False)
+
     def _load(self, save_model: bool, initial_load: bool) -> None:
         self.tokenizer = self._get_tokenizer(
             utils.koboldai_vars.cluster_requested_models[0]
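Putting the pieces together, a self-contained sketch of the new end-to-end behavior (all names here are stand-ins for illustration, trimmed to what this commit adds):

from dataclasses import dataclass

@dataclass
class ModelCapabilities:
    api_host: bool = True  # reduced to the one flag introduced here

class HordeInferenceModel:
    """Stand-in: only the capability override from the diff is shown."""
    def __init__(self) -> None:
        # Do not allow API to be served over the API
        self.capabilties = ModelCapabilities(api_host=False)

def apiactionsubmit_gate(model, model_name: str) -> None:
    # Mirrors the new check at the top of aiserver.py's apiactionsubmit().
    if not model or not model.capabilties.api_host:
        raise NotImplementedError(
            f"API generation isn't allowed on model '{model_name}'"
        )

try:
    apiactionsubmit_gate(HordeInferenceModel(), "CLUSTER")
except NotImplementedError as exc:
    print(exc)  # API generation isn't allowed on model 'CLUSTER'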