From 290f2ce05e9f56e04ca4b430d8348b201fe37789 Mon Sep 17 00:00:00 2001
From: Henk
Date: Sat, 26 Aug 2023 00:03:28 +0200
Subject: [PATCH] CPU only warning

---
 modeling/inference_models/generic_hf_torch/class.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py
index 9b1049cf..f95bb24a 100644
--- a/modeling/inference_models/generic_hf_torch/class.py
+++ b/modeling/inference_models/generic_hf_torch/class.py
@@ -49,6 +49,9 @@ class model_backend(HFTorchInferenceModel):
 
     def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
         requested_parameters = super().get_requested_parameters(model_name, model_path, menu_path, parameters)
+        if not utils.koboldai_vars.hascuda:
+            logger.warning("Your GPU has not been detected and you can only make use of 32-bit inference, meaning the RAM requirements are 8 times higher than specified on the menu and your generations will be slow.\nUnless this is an error and your GPU is known to be compatible with our software, check out https://koboldai.org/cpp for a suitable alternative that has wider GPU support and the ability to run models in 4-bit on the CPU.")
+
         dependency_exists = importlib.util.find_spec("bitsandbytes")
         if dependency_exists:
             if model_name != 'customhuggingface' or "custom_model_name" in parameters:
@@ -57,7 +60,7 @@ class model_backend(HFTorchInferenceModel):
                         temp = json.load(f)
                 else:
                     temp = {}
-                if not hasattr(self.model_config, 'quantization_config'):
+                if not hasattr(self.model_config, 'quantization_config') and utils.koboldai_vars.hascuda:
                     requested_parameters.append({
                         "uitype": "dropdown",
                         "unit": "text",
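
For context, the "8 times higher" figure in the new warning is plain parameter-width arithmetic: fp32 inference stores each weight in 4 bytes, while the 4-bit mode offered at https://koboldai.org/cpp stores each weight in half a byte, a 4 / 0.5 = 8x RAM ratio. Below is a minimal sketch of that arithmetic together with the standard PyTorch probe that a flag like utils.koboldai_vars.hascuda would typically wrap; the 13B model size and the use of torch.cuda.is_available() are illustrative assumptions, as the actual detection code is not part of this patch.

    import torch

    def weights_ram_gib(n_params: float, bytes_per_param: float) -> float:
        """Approximate RAM needed for the weights alone, in GiB."""
        return n_params * bytes_per_param / 1024**3

    # Illustrative 13B-parameter model (assumption, not taken from the patch).
    n = 13e9
    fp32 = weights_ram_gib(n, 4.0)  # 32-bit CPU fallback: 4 bytes per weight
    q4 = weights_ram_gib(n, 0.5)    # 4-bit quantization: half a byte per weight
    print(f"fp32: {fp32:.1f} GiB, 4-bit: {q4:.1f} GiB, ratio: {fp32 / q4:.0f}x")
    # -> fp32: 48.4 GiB, 4-bit: 6.1 GiB, ratio: 8x

    # torch.cuda.is_available() is the standard PyTorch probe that a CUDA flag
    # such as hascuda would typically wrap (assumption, not from the patch).
    if not torch.cuda.is_available():
        print("No CUDA device detected; expect the 32-bit CPU fallback above.")

The same flag gates the second hunk: when no CUDA device is detected, the bitsandbytes quantization dropdown is no longer offered, presumably because that code path requires a GPU.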