mirror of https://github.com/KoboldAI/KoboldAI-Client.git

Add 4-bit BnB toggle
@@ -20,9 +20,29 @@ model_backend_name = "Huggingface"
 model_backend_type = "Huggingface" #This should be a generic name in case multiple model backends are compatible (think Hugging Face Custom and Basic Hugging Face)
 
 class model_backend(HFTorchInferenceModel):
 
     def _initialize_model(self):
         return
 
+
+    def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
+        requested_parameters = super().get_requested_parameters(model_name, model_path, menu_path, parameters = {})
+        requested_parameters.append({
+            "uitype": "toggle",
+            "unit": "bool",
+            "label": "Use 4-bit",
+            "id": "use_4_bit",
+            "default": False,
+            "tooltip": "Whether or not to use BnB's 4-bit mode",
+            "menu_path": "Layers",
+            "extra_classes": "",
+            "refresh_model_inputs": False
+        })
+        return requested_parameters
+
+    def set_input_parameters(self, parameters):
+        super().set_input_parameters(parameters)
+        self.use_4_bit = parameters['use_4_bit']
+
     def _load(self, save_model: bool, initial_load: bool) -> None:
         utils.koboldai_vars.allowsp = True
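The first hunk wires the new toggle through KoboldAI's parameter plumbing: get_requested_parameters publishes a schema entry describing a "Use 4-bit" checkbox, and set_input_parameters copies the value the UI sends back onto the backend as self.use_4_bit. The following self-contained sketch illustrates that round trip; the ToyBackend class and the values are illustrative stand-ins, not code from the repository.

# Minimal sketch of the parameter round trip the commit relies on:
# the backend publishes a schema of UI controls, the frontend returns
# the chosen values keyed by "id", and set_input_parameters() copies
# them onto the backend instance. ToyBackend is hypothetical.

class ToyBackend:
    def get_requested_parameters(self):
        # Same shape as the dict appended in the diff above.
        return [{
            "uitype": "toggle",
            "unit": "bool",
            "label": "Use 4-bit",
            "id": "use_4_bit",
            "default": False,
        }]

    def set_input_parameters(self, parameters):
        # Mirrors the new method in the diff: stash the submitted value.
        self.use_4_bit = parameters["use_4_bit"]

backend = ToyBackend()
schema = backend.get_requested_parameters()
# The frontend would render a checkbox from schema[0]; simulate a submit:
backend.set_input_parameters({"use_4_bit": True})
assert backend.use_4_bit is True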
@@ -51,6 +71,12 @@ class model_backend(HFTorchInferenceModel):
             "low_cpu_mem_usage": True,
         }
 
+        if self.use_4_bit:
+            self.lazy_load = False
+            tf_kwargs.update({
+                "load_in_4bit": True,
+            })
+
         if self.model_type == "gpt2":
             # We must disable low_cpu_mem_usage if using a GPT-2 model
             # because GPT-2 is not compatible with this feature yet.
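The second hunk adds "load_in_4bit": True to tf_kwargs, which HFTorchInferenceModel ultimately forwards to transformers' from_pretrained(); lazy loading is switched off first, presumably because KoboldAI's lazy loader cannot handle bitsandbytes-quantized weights. Below is a minimal sketch of the call that eventually receives these kwargs. The model name is a placeholder, and this assumes a transformers version that still accepts load_in_4bit directly (newer releases prefer quantization_config=BitsAndBytesConfig(load_in_4bit=True)).

# Minimal sketch, assuming transformers >= 4.30 with the bitsandbytes
# and accelerate packages installed and a CUDA GPU available.
# "facebook/opt-350m" is a placeholder; the diff itself only adds
# "load_in_4bit": True to the kwargs dict.
from transformers import AutoModelForCausalLM

tf_kwargs = {
    "low_cpu_mem_usage": True,  # already set by the backend
    "load_in_4bit": True,       # added when the new toggle is enabled
}
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", **tf_kwargs)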