diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py
index 49c6ca33..25d49214 100644
--- a/modeling/inference_models/generic_hf_torch/class.py
+++ b/modeling/inference_models/generic_hf_torch/class.py
@@ -36,24 +36,14 @@ class model_backend(HFTorchInferenceModel):
                 else:
                     temp = {}
                 requested_parameters.append({
-                    "uitype": "toggle",
-                    "unit": "bool",
-                    "label": "Use 8-bit",
-                    "id": "use_8_bit",
-                    "default": temp['use_8_bit'] if 'use_8_bit' in temp else False,
-                    "tooltip": "Whether or not to use BnB's 8-bit mode",
-                    "menu_path": "Layers",
-                    "extra_classes": "",
-                    "refresh_model_inputs": False
-                })
-                requested_parameters.append({
-                    "uitype": "toggle",
-                    "unit": "bool",
-                    "label": "Use 4-bit",
-                    "id": "use_4_bit",
-                    "default": temp['use_4_bit'] if 'use_4_bit' in temp else False,
-                    "tooltip": "Whether or not to use BnB's 4-bit mode",
+                    "uitype": "dropdown",
+                    "unit": "text",
+                    "label": "Quantization",
+                    "id": "quantization",
+                    "default": temp['quantization'] if 'quantization' in temp else 'none',
+                    "tooltip": "Whether or not to use BnB's 4-bit or 8-bit mode",
                     "menu_path": "Layers",
+                    "children": [{'text': 'None', 'value':'none'},{'text': '4-bit', 'value': '4bit'}, {'text': '8-bit', 'value': '8bit'}],
                     "extra_classes": "",
                     "refresh_model_inputs": False
                 })
@@ -63,8 +53,7 @@ class model_backend(HFTorchInferenceModel):
 
     def set_input_parameters(self, parameters):
         super().set_input_parameters(parameters)
-        self.use_4_bit = parameters['use_4_bit'] if 'use_4_bit' in parameters else False
-        self.use_8_bit = parameters['use_8_bit'] if 'use_8_bit' in parameters else False
+        self.quantization = parameters['quantization'] if 'quantization' in parameters else 'none'
 
     def _load(self, save_model: bool, initial_load: bool) -> None:
         utils.koboldai_vars.allowsp = True
@@ -94,7 +83,7 @@ class model_backend(HFTorchInferenceModel):
             "low_cpu_mem_usage": True,
         }
 
-        if self.use_8_bit:
+        if self.quantization == "8bit":
             tf_kwargs.update({
                 "quantization_config":BitsAndBytesConfig(
                     load_in_8bit=True,
@@ -102,7 +91,7 @@ class model_backend(HFTorchInferenceModel):
                 ),
             })
 
-        if self.use_4_bit or utils.koboldai_vars.colab_arg:
+        if self.quantization == "4bit" or utils.koboldai_vars.colab_arg:
             tf_kwargs.update({
                 "quantization_config":BitsAndBytesConfig(
                     load_in_4bit=True,
@@ -317,8 +306,7 @@ class model_backend(HFTorchInferenceModel):
                     "disk_layers": self.disk_layers
                     if "disk_layers" in vars(self)
                     else 0,
-                    "use_4_bit": self.use_4_bit,
-                    "use_8_bit": self.use_8_bit,
+                    "quantization": self.quantization,
                 },
                 f,
                 indent="",