mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-06-05 21:59:24 +02:00
Merge pull request #409 from nkpz/bnb8bit
Configurable quantization level, fix for broken toggles in model settings
This commit is contained in:
@@ -36,13 +36,14 @@ class model_backend(HFTorchInferenceModel):
|
|||||||
else:
|
else:
|
||||||
temp = {}
|
temp = {}
|
||||||
requested_parameters.append({
|
requested_parameters.append({
|
||||||
"uitype": "toggle",
|
"uitype": "dropdown",
|
||||||
"unit": "bool",
|
"unit": "text",
|
||||||
"label": "Use 4-bit",
|
"label": "Quantization",
|
||||||
"id": "use_4_bit",
|
"id": "quantization",
|
||||||
"default": temp['use_4_bit'] if 'use_4_bit' in temp else False,
|
"default": temp['quantization'] if 'quantization' in temp else 'none',
|
||||||
"tooltip": "Whether or not to use BnB's 4-bit mode",
|
"tooltip": "Whether or not to use BnB's 4-bit or 8-bit mode",
|
||||||
"menu_path": "Layers",
|
"menu_path": "Layers",
|
||||||
|
"children": [{'text': 'None', 'value':'none'},{'text': '4-bit', 'value': '4bit'}, {'text': '8-bit', 'value': '8bit'}],
|
||||||
"extra_classes": "",
|
"extra_classes": "",
|
||||||
"refresh_model_inputs": False
|
"refresh_model_inputs": False
|
||||||
})
|
})
|
||||||
@@ -52,7 +53,7 @@ class model_backend(HFTorchInferenceModel):
|
|||||||
|
|
||||||
def set_input_parameters(self, parameters):
|
def set_input_parameters(self, parameters):
|
||||||
super().set_input_parameters(parameters)
|
super().set_input_parameters(parameters)
|
||||||
self.use_4_bit = parameters['use_4_bit'] if 'use_4_bit' in parameters else False
|
self.quantization = parameters['quantization'] if 'quantization' in parameters else False
|
||||||
|
|
||||||
def _load(self, save_model: bool, initial_load: bool) -> None:
|
def _load(self, save_model: bool, initial_load: bool) -> None:
|
||||||
utils.koboldai_vars.allowsp = True
|
utils.koboldai_vars.allowsp = True
|
||||||
@@ -82,7 +83,15 @@ class model_backend(HFTorchInferenceModel):
|
|||||||
"low_cpu_mem_usage": True,
|
"low_cpu_mem_usage": True,
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.use_4_bit or utils.koboldai_vars.colab_arg:
|
if self.quantization == "8bit":
|
||||||
|
tf_kwargs.update({
|
||||||
|
"quantization_config":BitsAndBytesConfig(
|
||||||
|
load_in_8bit=True,
|
||||||
|
llm_int8_enable_fp32_cpu_offload=True
|
||||||
|
),
|
||||||
|
})
|
||||||
|
|
||||||
|
if self.quantization == "4bit" or utils.koboldai_vars.colab_arg:
|
||||||
tf_kwargs.update({
|
tf_kwargs.update({
|
||||||
"quantization_config":BitsAndBytesConfig(
|
"quantization_config":BitsAndBytesConfig(
|
||||||
load_in_4bit=True,
|
load_in_4bit=True,
|
||||||
@@ -297,7 +306,7 @@ class model_backend(HFTorchInferenceModel):
|
|||||||
"disk_layers": self.disk_layers
|
"disk_layers": self.disk_layers
|
||||||
if "disk_layers" in vars(self)
|
if "disk_layers" in vars(self)
|
||||||
else 0,
|
else 0,
|
||||||
"use_4_bit": self.use_4_bit,
|
"quantization": self.quantization,
|
||||||
},
|
},
|
||||||
f,
|
f,
|
||||||
indent="",
|
indent="",
|
||||||
|
@@ -2011,7 +2011,7 @@ function load_model() {
|
|||||||
data = {}
|
data = {}
|
||||||
if (settings_area) {
|
if (settings_area) {
|
||||||
for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) {
|
for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) {
|
||||||
var element_data = element.value;
|
var element_data = element.getAttribute("data_type") === "bool" ? element.checked : element.value;
|
||||||
if ((element.tagName == "SELECT") && (element.multiple)) {
|
if ((element.tagName == "SELECT") && (element.multiple)) {
|
||||||
element_data = [];
|
element_data = [];
|
||||||
for (var i=0, iLen=element.options.length; i<iLen; i++) {
|
for (var i=0, iLen=element.options.length; i<iLen; i++) {
|
||||||
@@ -2024,8 +2024,6 @@ function load_model() {
|
|||||||
element_data = parseInt(element_data);
|
element_data = parseInt(element_data);
|
||||||
} else if (element.getAttribute("data_type") == "float") {
|
} else if (element.getAttribute("data_type") == "float") {
|
||||||
element_data = parseFloat(element_data);
|
element_data = parseFloat(element_data);
|
||||||
} else if (element.getAttribute("data_type") == "bool") {
|
|
||||||
element_data = (element_data == 'on');
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
data[element.id.split("|")[1].replace("_value", "")] = element_data;
|
data[element.id.split("|")[1].replace("_value", "")] = element_data;
|
||||||
|
Reference in New Issue
Block a user