Mirror of https://github.com/KoboldAI/KoboldAI-Client.git
Merge remote-tracking branch 'upstream/united' into 4bit-plugin
@@ -1894,6 +1894,7 @@ def load_model(model_backend, initial_load=False):
         logger.message(f"KoboldAI has finished loading and is available at the following link for KoboldAI Lite: {koboldai_vars.cloudflare_link}/lite")
+        logger.message(f"KoboldAI has finished loading and is available at the following link for the API: {koboldai_vars.cloudflare_link}/api")
 
 
 # Setup IP Whitelisting
 # Define a function to check if IP is allowed
 def is_allowed_ip():
@@ -10901,13 +10902,14 @@ def run():
         with open('cloudflare.log', 'w') as cloudflarelog:
             cloudflarelog.write("KoboldAI is available at the following link : " + cloudflare)
             logger.init_ok("Webserver", status="OK")
-            if not koboldai_vars.use_colab_tpu:
+            if not koboldai_vars.use_colab_tpu and args.model:
                 # If we're using a TPU our UI will freeze during the connection to the TPU. To prevent this from showing to the user we
                 # delay the display of this message until after that step
-                logger.message(f"KoboldAI is available at the following link for UI 1: {cloudflare}")
-                logger.message(f"KoboldAI is available at the following link for UI 2: {cloudflare}/new_ui")
-                logger.message(f"KoboldAI is available at the following link for KoboldAI Lite: {cloudflare}/lite")
-                logger.message(f"KoboldAI is available at the following link for the API: {cloudflare}/api")
+                logger.message(f"KoboldAI is still loading your model but available at the following link for UI 1: {cloudflare}")
+                logger.message(f"KoboldAI is still loading your model but available at the following link for UI 2: {cloudflare}/new_ui")
+                logger.message(f"KoboldAI is still loading your model but available at the following link for KoboldAI Lite: {cloudflare}/lite")
+                logger.message(f"KoboldAI is still loading your model but available at the following link for the API: [Loading Model...]")
+                logger.message(f"While the model loads you can use the above links to begin setting up your session, for generations you must wait until after its done loading.")
     else:
         logger.init_ok("Webserver", status="OK")
         logger.message(f"Webserver has started, you can now connect to this machine at port: {port}")

@@ -32,7 +32,7 @@ dependencies:
     - flask-ngrok
     - flask-cors
     - lupa==1.10
-    - transformers==4.30.1
+    - transformers==4.31.0
     - huggingface_hub==0.15.1
     - safetensors==0.3.1
     - accelerate==0.20.3

@@ -30,7 +30,7 @@ dependencies:
     - flask-ngrok
     - flask-cors
     - lupa==1.10
-    - transformers==4.30.1
+    - transformers==4.31.0
    - huggingface_hub==0.15.1
    - safetensors==0.3.1
    - accelerate==0.20.3

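Both environment files move to the same pins. As a quick interpreter-level check against the versions pinned above (an illustrative snippet, not part of the commit):

    import accelerate
    import huggingface_hub
    import safetensors
    import transformers

    # Versions pinned by this commit's environment files.
    assert transformers.__version__.startswith("4.31")
    assert huggingface_hub.__version__ == "0.15.1"
    assert safetensors.__version__ == "0.3.1"
    assert accelerate.__version__ == "0.20.3"
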
@@ -54,13 +54,14 @@ class model_backend(HFTorchInferenceModel):
         else:
             temp = {}
         requested_parameters.append({
-            "uitype": "toggle",
-            "unit": "bool",
-            "label": "Use 4-bit",
-            "id": "use_4_bit",
-            "default": temp['use_4_bit'] if 'use_4_bit' in temp else False,
-            "tooltip": "Whether or not to use BnB's 4-bit mode",
+            "uitype": "dropdown",
+            "unit": "text",
+            "label": "Quantization",
+            "id": "quantization",
+            "default": temp['quantization'] if 'quantization' in temp else 'none',
+            "tooltip": "Whether or not to use BnB's 4-bit or 8-bit mode",
             "menu_path": "Layers",
+            "children": [{'text': 'None', 'value': 'none'}, {'text': '4-bit', 'value': '4bit'}, {'text': '8-bit', 'value': '8bit'}],
             "extra_classes": "",
             "refresh_model_inputs": False
         })
@@ -70,7 +71,7 @@ class model_backend(HFTorchInferenceModel):
 
     def set_input_parameters(self, parameters):
         super().set_input_parameters(parameters)
-        self.use_4_bit = parameters['use_4_bit'] if 'use_4_bit' in parameters else False
+        self.quantization = parameters['quantization'] if 'quantization' in parameters else False
 
     def _load(self, save_model: bool, initial_load: bool) -> None:
         utils.koboldai_vars.allowsp = True
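Note the fallback asymmetry between the two hunks above: the UI schema defaults to 'none' while set_input_parameters falls back to False. A small sketch of a normalization that treats both as "no quantization" (the names come from the hunks; the helper itself is not in the commit):

    def normalize_quantization(parameters):
        # 'none' (UI default) and False (set_input_parameters fallback)
        # both mean: load the model unquantized.
        value = parameters.get("quantization", "none")
        return value if value in ("4bit", "8bit") else "none"
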
@@ -100,7 +101,15 @@ class model_backend(HFTorchInferenceModel):
             "low_cpu_mem_usage": True,
         }
 
-        if self.use_4_bit or utils.koboldai_vars.colab_arg:
+        if self.quantization == "8bit":
+            tf_kwargs.update({
+                "quantization_config": BitsAndBytesConfig(
+                    load_in_8bit=True,
+                    llm_int8_enable_fp32_cpu_offload=True
+                ),
+            })
+
+        if self.quantization == "4bit" or utils.koboldai_vars.colab_arg:
             tf_kwargs.update({
                 "quantization_config": BitsAndBytesConfig(
                     load_in_4bit=True,
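The hunk is cut off after load_in_4bit=True. For orientation, a minimal sketch of how kwargs assembled this way reach Transformers, assuming transformers 4.31 with bitsandbytes installed; the model id, device map, and 4-bit compute dtype are illustrative assumptions, not taken from the commit:

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    quantization = "4bit"  # the dropdown's value: "none", "4bit" or "8bit"

    tf_kwargs = {"low_cpu_mem_usage": True}
    if quantization == "8bit":
        # 8-bit weights; layers offloaded to the CPU stay in fp32
        tf_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_8bit=True,
            llm_int8_enable_fp32_cpu_offload=True,
        )
    elif quantization == "4bit":
        tf_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,  # assumption; the commit's remaining 4-bit flags are cut off above
        )

    # Placeholder model id; any causal LM on the Hub loads the same way.
    model = AutoModelForCausalLM.from_pretrained(
        "huggyllama/llama-7b",
        device_map="auto",
        **tf_kwargs,
    )
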
@@ -119,6 +128,11 @@ class model_backend(HFTorchInferenceModel):
             # Also, lazy loader doesn't support GPT-2 models
             self.lazy_load = False
 
+        if self.model_type == "llama":
+            tf_kwargs.update({
+                "pretraining_tp": 1  # Workaround recommended by HF to fix their mistake on the config.json tuners adopted
+            })
+
         logger.debug(
             "lazy_load: {} hascuda: {} breakmodel: {} nobreakmodel: {}".format(
                 self.lazy_load,
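For background: transformers 4.31 added a pretraining_tp field to the Llama config, and values above 1 replay the tensor-parallel matmul layout used during pretraining (exact logits, noticeably slower); some published config.json files shipped with it set, hence the override. A sketch of the same workaround applied outside KoboldAI, with a placeholder model id:

    from transformers import AutoConfig, AutoModelForCausalLM

    config = AutoConfig.from_pretrained("huggyllama/llama-7b")  # placeholder id
    if getattr(config, "model_type", "") == "llama":
        # Use the regular fused linears instead of the slow TP-faithful path.
        config.pretraining_tp = 1
    model = AutoModelForCausalLM.from_pretrained("huggyllama/llama-7b", config=config)
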
@@ -315,7 +329,7 @@ class model_backend(HFTorchInferenceModel):
                     "disk_layers": self.disk_layers
                     if "disk_layers" in vars(self)
                     else 0,
-                    "use_4_bit": self.use_4_bit,
+                    "quantization": self.quantization,
                 },
                 f,
                 indent="",

@@ -330,6 +330,11 @@ class HFInferenceModel(InferenceModel):
             if any(c in str(k) for c in "[]")
         ]
 
+        try:
+            self.badwordsids.remove([self.tokenizer.pad_token_id])
+        except:
+            pass
+
         if utils.koboldai_vars.newlinemode == "n":
             self.badwordsids.append([self.tokenizer.eos_token_id])

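The new try/except un-bans the pad token, presumably because a pad token spelled like "[PAD]" trips the bracket filter above, and for some tokenizers the pad id doubles as the end-of-sequence id, which must stay generatable; list.remove() raises ValueError when the entry is absent, hence the guard. A self-contained sketch with made-up token ids:

    # Suppose the bracket filter caught "[PAD]" (id 0) along with
    # genuinely unwanted bracket tokens.
    badwordsids = [[28920], [58], [60], [0]]
    pad_token_id = 0

    try:
        badwordsids.remove([pad_token_id])  # un-ban the pad token
    except ValueError:                      # it was never in the list
        pass

    assert [0] not in badwordsids
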
@@ -1,4 +1,4 @@
-transformers==4.30.*
+transformers==4.31.*
 huggingface_hub==0.15.1
 Flask==2.2.3
 Flask-SocketIO==5.3.2

@@ -2011,7 +2011,7 @@ function load_model() {
     data = {}
     if (settings_area) {
         for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) {
-            var element_data = element.value;
+            var element_data = element.getAttribute("data_type") === "bool" ? element.checked : element.value;
             if ((element.tagName == "SELECT") && (element.multiple)) {
                 element_data = [];
                 for (var i=0, iLen=element.options.length; i<iLen; i++) {
@@ -2024,8 +2024,6 @@ function load_model() {
                 element_data = parseInt(element_data);
             } else if (element.getAttribute("data_type") == "float") {
                 element_data = parseFloat(element_data);
-            } else if (element.getAttribute("data_type") == "bool") {
-                element_data = (element_data == 'on');
             }
         }
         data[element.id.split("|")[1].replace("_value", "")] = element_data;
@@ -2410,12 +2408,12 @@ function world_info_entry(data) {
     comment.setAttribute("uid", data.uid);
     comment.value = data.comment;
     comment.onchange = function () {
-        world_info_data[this.getAttribute('uid')]['comment'] = this.textContent;
-        send_world_info(this.getAttribute('uid'));
+        world_info_data[data.uid].comment = this.value;
+        send_world_info(data.uid);
         this.classList.add("pulse");
     }
     comment.classList.remove("pulse");
 
     //Let's figure out the order to insert this card
     var found = false;
     var moved = false;

@@ -19,8 +19,8 @@
             <span id="settings_flyout_tab_home" class="setting_menu_button tab tab-settings selected" tab-target="setting_menu_home" onclick="selectTab(this);">Home</span>
             <span id="settings_flyout_tab_settings" class="setting_menu_button tab tab-settings" tab-target="setting_menu_settings" onclick="selectTab(this);">Settings</span>
             <span id="settings_flyout_tab_interface" class="setting_menu_button tab tab-settings" tab-target="setting_menu_interface" onclick="selectTab(this);">Interface</span>
-            <span style="float: right;margin-right: 30px;padding: 0px 10px;" onclick="window.open('https://github.com/KoboldAI/KoboldAI-Client/wiki');">
-                Help
+            <span style="float: right;margin-right: 30px;padding: 0px 10px;" onclick="window.open('/lite');">
+                Lite
                 <icon class="material-icons-outlined" style="font-size:14px;position:relative;top:2px;">open_in_new</icon>
             </span>
         </div>