mirror of https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-06-05 21:59:24 +02:00

Overhaul 4-bit support to load with a toggle

aiserver.py (145 changed lines)

@@ -70,7 +70,7 @@ from utils import debounce
 import utils
 import koboldai_settings
 import torch
-from transformers import StoppingCriteria, GPT2Tokenizer, GPT2LMHeadModel, GPTNeoForCausalLM, GPTNeoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, PreTrainedModel, modeling_utils, AutoModelForTokenClassification
+from transformers import StoppingCriteria, GPT2Tokenizer, GPT2LMHeadModel, GPTNeoForCausalLM, GPTNeoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, PreTrainedModel, modeling_utils, AutoModelForTokenClassification, LlamaTokenizer
 from transformers import __version__ as transformers_version
 import transformers
 try:
@@ -1114,14 +1114,20 @@ def device_config(config):
         koboldai_vars.usegpu = False
         return

-def move_model_to_devices(model):
+def move_model_to_devices(model, use_4_bit=False):
     global generator

     if(not utils.HAS_ACCELERATE and not koboldai_vars.breakmodel):
         if(koboldai_vars.usegpu):
-            model = model.to(koboldai_vars.gpu_device)
+            if not use_4_bit:
+                model = model.half().to(koboldai_vars.gpu_device)
+            else:
+                model = model.to(koboldai_vars.gpu_device)
         else:
-            model = model.to('cpu')
+            if not use_4_bit:
+                model = model.to('cpu').float()
+            else:
+                model = model.to('cpu')
         generator = model.generate
         return

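The guard above is the commit's core pattern, repeated at every placement site: the .half()/.float() dtype conversions are applied only when 4-bit mode is off, since GPTQ-quantized weights must keep their packed dtypes. A minimal standalone sketch of the placement rule (the function and argument names here are illustrative, not KoboldAI API):

    import torch

    def place_model(model, use_gpu: bool, use_4_bit: bool, gpu_device=0):
        # Convert dtype only for normal loads; 4-bit modules keep their own dtypes.
        if use_gpu:
            return model.to(gpu_device) if use_4_bit else model.half().to(gpu_device)
        return model.to("cpu") if use_4_bit else model.to("cpu").float()

    m = place_model(torch.nn.Linear(4, 4), use_gpu=False, use_4_bit=False)
    print(next(m.parameters()).dtype)  # torch.float32
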
@@ -1149,6 +1155,8 @@ def move_model_to_devices(model):
         generator = model.generate
         return

+    if not use_4_bit:
+        model.half()
     gc.collect()

     if(hasattr(model, "transformer")):
@@ -1518,11 +1526,6 @@ def general_startup(override_args=None):
     parser.add_argument('-v', '--verbosity', action='count', default=0, help="The default logging level is ERROR or higher. This value increases the amount of logging seen in your screen")
     parser.add_argument('-q', '--quiesce', action='count', default=0, help="The default logging level is ERROR or higher. This value decreases the amount of logging seen in your screen")

-    # 4-bit stuff
-    parser.add_argument('--gptj4bit', help="Load a GPT-J model 4-bit pt file with this path")
-    parser.add_argument('--gptneox4bit', help="Load a GPT-NeoX model 4-bit pt file with this path")
-    parser.add_argument('--llama4bit', help="Load a Llama model 4-bit pt file with this path")
-
     #args: argparse.Namespace = None
     if "pytest" in sys.modules and override_args is None:
         args = parser.parse_args([])
@@ -1626,11 +1629,6 @@ def general_startup(override_args=None):
     koboldai_vars.smanrename = koboldai_vars.host == args.override_rename

     koboldai_vars.aria2_port = args.aria2_port or 6799

-    global vars_4bit
-    vars_4bit["gptj4bit"] = args.gptj4bit
-    vars_4bit["gptneox4bit"] = args.gptneox4bit
-    vars_4bit["llama4bit"] = args.llama4bit
-
     #Now let's look to see if we are going to force a load of a model from a user selected folder
     if(koboldai_vars.model == "selectfolder"):
@@ -1777,6 +1775,7 @@ def get_model_info(model, directory=""):
                         'break_values': break_values, 'gpu_count': gpu_count,
                         'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, 'show_online_model_select': show_online_model_select,
                         'bit_8_available': koboldai_vars.bit_8_available if koboldai_vars.experimental_features else False,
+                        'bit_4_available': koboldai_vars.bit_4_available if koboldai_vars.experimental_features else False,
                         'show_custom_model_box': show_custom_model_box})
     if send_horde_models:
         get_cluster_models({'key': key_value, 'url': default_url})
@@ -1918,6 +1917,18 @@ def get_cluster_models(msg):
     emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2")


+@socketio.on("use_4_bit_toggle")
+def use_4_bit_toggle(msg):
+    # Disable lazy_load and breakmodel
+    if msg["use_4_bit"]:
+        koboldai_vars.lazy_load = False
+        koboldai_vars.nobreakmodel = True
+    else:
+        koboldai_vars.lazy_load = True
+        koboldai_vars.nobreakmodel = False
+
+    # TODO: Reload JS values for this stuff
+
 # Function to patch transformers to use our soft prompt
 def patch_causallm(model):
     from torch.nn import Embedding
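The handler above makes 4-bit mode mutually exclusive with lazy loading and breakmodel. A sketch of the same state transition, with a plain dict standing in for koboldai_vars (the dict and function name are illustrative only):

    def apply_4_bit_toggle(vars_state: dict, use_4_bit: bool) -> dict:
        # 4-bit on  -> lazy_load off, breakmodel disabled
        # 4-bit off -> lazy_load on,  breakmodel allowed
        vars_state["lazy_load"] = not use_4_bit
        vars_state["nobreakmodel"] = use_4_bit
        return vars_state

    print(apply_4_bit_toggle({"lazy_load": True, "nobreakmodel": False}, True))
    # -> {'lazy_load': False, 'nobreakmodel': True}
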
@@ -2647,7 +2658,7 @@ def unload_model():
         koboldai_vars.badwordsids = koboldai_settings.badwordsids_default


-def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=False, online_model="", use_breakmodel_args=False, breakmodel_args_default_to_cpu=False, url=None, use_8_bit=False):
+def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=False, online_model="", use_breakmodel_args=False, breakmodel_args_default_to_cpu=False, url=None, use_8_bit=False, use_4_bit=False):
     global model
     global generator
     global torch
@@ -2684,7 +2695,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
             disk_layers = args.breakmodel_disklayers
         if breakmodel_args_default_to_cpu and disk_layers is None:
             disk_layers = args.breakmodel_disklayers = 0
-
+
     unload_model()

     if online_model == "":
@@ -2904,10 +2915,10 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal

         @functools.lru_cache(maxsize=None)
         def get_original_key(key):
-            try:
-                return max((original_key for original_key in utils.module_names if original_key.endswith(key)), key=len)
-            except ValueError:
-                return key
+            # try:
+            return max((original_key for original_key in utils.module_names if original_key.endswith(key)), key=len)
+            # except ValueError:
+            #     return key

         for key, value in model_dict.items():
             original_key = get_original_key(key)
@@ -2970,10 +2981,11 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                 nbytes = size if dtype is torch.bool else size * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3)
                 #print(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ", end="", flush=True)
                 model_dict[key] = model_dict[key].materialize(f, map_location="cpu")
-                # if model_dict[key].dtype is torch.float32:
-                #     koboldai_vars.fp32_model = True
-                # if convert_to_float16 and breakmodel.primary_device != "cpu" and koboldai_vars.hascuda and (koboldai_vars.breakmodel or koboldai_vars.usegpu) and model_dict[key].dtype is torch.float32:
-                #     model_dict[key] = model_dict[key].to(torch.float16)
+                if not use_4_bit:
+                    if model_dict[key].dtype is torch.float32:
+                        koboldai_vars.fp32_model = True
+                    if convert_to_float16 and breakmodel.primary_device != "cpu" and koboldai_vars.hascuda and (koboldai_vars.breakmodel or koboldai_vars.usegpu) and model_dict[key].dtype is torch.float32:
+                        model_dict[key] = model_dict[key].to(torch.float16)
                 if breakmodel.primary_device == "cpu" or (not koboldai_vars.usegpu and not koboldai_vars.breakmodel and model_dict[key].dtype is torch.float16):
                     model_dict[key] = model_dict[key].to(torch.float32)
                 if device == "shared":
@@ -2997,16 +3009,17 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         if utils.offload_index:
             for name, tensor in utils.named_buffers:
                 dtype = tensor.dtype
-                # if convert_to_float16 and breakmodel.primary_device != "cpu" and koboldai_vars.hascuda and (koboldai_vars.breakmodel or koboldai_vars.usegpu):
-                #     dtype = torch.float16
-                # if breakmodel.primary_device == "cpu" or (not koboldai_vars.usegpu and not koboldai_vars.breakmodel):
-                #     dtype = torch.float32
-                # if name in model_dict and model_dict[name].dtype is not dtype:
-                #     model_dict[name] = model_dict[name].to(dtype)
-                # if tensor.dtype is not dtype:
-                #     tensor = tensor.to(dtype)
-                # if name not in utils.offload_index:
-                #     accelerate.utils.offload_weight(tensor, name, "accelerate-disk-cache", index=utils.offload_index)
+                if not use_4_bit:
+                    if convert_to_float16 and breakmodel.primary_device != "cpu" and koboldai_vars.hascuda and (koboldai_vars.breakmodel or koboldai_vars.usegpu):
+                        dtype = torch.float16
+                    if breakmodel.primary_device == "cpu" or (not koboldai_vars.usegpu and not koboldai_vars.breakmodel):
+                        dtype = torch.float32
+                    if name in model_dict and model_dict[name].dtype is not dtype:
+                        model_dict[name] = model_dict[name].to(dtype)
+                    if tensor.dtype is not dtype:
+                        tensor = tensor.to(dtype)
+                    if name not in utils.offload_index:
+                        accelerate.utils.offload_weight(tensor, name, "accelerate-disk-cache", index=utils.offload_index)
             accelerate.utils.save_offload_index(utils.offload_index, "accelerate-disk-cache")
         utils.bar.close()
         utils.bar = None
@@ -3065,10 +3078,16 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
             koboldai_vars.modeldim = get_hidden_size_from_model(model)
             # Is CUDA available? If so, use GPU, otherwise fall back to CPU
             if(koboldai_vars.hascuda and koboldai_vars.usegpu):
-                model = model.to(koboldai_vars.gpu_device)
+                if not use_4_bit:
+                    model = model.half().to(koboldai_vars.gpu_device)
+                else:
+                    model = model.to(koboldai_vars.gpu_device)
                 generator = model.generate
             else:
-                model = model.to('cpu')
+                if not use_4_bit:
+                    model = model.to('cpu').float()
+                else:
+                    model = model.to('cpu')
                 generator = model.generate
         patch_causallm(model)
     # Use the Generic implementation
@@ -3105,17 +3124,26 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         if(koboldai_vars.lazy_load): # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time
             lowmem = {}
         if(os.path.isdir(koboldai_vars.custmodpth)):
-            global vars_4bit
-
-            if vars_4bit.get("gptj4bit"):
-                model = gptj_load_quant(koboldai_vars.custmodpth, vars_4bit["gptj4bit"], 4)
-                tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth)
-            elif vars_4bit.get("gptneox4bit"):
-                model = gptneox_load_quant(koboldai_vars.custmodpth, vars_4bit["gptneox4bit"], 4)
-                tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth)
-            elif vars_4bit.get("llama4bit"):
-                model = llama_load_quant(koboldai_vars.custmodpth, vars_4bit["llama4bit"], 4)
-                tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth)
+            path_4bit = os.path.join(koboldai_vars.custmodpth, "4bit.pt")
+
+            if not os.path.isfile(path_4bit):
+                print(f"4-bit file {path_4bit} not found, aborting 4-bit load")
+                use_4_bit = False
+
+            if use_4_bit:
+                print(f"Trying to load {koboldai_vars.model_type} model in 4-bit")
+                if koboldai_vars.model_type == "gptj":
+                    model = gptj_load_quant(koboldai_vars.custmodpth, path_4bit, 4)
+                    tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth)
+                elif koboldai_vars.model_type == "gpt_neox":
+                    model = gptneox_load_quant(koboldai_vars.custmodpth, path_4bit, 4)
+                    tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth)
+                elif koboldai_vars.model_type == "llama":
+                    model = llama_load_quant(koboldai_vars.custmodpth, path_4bit, 4)
+                    tokenizer = LlamaTokenizer.from_pretrained(koboldai_vars.custmodpth)
+                else:
+                    raise RuntimeError(f"4-bit load failed. Model type {koboldai_vars.model_type} not supported in 4-bit")
             else:
                 try:
                     tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
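This hunk is the heart of the overhaul: the per-architecture --*4bit command-line paths are gone, and the loader instead looks for a 4bit.pt file inside the model folder and dispatches on the detected model_type. A sketch of that decision, with the loader names taken from the hunk; the lookup table is an illustrative restructuring, not code from the commit:

    import os

    QUANT_LOADERS = {
        "gptj": "gptj_load_quant",        # paired with AutoTokenizer
        "gpt_neox": "gptneox_load_quant", # paired with AutoTokenizer
        "llama": "llama_load_quant",      # paired with LlamaTokenizer
    }

    def resolve_4bit(custmodpth: str, model_type: str, use_4_bit: bool):
        # Return (checkpoint path, loader name) when a 4-bit load should
        # proceed, or None to fall back to the normal 16/32-bit path.
        path_4bit = os.path.join(custmodpth, "4bit.pt")
        if not use_4_bit or not os.path.isfile(path_4bit):
            return None
        if model_type not in QUANT_LOADERS:
            raise RuntimeError(f"Model type {model_type} not supported in 4-bit")
        return path_4bit, QUANT_LOADERS[model_type]

    print(resolve_4bit("/tmp/nonexistent", "llama", True))  # None -> normal load
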
@@ -3185,6 +3213,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                 import shutil
                 tokenizer.save_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')))
                 if(koboldai_vars.fp32_model and ("breakmodel" not in globals() or not breakmodel.disk_blocks)): # Use save_pretrained to convert fp32 models to fp16, unless we are using disk cache because save_pretrained is not supported in that case
+                    if not use_4_bit:
+                        model = model.half()
                     model.save_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), max_shard_size="500MiB")
                 else: # For fp16 models, we can just copy the model files directly
                     import transformers.configuration_utils
@@ -3218,27 +3248,36 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
             if(koboldai_vars.hascuda):
                 if(koboldai_vars.usegpu):
                     koboldai_vars.modeldim = get_hidden_size_from_model(model)
-                    model = model.to(koboldai_vars.gpu_device)
+                    if not use_4_bit:
+                        model = model.half().to(koboldai_vars.gpu_device)
+                    else:
+                        model = model.to(koboldai_vars.gpu_device)
                     generator = model.generate
                 elif(koboldai_vars.breakmodel): # Use both RAM and VRAM (breakmodel)
                     koboldai_vars.modeldim = get_hidden_size_from_model(model)
                     if(not koboldai_vars.lazy_load):
                         device_config(model.config)
-                    move_model_to_devices(model)
+                    move_model_to_devices(model, use_4_bit)
                 elif(utils.HAS_ACCELERATE and __import__("breakmodel").disk_blocks > 0):
-                    move_model_to_devices(model)
+                    move_model_to_devices(model, use_4_bit)
                     koboldai_vars.modeldim = get_hidden_size_from_model(model)
                     generator = model.generate
                 else:
-                    model = model.to('cpu')
+                    if not use_4_bit:
+                        model.to('cpu').float()
+                    else:
+                        model.to('cpu')
                     koboldai_vars.modeldim = get_hidden_size_from_model(model)
                     generator = model.generate
             elif(utils.HAS_ACCELERATE and __import__("breakmodel").disk_blocks > 0):
-                move_model_to_devices(model)
+                move_model_to_devices(model, use_4_bit)
                 koboldai_vars.modeldim = get_hidden_size_from_model(model)
                 generator = model.generate
             else:
-                model.to('cpu')
+                if not use_4_bit:
+                    model.to('cpu').float()
+                else:
+                    model.to('cpu')
                 koboldai_vars.modeldim = get_hidden_size_from_model(model)
                 generator = model.generate

@@ -8784,7 +8823,7 @@ def UI_2_load_model(data):
         koboldai_vars.model = data['model']
         koboldai_vars.custmodpth = data['path']
     print("loading Model")
-    load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit'])
+    load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit'], use_4_bit=data['use_4_bit'])

 #==================================================================#
 # Event triggered when load story is clicked
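With this change the UI2 load request carries use_4_bit end to end: the checkbox state is serialized into the socket message (see the koboldai.js hunk further below) and forwarded unchanged into load_model(). A sketch of the message shape; the field values are examples only:

    payload = {
        "model": "NeoCustom",           # example values; the field names
        "path": "/models/my-model",     # match what the JS client sends
        "use_gpu": True,
        "gpu_layers": "32",
        "disk_layers": "0",
        "online_model": "",
        "use_8_bit": False,
        "use_4_bit": True,              # field added by this commit
    }
    # Server side, in effect:
    # load_model(use_gpu=payload['use_gpu'], ..., use_4_bit=payload['use_4_bit'])
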
koboldai_settings.py

@@ -1207,7 +1207,7 @@ class system_settings(settings):
                     'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy',
                     'serverstarted', 'inference_config', 'image_pipeline', 'summarizer',
                     'summary_tokenizer', 'use_colab_tpu', 'noai', 'disable_set_aibusy', 'cloudflare_link', 'tts_model',
-                    'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states']
+                    'generating_image', 'bit_8_available', 'bit_4_available', 'host', 'hascuda', 'usegpu', 'rng_states']
     settings_name = "system"
     def __init__(self, socketio, koboldai_var):
         self._socketio = socketio
@@ -1302,6 +1302,8 @@ class system_settings(settings):
                 elif torch.cuda.get_device_properties(device).major == 7 and torch.cuda.get_device_properties(device).minor >= 2:
                     self.bit_8_available = True
                     break
+        # Check if repos/gptq exists for 4-bit mode
+        self.bit_4_available = os.path.isdir("repos/gptq")
         self.seen_messages = []

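bit_4_available is a plain filesystem probe: the 4-bit checkbox is offered only when the GPTQ code has been checked out under repos/gptq, relative to the server's working directory. An equivalent standalone check:

    import os

    def detect_4bit_support(base_dir: str = ".") -> bool:
        # Mirrors the probe above: 4-bit is available iff repos/gptq exists.
        return os.path.isdir(os.path.join(base_dir, "repos", "gptq"))

    print(detect_4bit_support())  # False unless repos/gptq is checked out here
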
@@ -2744,4 +2746,4 @@ default_preset = {
     ]
 }
 badwordsids_default = [[6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting
-badwordsids_neox = [[0], [1], [44162], [9502], [12520], [31841], [36320], [49824], [34417], [6038], [34494], [24815], [26635], [24345], [3455], [28905], [44270], [17278], [32666], [46880], [7086], [43189], [37322], [17778], [20879], [49821], [3138], [14490], [4681], [21391], [26786], [43134], [9336], [683], [48074], [41256], [19181], [29650], [28532], [36487], [45114], [46275], [16445], [15104], [11337], [1168], [5647], [29], [27482], [44965], [43782], [31011], [42944], [47389], [6334], [17548], [38329], [32044], [35487], [2239], [34761], [7444], [1084], [12399], [18990], [17636], [39083], [1184], [35830], [28365], [16731], [43467], [47744], [1138], [16079], [40116], [45564], [18297], [42368], [5456], [18022], [42696], [34476], [23505], [23741], [39334], [37944], [45382], [38709], [33440], [26077], [43600], [34418], [36033], [6660], [48167], [48471], [15775], [19884], [41533], [1008], [31053], [36692], [46576], [20095], [20629], [31759], [46410], [41000], [13488], [30952], [39258], [16160], [27655], [22367], [42767], [43736], [49694], [13811], [12004], [46768], [6257], [37471], [5264], [44153], [33805], [20977], [21083], [25416], [14277], [31096], [42041], [18331], [33376], [22372], [46294], [28379], [38475], [1656], [5204], [27075], [50001], [16616], [11396], [7748], [48744], [35402], [28120], [41512], [4207], [43144], [14767], [15640], [16595], [41305], [44479], [38958], [18474], [22734], [30522], [46267], [60], [13976], [31830], [48701], [39822], [9014], [21966], [31422], [28052], [34607], [2479], [3851], [32214], [44082], [45507], [3001], [34368], [34758], [13380], [38363], [4299], [46802], [30996], [12630], [49236], [7082], [8795], [5218], [44740], [9686], [9983], [45301], [27114], [40125], [1570], [26997], [544], [5290], [49193], [23781], [14193], [40000], [2947], [43781], [9102], [48064], [42274], [18772], [49384], [9884], [45635], [43521], [31258], [32056], [47686], [21760], [13143], [10148], [26119], [44308], [31379], [36399], [23983], [46694], [36134], [8562], [12977], [35117], [28591], [49021], [47093], [28653], [29013], [46468], [8605], [7254], [25896], [5032], [8168], [36893], [38270], [20499], [27501], [34419], [29547], [28571], [36586], [20871], [30537], [26842], [21375], [31148], [27618], [33094], [3291], [31789], [28391], [870], [9793], [41361], [47916], [27468], [43856], [8850], [35237], [15707], [47552], [2730], [41449], [45488], [3073], [49806], [21938], [24430], [22747], [20924], [46145], [20481], [20197], [8239], [28231], [17987], [42804], [47269], [29972], [49884], [21382], [46295], [36676], [34616], [3921], [26991], [27720], [46265], [654], [9855], [40354], [5291], [34904], [44342], [2470], [14598], [880], [19282], [2498], [24237], [21431], [16369], [8994], [44524], [45662], [13663], [37077], [1447], [37786], [30863], [42854], [1019], [20322], [4398], [12159], [44072], [48664], [31547], [18736], [9259], [31], [16354], [21810], [4357], [37982], [5064], [2033], [32871], [47446], [62], [22158], [37387], [8743], [47007], [17981], [11049], [4622], [37916], [36786], [35138], [29925], [14157], [18095], [27829], [1181], [22226], [5709], [4725], [30189], [37014], [1254], [11380], [42989], [696], [24576], [39487], [30119], [1092], [8088], [2194], [9899], [14412], [21828], [3725], [13544], [5180], [44679], [34398], [3891], [28739], [14219], [37594], [49550], [11326], [6904], [17266], [5749], [10174], [23405], [9955], [38271], [41018], [13011], [48392], [36784], [24254], [21687], [23734], [5413], [41447], [45472], [10122], [17555], [15830], [47384], [12084], [31350], [47940], [11661], [27988], [45443], [905], [49651], [16614], [34993], [6781], [30803], [35869], [8001], [41604], [28118], [46462], [46762], [16262], [17281], [5774], [10943], [5013], [18257], [6750], [4713], [3951], [11899], [38791], [16943], [37596], [9318], [18413], [40473], [13208], [16375]]
+badwordsids_neox = [[0], [1], [44162], [9502], [12520], [31841], [36320], [49824], [34417], [6038], [34494], [24815], [26635], [24345], [3455], [28905], [44270], [17278], [32666], [46880], [7086], [43189], [37322], [17778], [20879], [49821], [3138], [14490], [4681], [21391], [26786], [43134], [9336], [683], [48074], [41256], [19181], [29650], [28532], [36487], [45114], [46275], [16445], [15104], [11337], [1168], [5647], [29], [27482], [44965], [43782], [31011], [42944], [47389], [6334], [17548], [38329], [32044], [35487], [2239], [34761], [7444], [1084], [12399], [18990], [17636], [39083], [1184], [35830], [28365], [16731], [43467], [47744], [1138], [16079], [40116], [45564], [18297], [42368], [5456], [18022], [42696], [34476], [23505], [23741], [39334], [37944], [45382], [38709], [33440], [26077], [43600], [34418], [36033], [6660], [48167], [48471], [15775], [19884], [41533], [1008], [31053], [36692], [46576], [20095], [20629], [31759], [46410], [41000], [13488], [30952], [39258], [16160], [27655], [22367], [42767], [43736], [49694], [13811], [12004], [46768], [6257], [37471], [5264], [44153], [33805], [20977], [21083], [25416], [14277], [31096], [42041], [18331], [33376], [22372], [46294], [28379], [38475], [1656], [5204], [27075], [50001], [16616], [11396], [7748], [48744], [35402], [28120], [41512], [4207], [43144], [14767], [15640], [16595], [41305], [44479], [38958], [18474], [22734], [30522], [46267], [60], [13976], [31830], [48701], [39822], [9014], [21966], [31422], [28052], [34607], [2479], [3851], [32214], [44082], [45507], [3001], [34368], [34758], [13380], [38363], [4299], [46802], [30996], [12630], [49236], [7082], [8795], [5218], [44740], [9686], [9983], [45301], [27114], [40125], [1570], [26997], [544], [5290], [49193], [23781], [14193], [40000], [2947], [43781], [9102], [48064], [42274], [18772], [49384], [9884], [45635], [43521], [31258], [32056], [47686], [21760], [13143], [10148], [26119], [44308], [31379], [36399], [23983], [46694], [36134], [8562], [12977], [35117], [28591], [49021], [47093], [28653], [29013], [46468], [8605], [7254], [25896], [5032], [8168], [36893], [38270], [20499], [27501], [34419], [29547], [28571], [36586], [20871], [30537], [26842], [21375], [31148], [27618], [33094], [3291], [31789], [28391], [870], [9793], [41361], [47916], [27468], [43856], [8850], [35237], [15707], [47552], [2730], [41449], [45488], [3073], [49806], [21938], [24430], [22747], [20924], [46145], [20481], [20197], [8239], [28231], [17987], [42804], [47269], [29972], [49884], [21382], [46295], [36676], [34616], [3921], [26991], [27720], [46265], [654], [9855], [40354], [5291], [34904], [44342], [2470], [14598], [880], [19282], [2498], [24237], [21431], [16369], [8994], [44524], [45662], [13663], [37077], [1447], [37786], [30863], [42854], [1019], [20322], [4398], [12159], [44072], [48664], [31547], [18736], [9259], [31], [16354], [21810], [4357], [37982], [5064], [2033], [32871], [47446], [62], [22158], [37387], [8743], [47007], [17981], [11049], [4622], [37916], [36786], [35138], [29925], [14157], [18095], [27829], [1181], [22226], [5709], [4725], [30189], [37014], [1254], [11380], [42989], [696], [24576], [39487], [30119], [1092], [8088], [2194], [9899], [14412], [21828], [3725], [13544], [5180], [44679], [34398], [3891], [28739], [14219], [37594], [49550], [11326], [6904], [17266], [5749], [10174], [23405], [9955], [38271], [41018], [13011], [48392], [36784], [24254], [21687], [23734], [5413], [41447], [45472], [10122], [17555], [15830], [47384], [12084], [31350], [47940], [11661], [27988], [45443], [905], [49651], [16614], [34993], [6781], [30803], [35869], [8001], [41604], [28118], [46462], [46762], [16262], [17281], [5774], [10943], [5013], [18257], [6750], [4713], [3951], [11899], [38791], [16943], [37596], [9318], [18413], [40473], [13208], [16375]]
static/koboldai.js

@@ -1472,6 +1472,7 @@ function show_model_menu(data) {
     document.getElementById("modelurl").classList.add("hidden");
     document.getElementById("use_gpu_div").classList.add("hidden");
     document.getElementById("use_8_bit_div").classList.add("hidden");
+    document.getElementById("use_4_bit_div").classList.add("hidden");
     document.getElementById("modellayers").classList.add("hidden");
     document.getElementById("oaimodel").classList.add("hidden");
     var model_layer_bars = document.getElementById('model_layer_bars');
@@ -1646,6 +1647,14 @@ function selected_model_info(data) {
         document.getElementById("use_8_bit").checked = false;
     }

+    //hide or unhide 4 bit mode
+    if (data.bit_4_available) {
+        document.getElementById("use_4_bit_div").classList.remove("hidden");
+    } else {
+        document.getElementById("use_4_bit_div").classList.add("hidden");
+        document.getElementById("use_4_bit").checked = false;
+    }
+
     //default URL loading
     if (data.default_url != null) {
         document.getElementById("modelurl").value = data.default_url;
@@ -1815,7 +1824,7 @@ function selected_model_info(data) {
     }
     accept.disabled = false;

-
+    set_4_bit_mode(invert=false);
 }

 function update_gpu_layers() {
@@ -1876,7 +1885,8 @@ function load_model() {
         'key': document.getElementById('modelkey').value, 'gpu_layers': gpu_layers.join(),
         'disk_layers': disk_layers, 'url': document.getElementById("modelurl").value,
         'online_model': selected_models,
-        'use_8_bit': document.getElementById('use_8_bit').checked};
+        'use_8_bit': document.getElementById('use_8_bit').checked,
+        'use_4_bit': document.getElementById('use_4_bit').checked};
     socket.emit("load_model", message);
     closePopups();
 }
@@ -3160,6 +3170,22 @@ function save_preset() {
     closePopups();
 }

+function set_4_bit_mode(invert=true) {
+    bit_4_status = document.getElementById("use_4_bit").checked;
+    if (invert) {
+        bit_4_status = !bit_4_status;
+    }
+    if (bit_4_status) {
+        document.getElementById("modellayers").classList.add("hidden");
+        socket.emit("use_4_bit_toggle", {"use_4_bit": false});
+    } else {
+        document.getElementById("modellayers").classList.remove("hidden");
+        socket.emit("use_4_bit_toggle", {"use_4_bit": true});
+    }
+}
+
+
+
 //--------------------------------------------General UI Functions------------------------------------
 function set_ui_level(level) {
     for (classname of ['setting_container', 'setting_container_single', 'setting_container_single_wide', 'biasing', 'palette_area', 'advanced_theme']) {
@@ -7301,4 +7327,4 @@ $el("#gamescreen").addEventListener("paste", function(event) {
         false,
         event.clipboardData.getData("text/plain")
     );
-});
+});
templates/popups.html

@@ -75,6 +75,10 @@
         <input type="checkbox" data-toggle="toggle" data-onstyle="success" id="use_8_bit" checked>
         <div class="box-label">Use 8 bit mode</div>
     </div>
+    <div class="box flex-push-right hidden" id=use_4_bit_div>
+        <input type="checkbox" data-toggle="toggle" data-onstyle="success" id="use_4_bit" checked>
+        <div class="box-label">Use 4 bit mode</div>
+    </div>
     <button type="button" class="btn popup_load_cancel_button action_button disabled" onclick="load_model()" id="btn_loadmodelaccept" disabled>Load</button>
     <button type="button" class="btn popup_load_cancel_button" onclick='closePopups();' id="btn_loadmodelclose">Cancel</button>
 </div>
@@ -402,4 +406,4 @@
     </div>
 </div>

-<div id="notification-container"></div>
+<div id="notification-container"></div>