Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)
aiserver.py (114 changes)
@@ -1337,6 +1337,8 @@ def processsettings(js):
 koboldai_vars.chatmode = js["chatmode"]
 if("chatname" in js):
 koboldai_vars.chatname = js["chatname"]
+if("botname" in js):
+koboldai_vars.botname = js["botname"]
 if("dynamicscan" in js):
 koboldai_vars.dynamicscan = js["dynamicscan"]
 if("nopromptgen" in js):
@@ -1783,15 +1785,15 @@ def get_layer_count(model, directory=""):
 model = directory
 from transformers import AutoConfig
 if(os.path.isdir(model.replace('/', '_'))):
-model_config = AutoConfig.from_pretrained(model.replace('/', '_'), revision=args.revision, cache_dir="cache")
+model_config = AutoConfig.from_pretrained(model.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache")
 elif(is_model_downloaded(model)):
-model_config = AutoConfig.from_pretrained("models/{}".format(model.replace('/', '_')), revision=args.revision, cache_dir="cache")
+model_config = AutoConfig.from_pretrained("models/{}".format(model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
 elif(os.path.isdir(directory)):
-model_config = AutoConfig.from_pretrained(directory, revision=args.revision, cache_dir="cache")
+model_config = AutoConfig.from_pretrained(directory, revision=koboldai_vars.revision, cache_dir="cache")
 elif(os.path.isdir(koboldai_vars.custmodpth.replace('/', '_'))):
-model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=args.revision, cache_dir="cache")
+model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache")
 else:
-model_config = AutoConfig.from_pretrained(model, revision=args.revision, cache_dir="cache")
+model_config = AutoConfig.from_pretrained(model, revision=koboldai_vars.revision, cache_dir="cache")
 try:
 if ((utils.HAS_ACCELERATE and model_config.model_type != 'gpt2') or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel:
 return utils.num_layers(model_config)
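Editor's note: the recurring change in these hunks swaps the CLI-derived args.revision for koboldai_vars.revision in every Hugging Face from_pretrained call. A minimal sketch of the pattern, assuming koboldai_vars exposes an optional revision attribute (the helper name load_config is illustrative, not part of the codebase):

    from transformers import AutoConfig

    def load_config(model_id: str, koboldai_vars):
        # Read the requested model revision from the shared settings object
        # instead of the parsed command-line arguments.
        revision = koboldai_vars.revision  # None means "default branch"
        return AutoConfig.from_pretrained(model_id, revision=revision, cache_dir="cache")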
@@ -2764,19 +2766,19 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
 from transformers import AutoConfig
 if(os.path.isdir(koboldai_vars.custmodpth.replace('/', '_'))):
 try:
-model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=args.revision, cache_dir="cache")
+model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache")
 koboldai_vars.model_type = model_config.model_type
 except ValueError as e:
 koboldai_vars.model_type = "not_found"
 elif(os.path.isdir("models/{}".format(koboldai_vars.custmodpth.replace('/', '_')))):
 try:
-model_config = AutoConfig.from_pretrained("models/{}".format(koboldai_vars.custmodpth.replace('/', '_')), revision=args.revision, cache_dir="cache")
+model_config = AutoConfig.from_pretrained("models/{}".format(koboldai_vars.custmodpth.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
 koboldai_vars.model_type = model_config.model_type
 except ValueError as e:
 koboldai_vars.model_type = "not_found"
 else:
 try:
-model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
+model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
 koboldai_vars.model_type = model_config.model_type
 except ValueError as e:
 koboldai_vars.model_type = "not_found"
@@ -2876,7 +2878,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
 
 print(tokenizer_id, koboldai_vars.newlinemode)
 
-tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache")
+tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=koboldai_vars.revision, cache_dir="cache")
 
 loadsettings()
 koboldai_vars.colaburl = url or koboldai_vars.colaburl
@@ -3061,19 +3063,19 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
 with(maybe_use_float16()):
 try:
 if os.path.exists(koboldai_vars.custmodpth):
-model = GPT2LMHeadModel.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
-tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
+model = GPT2LMHeadModel.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
+tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
 elif os.path.exists(os.path.join("models/", koboldai_vars.custmodpth)):
-model = GPT2LMHeadModel.from_pretrained(os.path.join("models/", koboldai_vars.custmodpth), revision=args.revision, cache_dir="cache")
-tokenizer = GPT2Tokenizer.from_pretrained(os.path.join("models/", koboldai_vars.custmodpth), revision=args.revision, cache_dir="cache")
+model = GPT2LMHeadModel.from_pretrained(os.path.join("models/", koboldai_vars.custmodpth), revision=koboldai_vars.revision, cache_dir="cache")
+tokenizer = GPT2Tokenizer.from_pretrained(os.path.join("models/", koboldai_vars.custmodpth), revision=koboldai_vars.revision, cache_dir="cache")
 else:
-model = GPT2LMHeadModel.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
-tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
+model = GPT2LMHeadModel.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
+tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
 except Exception as e:
 if("out of memory" in traceback.format_exc().lower()):
 raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
 raise e
-tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
+tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
 model.save_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), max_shard_size="500MiB")
 tokenizer.save_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')))
 koboldai_vars.modeldim = get_hidden_size_from_model(model)
@@ -3120,38 +3122,38 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
 lowmem = {}
 if(os.path.isdir(koboldai_vars.custmodpth)):
 try:
-tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache", use_fast=False)
+tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
 except Exception as e:
 try:
-tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
+tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
 except Exception as e:
 try:
-tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
+tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
 except Exception as e:
-tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
 try:
-model = AutoModelForCausalLM.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache", **lowmem)
+model = AutoModelForCausalLM.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
 except Exception as e:
 if("out of memory" in traceback.format_exc().lower()):
 raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
-model = GPTNeoForCausalLM.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache", **lowmem)
+model = GPTNeoForCausalLM.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
 elif(os.path.isdir("models/{}".format(koboldai_vars.model.replace('/', '_')))):
 try:
-tokenizer = AutoTokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache", use_fast=False)
+tokenizer = AutoTokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
 except Exception as e:
 try:
-tokenizer = AutoTokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache")
+tokenizer = AutoTokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
 except Exception as e:
 try:
-tokenizer = GPT2Tokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache")
+tokenizer = GPT2Tokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
 except Exception as e:
-tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
 try:
-model = AutoModelForCausalLM.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache", **lowmem)
+model = AutoModelForCausalLM.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
 except Exception as e:
 if("out of memory" in traceback.format_exc().lower()):
 raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
-model = GPTNeoForCausalLM.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache", **lowmem)
+model = GPTNeoForCausalLM.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
 else:
 old_rebuild_tensor = torch._utils._rebuild_tensor
 def new_rebuild_tensor(storage: Union[torch_lazy_loader.LazyTensor, torch.Storage], storage_offset, shape, stride):
@@ -3167,21 +3169,21 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
 torch._utils._rebuild_tensor = new_rebuild_tensor
 
 try:
-tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.model, revision=args.revision, cache_dir="cache", use_fast=False)
+tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
 except Exception as e:
 try:
-tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.model, revision=args.revision, cache_dir="cache")
+tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache")
 except Exception as e:
 try:
-tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.model, revision=args.revision, cache_dir="cache")
+tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache")
 except Exception as e:
-tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
 try:
-model = AutoModelForCausalLM.from_pretrained(koboldai_vars.model, revision=args.revision, cache_dir="cache", **lowmem)
+model = AutoModelForCausalLM.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
 except Exception as e:
 if("out of memory" in traceback.format_exc().lower()):
 raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
-model = GPTNeoForCausalLM.from_pretrained(koboldai_vars.model, revision=args.revision, cache_dir="cache", **lowmem)
+model = GPTNeoForCausalLM.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
 
 torch._utils._rebuild_tensor = old_rebuild_tensor
 
@@ -3198,13 +3200,13 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
 import huggingface_hub
 legacy = packaging.version.parse(transformers_version) < packaging.version.parse("4.22.0.dev0")
 # Save the config.json
-shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, transformers.configuration_utils.CONFIG_NAME, revision=args.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.configuration_utils.CONFIG_NAME))
+shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, transformers.configuration_utils.CONFIG_NAME, revision=koboldai_vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.configuration_utils.CONFIG_NAME))
 if(utils.num_shards is None):
 # Save the pytorch_model.bin of an unsharded model
 try:
-shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, transformers.modeling_utils.WEIGHTS_NAME, revision=args.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_NAME))
+shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, transformers.modeling_utils.WEIGHTS_NAME, revision=koboldai_vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_NAME))
 except:
-shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, "model.safetensors", revision=args.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), "model.safetensors"))
+shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, "model.safetensors", revision=koboldai_vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), "model.safetensors"))
 else:
 with open(utils.from_pretrained_index_filename) as f:
 map_data = json.load(f)
@@ -3213,7 +3215,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
 shutil.move(os.path.realpath(utils.from_pretrained_index_filename), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_INDEX_NAME))
 # Then save the pytorch_model-#####-of-#####.bin files
 for filename in filenames:
-shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, filename, revision=args.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), filename))
+shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, filename, revision=koboldai_vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), filename))
 shutil.rmtree("cache/")
 
 if(koboldai_vars.badwordsids is koboldai_settings.badwordsids_default and koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj")):
@@ -3259,7 +3261,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
 
 else:
 from transformers import GPT2Tokenizer
-tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
 else:
 from transformers import PreTrainedModel
 from transformers import modeling_utils
@@ -3672,7 +3674,7 @@ def lua_decode(tokens):
 if("tokenizer" not in globals()):
 from transformers import GPT2Tokenizer
 global tokenizer
-tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
 return utils.decodenewlines(tokenizer.decode(tokens))
 
 #==================================================================#
@@ -3684,7 +3686,7 @@ def lua_encode(string):
 if("tokenizer" not in globals()):
 from transformers import GPT2Tokenizer
 global tokenizer
-tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
 return tokenizer.encode(utils.encodenewlines(string), max_length=int(4e9), truncation=True)
 
 #==================================================================#
@@ -3858,6 +3860,7 @@ def lua_has_setting(setting):
 "useprompt",
 "chatmode",
 "chatname",
+"botname",
 "adventure",
 "dynamicscan",
 "nopromptgen",
@@ -4174,6 +4177,7 @@ def do_connect():
 return
 logger.debug("{0}Client connected!{1}".format(colors.GREEN, colors.END))
 emit('from_server', {'cmd': 'setchatname', 'data': koboldai_vars.chatname}, room="UI_1")
+emit('from_server', {'cmd': 'setbotname', 'data': koboldai_vars.botname}, room="UI_1")
 emit('from_server', {'cmd': 'setanotetemplate', 'data': koboldai_vars.authornotetemplate}, room="UI_1")
 emit('from_server', {'cmd': 'connected', 'smandelete': koboldai_vars.smandelete, 'smanrename': koboldai_vars.smanrename, 'modelname': getmodelname()}, room="UI_1")
 if(koboldai_vars.host):
@@ -4239,8 +4243,10 @@ def get_message(msg):
 if(type(msg['chatname']) is not str):
 raise ValueError("Chatname must be a string")
 koboldai_vars.chatname = msg['chatname']
+koboldai_vars.botname = msg['botname']
 settingschanged()
 emit('from_server', {'cmd': 'setchatname', 'data': koboldai_vars.chatname}, room="UI_1")
+emit('from_server', {'cmd': 'setbotname', 'data': koboldai_vars.botname}, room="UI_1")
 koboldai_vars.recentrng = koboldai_vars.recentrngm = None
 actionsubmit(msg['data'], actionmode=msg['actionmode'])
 elif(koboldai_vars.mode == "edit"):
@@ -4258,8 +4264,10 @@ def get_message(msg):
 if(type(msg['chatname']) is not str):
 raise ValueError("Chatname must be a string")
 koboldai_vars.chatname = msg['chatname']
+koboldai_vars.botname = msg['botname']
 settingschanged()
 emit('from_server', {'cmd': 'setchatname', 'data': koboldai_vars.chatname}, room="UI_1")
+emit('from_server', {'cmd': 'setbotname', 'data': koboldai_vars.botname}, room="UI_1")
 actionretry(msg['data'])
 # Back/Undo Action
 elif(msg['cmd'] == 'back'):
@@ -4842,19 +4850,19 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False,
 try:
 if(os.path.isdir(tokenizer_id)):
 try:
-tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache")
+tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=koboldai_vars.revision, cache_dir="cache")
 except:
-tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache", use_fast=False)
+tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
 elif(os.path.isdir("models/{}".format(tokenizer_id.replace('/', '_')))):
 try:
-tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=args.revision, cache_dir="cache")
+tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
 except:
-tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=args.revision, cache_dir="cache", use_fast=False)
+tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
 else:
 try:
-tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache")
+tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=koboldai_vars.revision, cache_dir="cache")
 except:
-tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache", use_fast=False)
+tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
 except:
 logger.warning(f"Unknown tokenizer {repr(tokenizer_id)}")
 koboldai_vars.api_tokenizer_id = tokenizer_id
@@ -4875,9 +4883,13 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False,
 
 # "Chat" mode
 if(koboldai_vars.chatmode and koboldai_vars.gamestarted):
+if(koboldai_vars.botname):
+botname = (koboldai_vars.botname + ":")
+else:
+botname = ""
 data = re.sub(r'\n+', ' ', data)
 if(len(data)):
-data = f"\n{koboldai_vars.chatname}: {data}\n"
+data = f"\n{koboldai_vars.chatname}: {data}\n{botname}"
 
 # If we're not continuing, store a copy of the raw input
 if(data != ""):
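Editor's note: in chat mode the submitted text is now wrapped with the player's chat name and, when a bot name is set, the bot's speaker tag is appended so generation continues as the bot. A minimal sketch of the formatting, assuming chatname and botname behave as in the hunk above (format_chat_turn is an illustrative helper, not a function in the codebase):

    import re

    def format_chat_turn(data: str, chatname: str, botname: str) -> str:
        # Collapse newlines in the user's message, prepend the player's tag,
        # and end with the bot's tag so the model replies in the bot's voice.
        bot_tag = f"{botname}:" if botname else ""
        data = re.sub(r'\n+', ' ', data)
        return f"\n{chatname}: {data}\n{bot_tag}"

    # e.g. format_chat_turn("hi there", "You", "Bot") -> "\nYou: hi there\nBot:"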
@@ -5229,7 +5241,7 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
 if("tokenizer" not in globals()):
 from transformers import GPT2Tokenizer
 global tokenizer
-tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
 
 lnheader = len(tokenizer._koboldai_header)
 
@@ -7162,8 +7174,8 @@ def exitModes():
 # Launch in-browser save prompt
 #==================================================================#
 def saveas(data):
 
-koboldai_vars.story_name = data['name']
+name = data['name']
+koboldai_vars.story_name = name
 if not data['pins']:
 koboldai_vars.actions.clear_all_options()
 # Check if filename exists already
@@ -66,7 +66,7 @@
 "#@title <b><-- Select your model below and then click this to start KoboldAI</b>\n",
 "#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
 "\n",
-"Model = \"Nerys 13B V2\" #@param [\"Nerys 13B V2\", \"Erebus 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 20B\", \"Erebus 20B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Pygmalion 6B\", \"Pygmalion 6B Dev\", \"Lit V2 6B\", \"Lit 6B\", \"NeoX 20B\", \"OPT 13B\", \"Fairseq Dense 13B\", \"GPT-J-6B\"] {allow-input: true}\n",
+"Model = \"Nerys 13B V2\" #@param [\"Nerys 13B V2\", \"Erebus 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 20B\", \"Erebus 20B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Pygmalion 6B\", \"Lit V2 6B\", \"Lit 6B\", \"NeoX 20B\", \"OPT 13B\", \"Fairseq Dense 13B\", \"GPT-J-6B\"] {allow-input: true}\n",
 "Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
 "Provider = \"Cloudflare\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
 "use_google_drive = True #@param {type:\"boolean\"}\n",
@@ -649,7 +649,7 @@ class model_settings(settings):
 no_save_variables = ['modelconfig', 'custmodpth', 'generated_tkns',
 'loaded_layers', 'total_layers', 'total_download_chunks', 'downloaded_chunks', 'presets', 'default_preset',
 'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition',
-'badwordsids', 'uid_presets', 'revision', 'model', 'model_type', 'lazy_load', 'fp32_model', 'modeldim', 'horde_wait_time', 'horde_queue_position', 'horde_queue_size', 'newlinemode', 'tqdm_progress', 'tqdm_rem_time', '_tqdm']
+'badwordsids', 'uid_presets', 'model', 'model_type', 'lazy_load', 'fp32_model', 'modeldim', 'horde_wait_time', 'horde_queue_position', 'horde_queue_size', 'newlinemode', 'tqdm_progress', 'tqdm_rem_time', '_tqdm']
 settings_name = "model"
 default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 0.7, "rep_pen_range": 1024, "temp": 0.5, "top_p": 0.9, "top_k": 0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0,
 "sampler_order": [6,0,1,2,3,4,5]}
@@ -707,7 +707,6 @@ class model_settings(settings):
 self.sampler_order = [6, 0, 1, 2, 3, 4, 5]
 self.newlinemode = "n"
 self.lazy_load = True # Whether or not to use torch_lazy_loader.py for transformers models in order to reduce CPU memory usage
-self.revision = None
 self.presets = [] # Holder for presets
 self.selected_preset = ""
 self.uid_presets = []
@@ -871,6 +870,7 @@ class story_settings(settings):
 self.useprompt = False # Whether to send the full prompt with every submit action
 self.chatmode = False
 self.chatname = "You"
+self.botname = "Bot"
 self.adventure = False
 self.actionmode = 0
 self.storymode = 0
@@ -1124,7 +1124,7 @@ class story_settings(settings):
 
 class user_settings(settings):
 local_only_variables = ['importjs']
-no_save_variables = ['importnum', 'importjs', 'loadselect', 'spselect', 'svowname', 'saveow', 'laststory', 'sid']
+no_save_variables = ['importnum', 'importjs', 'loadselect', 'spselect', 'svowname', 'saveow', 'laststory', 'sid', "revision"]
 settings_name = "user"
 def __init__(self, socketio):
 self._socketio = socketio
@@ -1171,6 +1171,7 @@ class user_settings(settings):
 self.screenshot_author_name = "Anonymous"
 self.screenshot_use_boring_colors = False
 self.oaiurl = "" # OpenAI API URL
+self.revision = None
 self.oaiengines = "https://api.openai.com/v1/engines"
 self.url = "https://api.inferkit.com/v1/models/standard/generate" # InferKit API URL
 self.colaburl = "" # Ngrok url for Google Colab mode
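Editor's note: the net effect of the settings hunks is that revision moves from model_settings to user_settings (and stays in no_save_variables so it is never written to a settings file), while story_settings gains a persisted botname default for chat mode. A minimal sketch of the resulting ownership, assuming the settings base class simply holds attributes:

    class user_settings:
        # revision now lives with the user and is excluded from saved settings files
        no_save_variables = ['importnum', 'importjs', 'loadselect', 'spselect',
                             'svowname', 'saveow', 'laststory', 'sid', "revision"]

        def __init__(self):
            self.revision = None  # None means "use the model repo's default branch"

    class story_settings:
        def __init__(self):
            self.chatname = "You"  # speaker tag for the player in chat mode
            self.botname = "Bot"   # new: speaker tag for the model's replies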
@@ -946,8 +946,10 @@ function _dosubmit() {
 submit_throttle = null;
 input_text.val("");
 hideMessage();
-hidegenseqs();
-socket.send({'cmd': 'submit', 'allowabort': !disallow_abort, 'actionmode': adventure ? action_mode : 0, 'chatname': chatmode ? chat_name.val() : undefined, 'data': txt});
+if(!memorymode){
+hidegenseqs();
+}
+socket.send({'cmd': 'submit', 'allowabort': !disallow_abort, 'actionmode': adventure ? action_mode : 0, 'chatname': chatmode ? chat_name.val() : undefined, 'botname': chatmode ? bot_name.val() : undefined, 'data': txt});
 }
 
 function changemode() {
@@ -1490,8 +1492,10 @@ function setmodevisibility(state) {
 function setchatnamevisibility(state) {
 if(state){ // Enabling
 show([chat_name]);
+show([bot_name]);
 } else{ // Disabling
 hide([chat_name]);
+hide([bot_name]);
 }
 }
 
@@ -2264,6 +2268,7 @@ $(document).ready(function(){
 input_text = $('#input_text');
 message_text = $('#messagefield');
 chat_name = $('#chatname');
+bot_name = $('#botname');
 settings_menu = $("#settingsmenu");
 format_menu = $('#formatmenu');
 anote_menu = $('#anoterowcontainer');
@@ -2867,6 +2872,8 @@ $(document).ready(function(){
 hidegenseqs();
 } else if(msg.cmd == "setchatname") {
 chat_name.val(msg.data);
+} else if(msg.cmd == "setbotname") {
+bot_name.val(msg.data);
 } else if(msg.cmd == "setlabelnumseq") {
 // Update setting label with value from server
 $("#setnumseqcur").val(msg.data);
@@ -3181,7 +3188,7 @@ $(document).ready(function(){
 button_actretry.on("click", function(ev) {
 beginStream();
 hideMessage();
-socket.send({'cmd': 'retry', 'chatname': chatmode ? chat_name.val() : undefined, 'data': ''});
+socket.send({'cmd': 'retry', 'chatname': chatmode ? chat_name.val() : undefined, 'botname': chatmode ? bot_name.val() : undefined, 'data': ''});
 hidegenseqs();
 });
 
@@ -79,6 +79,13 @@ body.connected #topmenu, #topmenu.always-available {
 margin-left: 10px;
 }
 
+#botname {
+background-color: #404040;
+color: #ffffff;
+width: 200px;
+margin-left: 10px;
+}
+
 #menuitems {
 display: flex;
 width: 100%;
@@ -144,6 +144,7 @@
 <button type="button" class="btn btn-primary" id="btn_actretry">Retry</button>
 </div>
 <input type="text" id="chatname" class="form-control hidden" placeholder="Chat name">
+<input type="text" id="botname" class="form-control hidden" placeholder="Bot name">
 <div id="messagefield"></div>
 <div class="box flex-push-right">
 <input type="checkbox" data-toggle="toggle" data-onstyle="success" id="allowediting" disabled>
@@ -109,6 +109,20 @@
 <span class="setting_minlabel"><span style="top: -4px; position: relative;"></span></span>
 <span class="setting_maxlabel"><span style="top: -4px; position: relative;"></span></span>
 </div>
+<div class="setting_container chat_mode var_sync_alt_story_chatmode" ui_level=0>
+<!---Top Row---->
+<span class="setting_label">
+<span style="white-space: pre-wrap;">Bot Name: </span>
+<span class="helpicon material-icons-outlined" tooltip="The bot's name for chat mode.">help_icon</span>
+</span>
+<!---Bottom Row---->
+<span class="setting_item" style="height: 25px;">
+<input autocomplete="off" id="var_sync_story_botname" class="var_sync_story_botname settings_select" onchange="sync_to_server(this);">
+</span>
+<!---Slider Labels--->
+<span class="setting_minlabel"><span style="top: -4px; position: relative;"></span></span>
+<span class="setting_maxlabel"><span style="top: -4px; position: relative;"></span></span>
+</div>
 </div>
 <span id="debug-dump" class="cursor" onclick="openPopup('debug-file-prompt');">Download debug dump</span>
 <div id="Images">
@@ -1,4 +1,5 @@
 <!---------------- World Info Card ---------------------->
+<link href="static/koboldai.css" rel="stylesheet">
 <div draggable="true" class="world_info_card" id="world_info_">
 <div class="world_info_title_area">
 <div>
@@ -24,7 +25,7 @@
 contenteditable="true"
 data-placeholder="Person"
 spellcheck="false"
-></span>
+></span> <span class="helpicon material-icons-outlined" tooltip="Please enter a noun that describes a person, place or thing." "]">help_icon</span>
 </div>
 </div>
 <span id="world_info_delete_" class="world_info_delete">X</span>
utils.py (14 changes)
@@ -286,7 +286,7 @@ def _transformers22_aria2_hook(pretrained_model_name_or_path: str, force_downloa
 if token is None:
 raise EnvironmentError("You specified use_auth_token=True, but a huggingface token was not found.")
 _cache_dir = str(cache_dir) if cache_dir is not None else transformers.TRANSFORMERS_CACHE
-_revision = revision if revision is not None else huggingface_hub.constants.DEFAULT_REVISION
+_revision = koboldai_vars.revision if koboldai_vars.revision is not None else huggingface_hub.constants.DEFAULT_REVISION
 sharded = False
 headers = {"user-agent": transformers.file_utils.http_user_agent(user_agent)}
 if use_auth_token:
@@ -306,7 +306,7 @@ def _transformers22_aria2_hook(pretrained_model_name_or_path: str, force_downloa
 filename = transformers.modeling_utils.WEIGHTS_INDEX_NAME if sharded else transformers.modeling_utils.WEIGHTS_NAME
 except AttributeError:
 return
-url = huggingface_hub.hf_hub_url(pretrained_model_name_or_path, filename, revision=revision)
+url = huggingface_hub.hf_hub_url(pretrained_model_name_or_path, filename, revision=_revision)
 if is_cached(filename) or requests.head(url, allow_redirects=True, proxies=proxies, headers=headers):
 break
 if sharded:
@@ -320,7 +320,7 @@ def _transformers22_aria2_hook(pretrained_model_name_or_path: str, force_downloa
 with open(map_filename) as f:
 map_data = json.load(f)
 filenames = set(map_data["weight_map"].values())
-urls = [huggingface_hub.hf_hub_url(pretrained_model_name_or_path, n, revision=revision) for n in filenames]
+urls = [huggingface_hub.hf_hub_url(pretrained_model_name_or_path, n, revision=_revision) for n in filenames]
 if not force_download:
 urls = [u for u, n in zip(urls, filenames) if not is_cached(n)]
 if not urls:
@@ -485,6 +485,7 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
 import transformers
 import transformers.modeling_utils
 from huggingface_hub import HfFolder
+_revision = koboldai_vars.revision if koboldai_vars.revision is not None else huggingface_hub.constants.DEFAULT_REVISION
 if shutil.which("aria2c") is None: # Don't do anything if aria2 is not installed
 return
 if local_files_only: # If local_files_only is true, we obviously don't need to download anything
@@ -519,7 +520,7 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
 filename = transformers.modeling_utils.WEIGHTS_INDEX_NAME if sharded else transformers.modeling_utils.WEIGHTS_NAME
 except AttributeError:
 return
-url = huggingface_hub.hf_hub_url(pretrained_model_name_or_path, filename, revision=revision)
+url = huggingface_hub.hf_hub_url(pretrained_model_name_or_path, filename, revision=_revision)
 if is_cached(url) or requests.head(url, allow_redirects=True, proxies=proxies, headers=headers):
 break
 if sharded:
@@ -533,7 +534,7 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
 with open(map_filename) as f:
 map_data = json.load(f)
 filenames = set(map_data["weight_map"].values())
-urls = [huggingface_hub.hf_hub_url(pretrained_model_name_or_path, n, revision=revision) for n in filenames]
+urls = [huggingface_hub.hf_hub_url(pretrained_model_name_or_path, n, revision=_revision) for n in filenames]
 if not force_download:
 urls = [u for u in urls if not is_cached(u)]
 if not urls:
@@ -580,7 +581,8 @@ def get_num_shards(filename):
 def get_sharded_checkpoint_num_tensors(pretrained_model_name_or_path, filename, cache_dir=None, force_download=False, proxies=None, resume_download=False, local_files_only=False, use_auth_token=None, user_agent=None, revision=None, **kwargs):
 import transformers.modeling_utils
 import torch
-shard_paths, _ = transformers.modeling_utils.get_checkpoint_shard_files(pretrained_model_name_or_path, filename, cache_dir=cache_dir, force_download=force_download, proxies=proxies, resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, user_agent=user_agent, revision=revision)
+_revision = koboldai_vars.revision if koboldai_vars.revision is not None else huggingface_hub.constants.DEFAULT_REVISION
+shard_paths, _ = transformers.modeling_utils.get_checkpoint_shard_files(pretrained_model_name_or_path, filename, cache_dir=cache_dir, force_download=force_download, proxies=proxies, resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, user_agent=user_agent, revision=_revision)
 return list(itertools.chain(*(torch.load(p, map_location="cpu").keys() for p in shard_paths)))
 
 #==================================================================#
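Editor's note: the utils.py download hooks now resolve the revision from koboldai_vars, falling back to the hub's default branch before building download URLs. A minimal sketch of that fallback, assuming koboldai_vars.revision may be None (resolve_revision is an illustrative helper name):

    import huggingface_hub

    def resolve_revision(koboldai_vars) -> str:
        # Prefer the user-selected revision; otherwise use the repo's default branch.
        return (koboldai_vars.revision
                if koboldai_vars.revision is not None
                else huggingface_hub.constants.DEFAULT_REVISION)

    # The resolved value is then passed as revision=... to huggingface_hub.hf_hub_url(repo_id, filename)
    # when constructing the aria2 download URLs.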