diff --git a/aiserver.py b/aiserver.py
index 50952239..6a5af325 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1782,15 +1782,15 @@ def get_layer_count(model, directory=""):
model = directory
from transformers import AutoConfig
if(os.path.isdir(model.replace('/', '_'))):
- model_config = AutoConfig.from_pretrained(model.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache")
+ model_config = AutoConfig.from_pretrained(model.replace('/', '_'), revision=args.revision, cache_dir="cache")
elif(is_model_downloaded(model)):
- model_config = AutoConfig.from_pretrained("models/{}".format(model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
+ model_config = AutoConfig.from_pretrained("models/{}".format(model.replace('/', '_')), revision=args.revision, cache_dir="cache")
elif(os.path.isdir(directory)):
- model_config = AutoConfig.from_pretrained(directory, revision=koboldai_vars.revision, cache_dir="cache")
+ model_config = AutoConfig.from_pretrained(directory, revision=args.revision, cache_dir="cache")
elif(os.path.isdir(koboldai_vars.custmodpth.replace('/', '_'))):
- model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache")
+ model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=args.revision, cache_dir="cache")
else:
- model_config = AutoConfig.from_pretrained(model, revision=koboldai_vars.revision, cache_dir="cache")
+ model_config = AutoConfig.from_pretrained(model, revision=args.revision, cache_dir="cache")
try:
if ((utils.HAS_ACCELERATE and model_config.model_type != 'gpt2') or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel:
return utils.num_layers(model_config)
@@ -2774,19 +2774,19 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
from transformers import AutoConfig
if(os.path.isdir(koboldai_vars.custmodpth.replace('/', '_'))):
try:
- model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache")
+ model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=args.revision, cache_dir="cache")
koboldai_vars.model_type = model_config.model_type
except ValueError as e:
koboldai_vars.model_type = "not_found"
elif(os.path.isdir("models/{}".format(koboldai_vars.custmodpth.replace('/', '_')))):
try:
- model_config = AutoConfig.from_pretrained("models/{}".format(koboldai_vars.custmodpth.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
+ model_config = AutoConfig.from_pretrained("models/{}".format(koboldai_vars.custmodpth.replace('/', '_')), revision=args.revision, cache_dir="cache")
koboldai_vars.model_type = model_config.model_type
except ValueError as e:
koboldai_vars.model_type = "not_found"
else:
try:
- model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
+ model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
koboldai_vars.model_type = model_config.model_type
except ValueError as e:
koboldai_vars.model_type = "not_found"
@@ -2886,7 +2886,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
print(tokenizer_id, koboldai_vars.newlinemode)
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache")
loadsettings()
koboldai_vars.colaburl = url or koboldai_vars.colaburl
@@ -3071,19 +3071,19 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
with(maybe_use_float16()):
try:
if os.path.exists(koboldai_vars.custmodpth):
- model = GPT2LMHeadModel.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
- tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
+ model = GPT2LMHeadModel.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
+ tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
elif os.path.exists(os.path.join("models/", koboldai_vars.custmodpth)):
- model = GPT2LMHeadModel.from_pretrained(os.path.join("models/", koboldai_vars.custmodpth), revision=koboldai_vars.revision, cache_dir="cache")
- tokenizer = GPT2Tokenizer.from_pretrained(os.path.join("models/", koboldai_vars.custmodpth), revision=koboldai_vars.revision, cache_dir="cache")
+ model = GPT2LMHeadModel.from_pretrained(os.path.join("models/", koboldai_vars.custmodpth), revision=args.revision, cache_dir="cache")
+ tokenizer = GPT2Tokenizer.from_pretrained(os.path.join("models/", koboldai_vars.custmodpth), revision=args.revision, cache_dir="cache")
else:
- model = GPT2LMHeadModel.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
- tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
+ model = GPT2LMHeadModel.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
+ tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
except Exception as e:
if("out of memory" in traceback.format_exc().lower()):
raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
raise e
- tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
model.save_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), max_shard_size="500MiB")
tokenizer.save_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')))
koboldai_vars.modeldim = get_hidden_size_from_model(model)
@@ -3130,38 +3130,38 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
lowmem = {}
if(os.path.isdir(koboldai_vars.custmodpth)):
try:
- tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
+ tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
- tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
except Exception as e:
try:
- tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache")
except Exception as e:
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
try:
- model = AutoModelForCausalLM.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
+ model = AutoModelForCausalLM.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache", **lowmem)
except Exception as e:
if("out of memory" in traceback.format_exc().lower()):
raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
- model = GPTNeoForCausalLM.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
+ model = GPTNeoForCausalLM.from_pretrained(koboldai_vars.custmodpth, revision=args.revision, cache_dir="cache", **lowmem)
elif(os.path.isdir("models/{}".format(koboldai_vars.model.replace('/', '_')))):
try:
- tokenizer = AutoTokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
+ tokenizer = AutoTokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
- tokenizer = AutoTokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = AutoTokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache")
except Exception as e:
try:
- tokenizer = GPT2Tokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = GPT2Tokenizer.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache")
except Exception as e:
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
try:
- model = AutoModelForCausalLM.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
+ model = AutoModelForCausalLM.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache", **lowmem)
except Exception as e:
if("out of memory" in traceback.format_exc().lower()):
raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
- model = GPTNeoForCausalLM.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
+ model = GPTNeoForCausalLM.from_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), revision=args.revision, cache_dir="cache", **lowmem)
else:
old_rebuild_tensor = torch._utils._rebuild_tensor
def new_rebuild_tensor(storage: Union[torch_lazy_loader.LazyTensor, torch.Storage], storage_offset, shape, stride):
@@ -3177,21 +3177,21 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
torch._utils._rebuild_tensor = new_rebuild_tensor
try:
- tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
+ tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.model, revision=args.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
- tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.model, revision=args.revision, cache_dir="cache")
except Exception as e:
try:
- tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = GPT2Tokenizer.from_pretrained(koboldai_vars.model, revision=args.revision, cache_dir="cache")
except Exception as e:
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
try:
- model = AutoModelForCausalLM.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
+ model = AutoModelForCausalLM.from_pretrained(koboldai_vars.model, revision=args.revision, cache_dir="cache", **lowmem)
except Exception as e:
if("out of memory" in traceback.format_exc().lower()):
raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
- model = GPTNeoForCausalLM.from_pretrained(koboldai_vars.model, revision=koboldai_vars.revision, cache_dir="cache", **lowmem)
+ model = GPTNeoForCausalLM.from_pretrained(koboldai_vars.model, revision=args.revision, cache_dir="cache", **lowmem)
torch._utils._rebuild_tensor = old_rebuild_tensor
@@ -3208,13 +3208,13 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
import huggingface_hub
legacy = packaging.version.parse(transformers_version) < packaging.version.parse("4.22.0.dev0")
# Save the config.json
- shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, transformers.configuration_utils.CONFIG_NAME, revision=koboldai_vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.configuration_utils.CONFIG_NAME))
+ shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, transformers.configuration_utils.CONFIG_NAME, revision=args.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.configuration_utils.CONFIG_NAME))
if(utils.num_shards is None):
# Save the pytorch_model.bin of an unsharded model
try:
- shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, transformers.modeling_utils.WEIGHTS_NAME, revision=koboldai_vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_NAME))
+ shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, transformers.modeling_utils.WEIGHTS_NAME, revision=args.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_NAME))
except:
- shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, "model.safetensors", revision=koboldai_vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), "model.safetensors"))
+ shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, "model.safetensors", revision=args.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), "model.safetensors"))
else:
with open(utils.from_pretrained_index_filename) as f:
map_data = json.load(f)
@@ -3223,7 +3223,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
shutil.move(os.path.realpath(utils.from_pretrained_index_filename), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_INDEX_NAME))
# Then save the pytorch_model-#####-of-#####.bin files
for filename in filenames:
- shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, filename, revision=koboldai_vars.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), filename))
+ shutil.move(os.path.realpath(huggingface_hub.hf_hub_download(koboldai_vars.model, filename, revision=args.revision, cache_dir="cache", local_files_only=True, legacy_cache_layout=legacy)), os.path.join("models/{}".format(koboldai_vars.model.replace('/', '_')), filename))
shutil.rmtree("cache/")
if(koboldai_vars.badwordsids is koboldai_settings.badwordsids_default and koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj")):
@@ -3269,7 +3269,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
else:
from transformers import GPT2Tokenizer
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
else:
from transformers import PreTrainedModel
from transformers import modeling_utils
@@ -3682,7 +3682,7 @@ def lua_decode(tokens):
if("tokenizer" not in globals()):
from transformers import GPT2Tokenizer
global tokenizer
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
return utils.decodenewlines(tokenizer.decode(tokens))
#==================================================================#
@@ -3694,7 +3694,7 @@ def lua_encode(string):
if("tokenizer" not in globals()):
from transformers import GPT2Tokenizer
global tokenizer
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
return tokenizer.encode(utils.encodenewlines(string), max_length=int(4e9), truncation=True)
#==================================================================#
@@ -4852,19 +4852,19 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False,
try:
if(os.path.isdir(tokenizer_id)):
try:
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache")
except:
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache", use_fast=False)
elif(os.path.isdir("models/{}".format(tokenizer_id.replace('/', '_')))):
try:
- tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=args.revision, cache_dir="cache")
except:
- tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
+ tokenizer = AutoTokenizer.from_pretrained("models/{}".format(tokenizer_id.replace('/', '_')), revision=args.revision, cache_dir="cache", use_fast=False)
else:
try:
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache")
except:
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, revision=args.revision, cache_dir="cache", use_fast=False)
except:
logger.warning(f"Unknown tokenizer {repr(tokenizer_id)}")
koboldai_vars.api_tokenizer_id = tokenizer_id
@@ -5239,7 +5239,7 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
if("tokenizer" not in globals()):
from transformers import GPT2Tokenizer
global tokenizer
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=koboldai_vars.revision, cache_dir="cache")
+ tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=args.revision, cache_dir="cache")
lnheader = len(tokenizer._koboldai_header)
@@ -6491,6 +6491,12 @@ def applyoutputformatting(txt, no_sentence_trimming=False, no_single_line=False)
if len(txt) == 0:
return txt
+ # Workaround for <|endoftext|> showing up in the output of models that emit it. You can supposedly handle this directly with the tokenizer, but it keeps appearing,
+ # so for now, since we only have two known end-of-text tokens and only one model that wants its generation stopped on them, this is the easier fix.
+ # If you see this and wish to do a universal implementation, feel free, just make sure to test it on all platforms - Henk
+ txt = txt.replace("<|endoftext|>", "")
+ txt = txt.replace("", "")
+
# Use standard quotes and apostrophes
txt = utils.fixquotes(txt)
diff --git a/colab/GPU.ipynb b/colab/GPU.ipynb
index 927f5015..ba86b65e 100644
--- a/colab/GPU.ipynb
+++ b/colab/GPU.ipynb
@@ -71,9 +71,9 @@
"#@title <-- Select your model below and then click this to start KoboldAI\n",
"#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
"\n",
- "Model = \"Nerys V2 6B\" #@param [\"Nerys V2 6B\", \"Erebus 6B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Pygmalion 6B\", \"Lit V2 6B\", \"Lit 6B\", \"Shinen 6B\", \"Nerys 2.7B\", \"AID 2.7B\", \"Erebus 2.7B\", \"Janeway 2.7B\", \"Picard 2.7B\", \"Horni LN 2.7B\", \"Horni 2.7B\", \"Shinen 2.7B\", \"OPT 2.7B\", \"Fairseq Dense 2.7B\", \"Neo 2.7B\"] {allow-input: true}\n",
+ "Model = \"Nerys V2 6B\" #@param [\"Nerys V2 6B\", \"Erebus 6B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Pygmalion 6B\", \"Pygmalion 6B Dev\", \"Lit V2 6B\", \"Lit 6B\", \"Shinen 6B\", \"Nerys 2.7B\", \"AID 2.7B\", \"Erebus 2.7B\", \"Janeway 2.7B\", \"Picard 2.7B\", \"Horni LN 2.7B\", \"Horni 2.7B\", \"Shinen 2.7B\", \"OPT 2.7B\", \"Fairseq Dense 2.7B\", \"Neo 2.7B\"] {allow-input: true}\n",
"Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
- "Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
+ "Provider = \"Cloudflare\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
"use_google_drive = True #@param {type:\"boolean\"}\n",
"\n",
"!nvidia-smi\n",
@@ -87,6 +87,8 @@
" if not os.path.exists(\"/content/drive/MyDrive/\"):\n",
" os.mkdir(\"/content/drive/MyDrive/\")\n",
"\n",
+ "Revision = \"\"\n",
+ "\n",
"if Model == \"Nerys V2 6B\":\n",
" Model = \"KoboldAI/OPT-6B-nerys-v2\"\n",
" path = \"\"\n",
@@ -111,6 +113,13 @@
" Model = \"PygmalionAI/pygmalion-6b\"\n",
" path = \"\"\n",
" download = \"\"\n",
+ " Version = \"United\"\n",
+ "elif Model == \"Pygmalion 6B Dev\":\n",
+ " Model = \"PygmalionAI/pygmalion-6b\"\n",
+ " Revision = \"--revision dev\"\n",
+ " path = \"\"\n",
+ " Version = \"United\"\n",
+ " download = \"\"\n",
"elif Model == \"Lit V2 6B\":\n",
" Model = \"hakurei/litv2-6B-rev3\"\n",
" path = \"\"\n",
@@ -173,7 +182,7 @@
"else:\n",
" tunnel = \"\"\n",
"\n",
- "!wget https://koboldai.org/ckds -O - | bash /dev/stdin -m $Model -g $Version $tunnel"
+ "!wget https://koboldai.org/ckds -O - | bash /dev/stdin -m $Model -g $Version $Revision $tunnel"
],
"execution_count": null,
"outputs": []
diff --git a/colab/TPU.ipynb b/colab/TPU.ipynb
index f40dad68..60664780 100644
--- a/colab/TPU.ipynb
+++ b/colab/TPU.ipynb
@@ -66,9 +66,9 @@
"#@title <-- Select your model below and then click this to start KoboldAI\n",
"#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
"\n",
- "Model = \"Nerys 13B V2\" #@param [\"Nerys 13B V2\", \"Erebus 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 20B\", \"Erebus 20B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Pygmalion 6B\", \"Lit V2 6B\", \"Lit 6B\", \"NeoX 20B\", \"OPT 13B\", \"Fairseq Dense 13B\", \"GPT-J-6B\"] {allow-input: true}\n",
+ "Model = \"Nerys 13B V2\" #@param [\"Nerys 13B V2\", \"Erebus 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 20B\", \"Erebus 20B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Pygmalion 6B\", \"Pygmalion 6B Dev\", \"Lit V2 6B\", \"Lit 6B\", \"NeoX 20B\", \"OPT 13B\", \"Fairseq Dense 13B\", \"GPT-J-6B\"] {allow-input: true}\n",
"Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
- "Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
+ "Provider = \"Cloudflare\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
"use_google_drive = True #@param {type:\"boolean\"}\n",
"\n",
"import os\n",
@@ -133,6 +133,13 @@
" Model = \"PygmalionAI/pygmalion-6b\"\n",
" path = \"\"\n",
" download = \"\"\n",
+ " Version = \"United\"\n",
+ "elif Model == \"Pygmalion 6B Dev\":\n",
+ " Model = \"PygmalionAI/pygmalion-6b\"\n",
+ " Revision = \"--revision dev\"\n",
+ " path = \"\"\n",
+ " Version = \"United\"\n",
+ " download = \"\"\n",
"elif Model == \"Lit V2 6B\":\n",
" Model = \"hakurei/litv2-6B-rev3\"\n",
" path = \"\"\n",
diff --git a/colabkobold.sh b/colabkobold.sh
index 799a6d9c..d8e6f1c4 100644
--- a/colabkobold.sh
+++ b/colabkobold.sh
@@ -2,7 +2,7 @@
# KoboldAI Easy Colab Deployment Script by Henk717
# read the options
-TEMP=`getopt -o m:i:p:c:d:x:a:l:z:g:t:n:b:s: --long model:,init:,path:,configname:,download:,aria2:,dloc:,xloc:,7z:,git:,tar:,ngrok:,branch:,savemodel:,localtunnel:,lt: -- "$@"`
+TEMP=`getopt -o m:i:p:c:d:x:a:l:z:g:t:n:b:s:r: --long model:,init:,path:,configname:,download:,aria2:,dloc:,xloc:,7z:,git:,tar:,ngrok:,branch:,savemodel:,localtunnel:,lt:,revision: -- "$@"`
eval set -- "$TEMP"
# extract options and their arguments into variables.
@@ -10,6 +10,8 @@ while true ; do
case "$1" in
-m|--model)
model=" --model $2" ; shift 2 ;;
+ -r|--revision)
+ revision=" --revision $2" ; shift 2 ;;
-i|--init)
init=$2 ; shift 2 ;;
-p|--path)
@@ -52,8 +54,8 @@ function launch
exit 0
else
cd /content/KoboldAI-Client
- echo "Launching KoboldAI with the following options : python3 aiserver.py$model$kmpath$configname$ngrok$localtunnel$savemodel --colab"
- python3 aiserver.py$model$kmpath$configname$ngrok$localtunnel$savemodel --colab
+ echo "Launching KoboldAI with the following options : python3 aiserver.py$model$kmpath$configname$ngrok$localtunnel$savemodel$revision --colab"
+ python3 aiserver.py$model$kmpath$configname$ngrok$localtunnel$savemodel$revision --colab
exit
fi
}
diff --git a/koboldai_settings.py b/koboldai_settings.py
index 5318e09a..4038206d 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -331,13 +331,8 @@ class koboldai_vars(object):
######################################### Get Action Text by Sentence ########################################################
action_text_split = self.actions.to_sentences(submitted_text=submitted_text)
- if action_text_split == []:
- if return_text:
- return ""
- return [], 0, 0+self.genamt, []
-
# Always add newlines on chat v2
- if self.is_chat_v2():
+ if action_text_split and self.is_chat_v2():
action_text_split[-1][0] = action_text_split[-1][0].strip() + "\n"
######################################### Prompt ########################################################
@@ -648,7 +643,7 @@ class model_settings(settings):
no_save_variables = ['tqdm', 'tqdm_progress', 'tqdm_rem_time', 'socketio', 'modelconfig', 'custmodpth', 'generated_tkns',
'loaded_layers', 'total_layers', 'total_download_chunks', 'downloaded_chunks', 'presets', 'default_preset',
'koboldai_vars', 'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition',
- 'badwordsids', 'uid_presets']
+ 'badwordsids', 'uid_presets', 'revision', 'model', 'model_type', 'lazy_load', 'fp32_model', 'modeldim', 'horde_wait_time', 'horde_queue_position', 'horde_queue_size', 'newlinemode']
settings_name = "model"
default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 0.7, "rep_pen_range": 1024, "temp": 0.5, "top_p": 0.9, "top_k": 0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0,
"sampler_order": [6,0,1,2,3,4,5]}
diff --git a/static/koboldai.css b/static/koboldai.css
index 516172a9..0a298df9 100644
--- a/static/koboldai.css
+++ b/static/koboldai.css
@@ -1658,7 +1658,7 @@ body {
.themerow {
grid-area: theme;
- width: 500px;
+ /* width: 500px; */
margin-left: 10px;
}
diff --git a/templates/index_new.html b/templates/index_new.html
index d1632de0..ff3328a4 100644
--- a/templates/index_new.html
+++ b/templates/index_new.html
@@ -7,6 +7,7 @@
window.addEventListener('error', function(event) {
let fileName = event.filename.split("/").splice(-1)[0];
+ if (event.message.includes("ResizeObserver loop completed with undelivered notifications")) return;
reportError(`Error at ${fileName}:${event.lineno}`, `${event.message}\n--\nPlease report this error to the developers.`);
debug_info.errors.push({msg: event.message, url: event.filename, line: event.lineno});
});
diff --git a/utils.py b/utils.py
index 22fc05a4..45a7817f 100644
--- a/utils.py
+++ b/utils.py
@@ -286,7 +286,7 @@ def _transformers22_aria2_hook(pretrained_model_name_or_path: str, force_downloa
if token is None:
raise EnvironmentError("You specified use_auth_token=True, but a huggingface token was not found.")
_cache_dir = str(cache_dir) if cache_dir is not None else transformers.TRANSFORMERS_CACHE
- _revision = revision if revision is not None else huggingface_hub.constants.DEFAULT_REVISION
+ _revision = args.revision if args.revision is not None else huggingface_hub.constants.DEFAULT_REVISION
sharded = False
headers = {"user-agent": transformers.file_utils.http_user_agent(user_agent)}
if use_auth_token:
@@ -297,7 +297,7 @@ def _transformers22_aria2_hook(pretrained_model_name_or_path: str, force_downloa
def is_cached(filename):
try:
- huggingface_hub.hf_hub_download(pretrained_model_name_or_path, filename, cache_dir=cache_dir, local_files_only=True)
+ huggingface_hub.hf_hub_download(pretrained_model_name_or_path, filename, cache_dir=cache_dir, local_files_only=True, revision=_revision)
except ValueError:
return False
return True
@@ -306,7 +306,7 @@ def _transformers22_aria2_hook(pretrained_model_name_or_path: str, force_downloa
filename = transformers.modeling_utils.WEIGHTS_INDEX_NAME if sharded else transformers.modeling_utils.WEIGHTS_NAME
except AttributeError:
return
- url = huggingface_hub.hf_hub_url(pretrained_model_name_or_path, filename, revision=revision)
+ url = huggingface_hub.hf_hub_url(pretrained_model_name_or_path, filename, revision=_revision)
if is_cached(filename) or requests.head(url, allow_redirects=True, proxies=proxies, headers=headers):
break
if sharded:
@@ -320,7 +320,7 @@ def _transformers22_aria2_hook(pretrained_model_name_or_path: str, force_downloa
with open(map_filename) as f:
map_data = json.load(f)
filenames = set(map_data["weight_map"].values())
- urls = [huggingface_hub.hf_hub_url(pretrained_model_name_or_path, n, revision=revision) for n in filenames]
+ urls = [huggingface_hub.hf_hub_url(pretrained_model_name_or_path, n, revision=_revision) for n in filenames]
if not force_download:
urls = [u for u, n in zip(urls, filenames) if not is_cached(n)]
if not urls:
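
The utils.py hunks make the aria2 download hook honour the command-line revision as well: args.revision (falling back to the Hub's default branch when unset) is resolved once into _revision and then passed to every hf_hub_download and hf_hub_url call, so cache lookups and download URLs point at the same branch. A short sketch of that fallback, where resolve_revision is an illustrative helper that is not part of the patch:

    # Illustrative revision fallback; hf_hub_url only builds a URL, no network needed.
    import huggingface_hub

    def resolve_revision(cli_revision):
        return cli_revision if cli_revision is not None else huggingface_hub.constants.DEFAULT_REVISION

    url = huggingface_hub.hf_hub_url("gpt2", "config.json", revision=resolve_revision(None))
    print(url)  # ends in /gpt2/resolve/main/config.json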