Merge branch 'main' into neox

Gnome Ann 2022-06-21 19:30:51 -04:00
commit 5e3c7c07ae
24 changed files with 1183 additions and 307 deletions

.gitignore vendored (3 changed lines)

@ -31,3 +31,6 @@ Uninstall
# Ignore compiled Python files.
*.pyc
# Don't ignore defaults
!defaults/*
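
The un-ignored defaults/ directory holds per-model settings files shipped with the repository; the reworked loadsettings() in aiserver.py (later in this commit) reads defaults/<model>.settings before the user's settings/<model>.settings. A hypothetical example of such a file, written with only keys that processsettings() recognises (the file name and values are illustrative):

import json

with open("defaults/KoboldAI_fairseq-dense-13B-Nerys.settings", "w") as f:
    json.dump({"temp": 0.5, "top_p": 0.9, "rep_pen": 1.1, "sampler_order": [0, 1, 2, 3, 4, 5]}, f)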

Uninstall.bat (new normal file, 32 lines)

@ -0,0 +1,32 @@
@echo off
cd /D %~dp0
TITLE KoboldAI Uninstall Helper
SET /P M=<loader.settings
IF %M%==3 subst /D B: >nul
IF %M%==1 subst /D K: >nul
IF "%1" == "FORCE" GOTO UNINSTALL
IF EXIST "Uninstall\unins000.exe" (
start Uninstall\unins000.exe
exit
) ELSE (
echo This will remove all KoboldAI folders that do not contain user data
pause
GOTO UNINSTALL
)
:UNINSTALL
echo Uninstallation in progress, please wait...
set DM=Y
attrib -h .git >nul
for /d %%D in (*) do if not "%%~nxD"=="stories" if not "%%~nxD"=="userscripts" if not "%%~nxD"=="settings" if not "%%~nxD"=="softprompts" if not "%%~nxD"=="models" if not "%%~nxD"=="Uninstall" rmdir /S /Q %%~nxD
for %%i in (*) do if not "%%i"=="Uninstall.bat" del /q "%%i"
set /P DM=Would you like to delete the models folder? (Y/n) :
IF %DM%==Y rmdir models /s /q
IF %DM%==y rmdir models /s /q
set DM=N
set /P DM=Would you like to delete all other user folders? (y/N) :
IF %DM%==Y rmdir stories userscripts settings softprompts /s /q
IF %DM%==y rmdir stories userscripts settings softprompts /s /q
del Uninstall.bat

aiserver.py

@ -1,7 +1,7 @@
#!/usr/bin/python3
#==================================================================#
# KoboldAI
# Version: 1.17.0
# Version: 1.18.1
# By: KoboldAIDev and the KoboldAI Community
#==================================================================#
@ -16,6 +16,9 @@ os.environ['EVENTLET_THREADPOOL_SIZE'] = '1'
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
from eventlet import tpool
import logging
logging.getLogger("urllib3").setLevel(logging.ERROR)
from os import path, getcwd
import time
import re
@ -23,6 +26,7 @@ import json
import collections
import zipfile
import packaging
import packaging.version
import contextlib
import traceback
import threading
@ -54,6 +58,27 @@ if lupa.LUA_VERSION[:2] != (5, 4):
print(f"Please install lupa==1.10. You have lupa {lupa.__version__}.", file=sys.stderr)
# Make sure tqdm progress bars display properly in Colab
from tqdm.auto import tqdm
old_init = tqdm.__init__
def new_init(self, *args, **kwargs):
old_init(self, *args, **kwargs)
if(self.ncols == 0 and kwargs.get("ncols") != 0):
self.ncols = 99
tqdm.__init__ = new_init
# Fix some issues with the OPT tokenizer
from transformers import PreTrainedTokenizerBase
old_pretrainedtokenizerbase_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__
@classmethod
def new_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs):
tokenizer = old_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs)
tokenizer._koboldai_header = tokenizer.encode("")
tokenizer.add_bos_token = False
tokenizer.add_prefix_space = False
return tokenizer
PreTrainedTokenizerBase.from_pretrained = new_pretrainedtokenizerbase_from_pretrained
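# The override above captures whatever prefix tokens a tokenizer would normally
# prepend, before BOS insertion is switched off; tokenizer._koboldai_header is
# consumed by calcsubmitbudget() further down. A standalone sketch of the
# assumption (the model name and the [2] value are assumptions about OPT, whose
# BOS token </s> has id 2; GPT-2/GPT-Neo style tokenizers would return []):
#
#     from transformers import AutoTokenizer
#     tok = AutoTokenizer.from_pretrained("facebook/opt-125m", use_fast=False)
#     tok._koboldai_header  # assumed to be [2], i.e. just the OPT BOS token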
#==================================================================#
# Variables & Storage
#==================================================================#
@ -76,9 +101,9 @@ mainmenu = [
["Adventure Models", "adventurelist", ""],
["Novel Models", "novellist", ""],
["NSFW Models", "nsfwlist", ""],
["Chatbot Models", "chatlist", ""],
["Untuned GPT-Neo/J", "gptneolist", ""],
["Untuned Fairseq Dense", "fsdlist", ""],
["Untuned OPT", "optlist", ""],
["Untuned XGLM", "xglmlist", ""],
["Untuned GPT2", "gpt2list", ""],
["Online Services", "apilist", ""],
@ -86,8 +111,10 @@ mainmenu = [
]
adventurelist= [
["Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"],
["Skein 6B", "KoboldAI/GPT-J-6B-Skein", "16GB"],
["Adventure 6B", "KoboldAI/GPT-J-6B-Adventure", "16GB"],
["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB"],
["Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "8GB"],
["Adventure 1.3B", "KoboldAI/GPT-Neo-1.3B-Adventure", "6GB"],
["Adventure 125M (Mia)", "Merry/AID-Neo-125M", "2GB"],
@ -95,11 +122,13 @@ adventurelist= [
]
novellist= [
["Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"],
["Janeway FSD 13B", "KoboldAI/fairseq-dense-13B-Janeway", "32GB"],
["Janeway FSD 6.7B", "KoboldAI/fairseq-dense-6.7B-Janeway", "16GB"],
["Janeway Neo 6B", "KoboldAI/GPT-J-6B-Janeway", "16GB"],
["Janeway Neo 2.7B", "KoboldAI/GPT-Neo-2.7B-Janeway", "8GB"],
["Janeway FSD 2.7B", "KoboldAI/fairseq-dense-2.7B-Janeway", "8GB"],
["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB"],
["Horni-LN 2.7B", "KoboldAI/GPT-Neo-2.7B-Horni-LN", "8GB"],
["Picard 2.7B (Older Janeway)", "KoboldAI/GPT-Neo-2.7B-Picard", "8GB"],
["Return to Main Menu", "Return", ""],
@ -137,6 +166,17 @@ gpt2list = [
["Return to Main Menu", "Return", ""],
]
optlist = [
["OPT 30B", "facebook/opt-30b", "64GB"],
["OPT 13B", "facebook/opt-13b", "32GB"],
["OPT 6.7B", "facebook/opt-6.7b", "16GB"],
["OPT 2.7B", "facebook/opt-2.7b", "8GB"],
["OPT 1.3B", "facebook/opt-1.3b", "4GB"],
["OPT 350M", "facebook/opt-350m", "2GB"],
["OPT 125M", "facebook/opt-125m", "1GB"],
["Return to Main Menu", "Return", ""],
]
fsdlist = [
["Fairseq Dense 13B", "KoboldAI/fairseq-dense-13B", "32GB"],
["Fairseq Dense 6.7B", "KoboldAI/fairseq-dense-6.7B", "16GB"],
@ -172,7 +212,7 @@ class vars:
model_type = "" # Model Type (Automatically taken from the model config)
noai = False # Runs the script without starting up the transformers pipeline
aibusy = False # Stops submissions while the AI is working
max_length = 1024 # Maximum number of tokens to submit per action
max_length = 2048 # Maximum number of tokens to submit per action
ikmax = 3000 # Maximum number of characters to submit to InferKit
genamt = 80 # Amount of text for each action to generate
ikgen = 200 # Number of characters for InferKit to generate
@ -182,6 +222,7 @@ class vars:
temp = 0.5 # Default generator temperature
top_p = 0.9 # Default generator top_p
top_k = 0 # Default generator top_k
top_a = 0.0 # Default generator top-a
tfs = 1.0 # Default generator tfs (tail-free sampling)
typical = 1.0 # Default generator typical sampling threshold
numseqs = 1 # Number of sequences to ask the generator to create
@ -228,6 +269,8 @@ class vars:
# badwords = [] # Array of str/chr values that should be removed from output
badwordsids = [[13460], [6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting
badwordsids_neox = [[0], [1], [44162], [9502], [12520], [31841], [36320], [49824], [34417], [6038], [34494], [24815], [26635], [24345], [3455], [28905], [44270], [17278], [32666], [46880], [7086], [43189], [37322], [17778], [20879], [49821], [3138], [14490], [4681], [21391], [26786], [43134], [9336], [683], [48074], [41256], [19181], [29650], [28532], [36487], [45114], [46275], [16445], [15104], [11337], [1168], [5647], [29], [27482], [44965], [43782], [31011], [42944], [47389], [6334], [17548], [38329], [32044], [35487], [2239], [34761], [7444], [1084], [12399], [18990], [17636], [39083], [1184], [35830], [28365], [16731], [43467], [47744], [1138], [16079], [40116], [45564], [18297], [42368], [5456], [18022], [42696], [34476], [23505], [23741], [39334], [37944], [45382], [38709], [33440], [26077], [43600], [34418], [36033], [6660], [48167], [48471], [15775], [19884], [41533], [1008], [31053], [36692], [46576], [20095], [20629], [31759], [46410], [41000], [13488], [30952], [39258], [16160], [27655], [22367], [42767], [43736], [49694], [13811], [12004], [46768], [6257], [37471], [5264], [44153], [33805], [20977], [21083], [25416], [14277], [31096], [42041], [18331], [33376], [22372], [46294], [28379], [38475], [1656], [5204], [27075], [50001], [16616], [11396], [7748], [48744], [35402], [28120], [41512], [4207], [43144], [14767], [15640], [16595], [41305], [44479], [38958], [18474], [22734], [30522], [46267], [60], [13976], [31830], [48701], [39822], [9014], [21966], [31422], [28052], [34607], [2479], [3851], [32214], [44082], [45507], [3001], [34368], [34758], [13380], [38363], [4299], [46802], [30996], [12630], [49236], [7082], [8795], [5218], [44740], [9686], [9983], [45301], [27114], [40125], [1570], [26997], [544], [5290], [49193], [23781], [14193], [40000], [2947], [43781], [9102], [48064], [42274], [18772], [49384], [9884], [45635], [43521], [31258], [32056], [47686], [21760], [13143], [10148], [26119], [44308], [31379], [36399], [23983], [46694], [36134], [8562], [12977], [35117], [28591], [49021], [47093], [28653], [29013], [46468], [8605], [7254], [25896], [5032], [8168], [36893], [38270], [20499], [27501], [34419], [29547], [28571], [36586], [20871], [30537], [26842], [21375], [31148], [27618], [33094], [3291], [31789], [28391], [870], [9793], [41361], [47916], [27468], [43856], [8850], [35237], [15707], [47552], [2730], [41449], [45488], [3073], [49806], [21938], [24430], [22747], [20924], [46145], [20481], [20197], [8239], [28231], [17987], [42804], [47269], [29972], [49884], [21382], [46295], [36676], [34616], [3921], [26991], [27720], [46265], [654], [9855], [40354], [5291], [34904], [44342], [2470], [14598], [880], [19282], [2498], [24237], [21431], [16369], [8994], [44524], [45662], [13663], [37077], [1447], [37786], [30863], [42854], [1019], [20322], [4398], [12159], [44072], [48664], [31547], [18736], [9259], [31], [16354], [21810], [4357], [37982], [5064], [2033], [32871], [47446], [62], [22158], [37387], [8743], [47007], [17981], [11049], [4622], [37916], [36786], [35138], [29925], [14157], [18095], [27829], [1181], [22226], [5709], [4725], [30189], [37014], [1254], [11380], [42989], [696], [24576], [39487], [30119], [1092], [8088], [2194], [9899], [14412], [21828], [3725], [13544], [5180], [44679], [34398], [3891], [28739], [14219], [37594], [49550], [11326], [6904], [17266], [5749], [10174], [23405], [9955], [38271], [41018], [13011], [48392], [36784], [24254], [21687], [23734], [5413], [41447], [45472], [10122], [17555], [15830], [47384], [12084], [31350], [47940], 
[11661], [27988], [45443], [905], [49651], [16614], [34993], [6781], [30803], [35869], [8001], [41604], [28118], [46462], [46762], [16262], [17281], [5774], [10943], [5013], [18257], [6750], [4713], [3951], [11899], [38791], [16943], [37596], [9318], [18413], [40473], [13208], [16375]]
badwordsids_opt = [[44717], [46613], [48513], [49923], [50185], [48755], [8488], [43303], [49659], [48601], [49817], [45405], [48742], [49925], [47720], [11227], [48937], [48784], [50017], [42248], [49310], [48082], [49895], [50025], [49092], [49007], [8061], [44226], [0], [742], [28578], [15698], [49784], [46679], [39365], [49281], [49609], [48081], [48906], [46161], [48554], [49670], [48677], [49721], [49632], [48610], [48462], [47457], [10975], [46077], [28696], [48709], [43839], [49798], [49154], [48203], [49625], [48395], [50155], [47161], [49095], [48833], [49420], [49666], [48443], [22176], [49242], [48651], [49138], [49750], [40389], [48021], [21838], [49070], [45333], [40862], [1], [49915], [33525], [49858], [50254], [44403], [48992], [48872], [46117], [49853], [47567], [50206], [41552], [50068], [48999], [49703], [49940], [49329], [47620], [49868], [49962], [2], [44082], [50236], [31274], [50260], [47052], [42645], [49177], [17523], [48691], [49900], [49069], [49358], [48794], [47529], [46479], [48457], [646], [49910], [48077], [48935], [46386], [48902], [49151], [48759], [49803], [45587], [48392], [47789], [48654], [49836], [49230], [48188], [50264], [46844], [44690], [48505], [50161], [27779], [49995], [41833], [50154], [49097], [48520], [50018], [8174], [50084], [49366], [49526], [50193], [7479], [49982], [3]]
fp32_model = False # Whether or not the most recently loaded HF model was in fp32 format
deletewi = None # Temporary storage for UID to delete
wirmvwhtsp = False # Whether to remove leading whitespace from WI entries
widepth = 3 # How many historical actions to scan for WI hits
@ -262,7 +305,7 @@ class vars:
recentrngm = None # If a new random game was recently generated without Submitting after, this is the memory used (as a string), otherwise this is None
useprompt = False # Whether to send the full prompt with every submit action
breakmodel = False # For GPU users, whether to use both system RAM and VRAM to conserve VRAM while offering speedup compared to CPU-only
bmsupported = False # Whether the breakmodel option is supported (GPT-Neo/GPT-J/XGLM only, currently)
bmsupported = False # Whether the breakmodel option is supported (GPT-Neo/GPT-J/XGLM/OPT only, currently)
nobreakmodel = False # Something specifically requested Breakmodel to be disabled (For example a models config)
smandelete = False # Whether stories can be deleted from inside the browser
smanrename = False # Whether stories can be renamed from inside the browser
@ -274,6 +317,7 @@ class vars:
acregex_ui = re.compile(r'^ *(&gt;.*)$', re.MULTILINE) # Pattern for matching actions in the HTML-escaped story so we can apply colouring, etc (make sure to encase part to format in parentheses)
comregex_ai = re.compile(r'(?:\n<\|(?:.|\n)*?\|>(?=\n|$))|(?:<\|(?:.|\n)*?\|>\n?)') # Pattern for matching comments to remove them before sending them to the AI
comregex_ui = re.compile(r'(&lt;\|(?:.|\n)*?\|&gt;)') # Pattern for matching comments in the editor
sampler_order = utils.default_sampler_order.copy()
chatmode = False
chatname = "You"
adventure = False
@ -288,7 +332,7 @@ class vars:
quiet = False # If set will suppress any story text from being printed to the console (will only be seen on the client web page)
debug = False # If set to true, will send debug information to the client for display
lazy_load = True # Whether or not to use torch_lazy_loader.py for transformers models in order to reduce CPU memory usage
use_colab_tpu = os.environ.get("COLAB_TPU_ADDR", "") != "" # Whether or not we're in a Colab TPU instance and are going to use the TPU rather than the CPU
use_colab_tpu = os.environ.get("COLAB_TPU_ADDR", "") != "" or os.environ.get("TPU_NAME", "") != "" # Whether or not we're in a Colab TPU instance or Kaggle TPU instance and are going to use the TPU rather than the CPU
utils.vars = vars
@ -379,7 +423,7 @@ def device_list(n_layers, primary=None, selected=None):
def device_config(config):
global breakmodel, generator
import breakmodel
n_layers = config.num_layers if hasattr(config, "num_layers") else config.n_layer
n_layers = utils.num_layers(config)
if(args.breakmodel_gpulayers is not None):
try:
breakmodel.gpu_blocks = list(map(int, args.breakmodel_gpulayers.split(',')))
@ -452,7 +496,7 @@ def device_config(config):
# If all layers are on the same device, use the old GPU generation mode
while(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0):
breakmodel.gpu_blocks.pop()
if(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (-1, config.num_layers if hasattr(config, "num_layers") else config.n_layer)):
if(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (-1, utils.num_layers(config))):
vars.breakmodel = False
vars.usegpu = True
vars.gpu_device = len(breakmodel.gpu_blocks)-1
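utils.num_layers(config) replaces the inline attribute probing that device_config used previously; the helper itself lives in utils.py and is not shown in this diff. A plausible reconstruction (hypothetical), mirroring the expression it replaces plus the num_hidden_layers name that OPT configs are assumed to use:

def num_layers(config):
    for attr in ("num_layers", "n_layer", "num_hidden_layers"):
        if hasattr(config, attr):
            return getattr(config, attr)
    raise AttributeError("could not find a layer count in the model config")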
@ -484,22 +528,33 @@ def move_model_to_devices(model):
model.lm_head.to(breakmodel.primary_device)
if(hasattr(model.transformer, 'wpe')):
model.transformer.wpe.to(breakmodel.primary_device)
else:
elif(not hasattr(model.model, "decoder")):
model.model.embed_tokens.to(breakmodel.primary_device)
model.model.layer_norm.to(breakmodel.primary_device)
model.lm_head.to(breakmodel.primary_device)
model.model.embed_positions.to(breakmodel.primary_device)
else:
model.model.decoder.embed_tokens.to(breakmodel.primary_device)
if(model.model.decoder.project_in is not None):
model.model.decoder.project_in.to(breakmodel.primary_device)
if(model.model.decoder.project_out is not None):
model.model.decoder.project_out.to(breakmodel.primary_device)
model.model.decoder.embed_positions.to(breakmodel.primary_device)
gc.collect()
GPTNeoModel.forward = breakmodel.new_forward_neo
if("GPTJModel" in globals()):
GPTJModel.forward = breakmodel.new_forward_neo # type: ignore
if("XGLMModel" in globals()):
XGLMModel.forward = breakmodel.new_forward_xglm # type: ignore
if("OPTDecoder" in globals()):
OPTDecoder.forward = breakmodel.new_forward_opt # type: ignore
generator = model.generate
if(hasattr(model, "transformer")):
breakmodel.move_hidden_layers(model.transformer)
else:
elif(not hasattr(model.model, "decoder")):
breakmodel.move_hidden_layers(model.model, model.model.layers)
else:
breakmodel.move_hidden_layers(model.model.decoder, model.model.decoder.layers)
#==================================================================#
# Allow the models to override some settings
@ -515,13 +570,17 @@ def loadmodelsettings():
js = json.load(open(vars.custmodpth.replace('/', '_') + "/config.json", "r"))
except Exception as e:
js = {}
if vars.model_type == "xglm" or vars.model_type == "opt" or js.get("compat", "j") == "fairseq_lm":
if vars.model_type == "xglm" or js.get("compat", "j") == "fairseq_lm":
vars.newlinemode = "s" # Default to </s> newline mode if using XGLM
if vars.model_type == "opt":
vars.newlinemode = "ns" # Handle </s> but don't convert newlines if using Fairseq models that have newlines trained in them
vars.modelconfig = js
if("badwordsids" in js):
vars.badwordsids = js["badwordsids"]
if("nobreakmodel" in js):
vars.nobreakmodel = js["nobreakmodel"]
if("sampler_order" in js):
vars.sampler_order = js["sampler_order"]
if("temp" in js):
vars.temp = js["temp"]
if("top_p" in js):
@ -532,6 +591,8 @@ def loadmodelsettings():
vars.tfs = js["tfs"]
if("typical" in js):
vars.typical = js["typical"]
if("top_a" in js):
vars.top_a = js["top_a"]
if("rep_pen" in js):
vars.rep_pen = js["rep_pen"]
if("rep_pen_slope" in js):
@ -563,11 +624,13 @@ def savesettings():
js = {}
js["apikey"] = vars.apikey
js["andepth"] = vars.andepth
js["sampler_order"] = vars.sampler_order
js["temp"] = vars.temp
js["top_p"] = vars.top_p
js["top_k"] = vars.top_k
js["tfs"] = vars.tfs
js["typical"] = vars.typical
js["top_a"] = vars.top_a
js["rep_pen"] = vars.rep_pen
js["rep_pen_slope"] = vars.rep_pen_slope
js["rep_pen_range"] = vars.rep_pen_range
@ -615,88 +678,102 @@ def settingschanged():
#==================================================================#
# Read settings from client file JSON and send to vars
#==================================================================#
def loadsettings():
if(path.exists("defaults/" + getmodelname().replace('/', '_') + ".settings")):
# Read file contents into JSON object
file = open("defaults/" + getmodelname().replace('/', '_') + ".settings", "r")
js = json.load(file)
processsettings(js)
file.close()
if(path.exists("settings/" + getmodelname().replace('/', '_') + ".settings")):
# Read file contents into JSON object
file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "r")
js = json.load(file)
# Copy file contents to vars
if("apikey" in js):
vars.apikey = js["apikey"]
if("andepth" in js):
vars.andepth = js["andepth"]
if("temp" in js):
vars.temp = js["temp"]
if("top_p" in js):
vars.top_p = js["top_p"]
if("top_k" in js):
vars.top_k = js["top_k"]
if("tfs" in js):
vars.tfs = js["tfs"]
if("typical" in js):
vars.typical = js["typical"]
if("rep_pen" in js):
vars.rep_pen = js["rep_pen"]
if("rep_pen_slope" in js):
vars.rep_pen_slope = js["rep_pen_slope"]
if("rep_pen_range" in js):
vars.rep_pen_range = js["rep_pen_range"]
if("genamt" in js):
vars.genamt = js["genamt"]
if("max_length" in js):
vars.max_length = js["max_length"]
if("ikgen" in js):
vars.ikgen = js["ikgen"]
if("formatoptns" in js):
vars.formatoptns = js["formatoptns"]
if("numseqs" in js):
vars.numseqs = js["numseqs"]
if("widepth" in js):
vars.widepth = js["widepth"]
if("useprompt" in js):
vars.useprompt = js["useprompt"]
if("adventure" in js):
vars.adventure = js["adventure"]
if("chatmode" in js):
vars.chatmode = js["chatmode"]
if("chatname" in js):
vars.chatname = js["chatname"]
if("dynamicscan" in js):
vars.dynamicscan = js["dynamicscan"]
if("nopromptgen" in js):
vars.nopromptgen = js["nopromptgen"]
if("rngpersist" in js):
vars.rngpersist = js["rngpersist"]
if("nogenmod" in js):
vars.nogenmod = js["nogenmod"]
if("autosave" in js):
vars.autosave = js["autosave"]
if("newlinemode" in js):
vars.newlinemode = js["newlinemode"]
if("welcome" in js):
vars.welcome = js["welcome"]
if("antemplate" in js):
vars.setauthornotetemplate = js["antemplate"]
if(not vars.gamestarted):
vars.authornotetemplate = vars.setauthornotetemplate
if("userscripts" in js):
vars.userscripts = []
for userscript in js["userscripts"]:
if type(userscript) is not str:
continue
userscript = userscript.strip()
if len(userscript) != 0 and all(q not in userscript for q in ("..", ":")) and all(userscript[0] not in q for q in ("/", "\\")) and os.path.exists(fileops.uspath(userscript)):
vars.userscripts.append(userscript)
if("corescript" in js and type(js["corescript"]) is str and all(q not in js["corescript"] for q in ("..", ":")) and all(js["corescript"][0] not in q for q in ("/", "\\"))):
vars.corescript = js["corescript"]
else:
vars.corescript = "default.lua"
processsettings(js)
file.close()
def processsettings(js):
# Copy file contents to vars
if("apikey" in js):
vars.apikey = js["apikey"]
if("andepth" in js):
vars.andepth = js["andepth"]
if("sampler_order" in js):
vars.sampler_order = js["sampler_order"]
if("temp" in js):
vars.temp = js["temp"]
if("top_p" in js):
vars.top_p = js["top_p"]
if("top_k" in js):
vars.top_k = js["top_k"]
if("tfs" in js):
vars.tfs = js["tfs"]
if("typical" in js):
vars.typical = js["typical"]
if("top_a" in js):
vars.top_a = js["top_a"]
if("rep_pen" in js):
vars.rep_pen = js["rep_pen"]
if("rep_pen_slope" in js):
vars.rep_pen_slope = js["rep_pen_slope"]
if("rep_pen_range" in js):
vars.rep_pen_range = js["rep_pen_range"]
if("genamt" in js):
vars.genamt = js["genamt"]
if("max_length" in js):
vars.max_length = js["max_length"]
if("ikgen" in js):
vars.ikgen = js["ikgen"]
if("formatoptns" in js):
vars.formatoptns = js["formatoptns"]
if("numseqs" in js):
vars.numseqs = js["numseqs"]
if("widepth" in js):
vars.widepth = js["widepth"]
if("useprompt" in js):
vars.useprompt = js["useprompt"]
if("adventure" in js):
vars.adventure = js["adventure"]
if("chatmode" in js):
vars.chatmode = js["chatmode"]
if("chatname" in js):
vars.chatname = js["chatname"]
if("dynamicscan" in js):
vars.dynamicscan = js["dynamicscan"]
if("nopromptgen" in js):
vars.nopromptgen = js["nopromptgen"]
if("rngpersist" in js):
vars.rngpersist = js["rngpersist"]
if("nogenmod" in js):
vars.nogenmod = js["nogenmod"]
if("autosave" in js):
vars.autosave = js["autosave"]
if("newlinemode" in js):
vars.newlinemode = js["newlinemode"]
if("welcome" in js):
vars.welcome = js["welcome"]
if("antemplate" in js):
vars.setauthornotetemplate = js["antemplate"]
if(not vars.gamestarted):
vars.authornotetemplate = vars.setauthornotetemplate
if("userscripts" in js):
vars.userscripts = []
for userscript in js["userscripts"]:
if type(userscript) is not str:
continue
userscript = userscript.strip()
if len(userscript) != 0 and all(q not in userscript for q in ("..", ":")) and all(userscript[0] not in q for q in ("/", "\\")) and os.path.exists(fileops.uspath(userscript)):
vars.userscripts.append(userscript)
if("corescript" in js and type(js["corescript"]) is str and all(q not in js["corescript"] for q in ("..", ":")) and all(js["corescript"][0] not in q for q in ("/", "\\"))):
vars.corescript = js["corescript"]
else:
vars.corescript = "default.lua"
#==================================================================#
# Load a soft prompt from a file
@ -760,7 +837,7 @@ def spRequest(filename):
tensor = tensor.reshape(
tpu_mtj_backend.params["cores_per_replica"],
-1,
tpu_mtj_backend.params["d_model"],
tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]),
)
vars.sp = tpu_mtj_backend.shard_xmap(np.float32(tensor))
else:
@ -782,6 +859,7 @@ parser.add_argument("--ngrok", action='store_true', help="Optimizes KoboldAI for
parser.add_argument("--localtunnel", action='store_true', help="Optimizes KoboldAI for Remote Play using Localtunnel")
parser.add_argument("--host", action='store_true', help="Optimizes KoboldAI for Remote Play without using a proxy service")
parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable")
parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)")
parser.add_argument("--model", help="Specify the Model Type to skip the Menu")
parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)")
parser.add_argument("--revision", help="Specify the model revision for huggingface models (can be a git branch/tag name or a git commit hash)")
@ -841,6 +919,8 @@ if args.cpu:
vars.smandelete = vars.host == args.override_delete
vars.smanrename = vars.host == args.override_rename
vars.aria2_port = args.aria2_port or 6799
# Select a model to run
if args.model:
print("Welcome to KoboldAI!\nYou have selected the following Model:", vars.model)
@ -894,12 +974,15 @@ if(vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMe
print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)")
vars.model_type = "gpt_neo"
if(vars.model_type == "opt"):
vars.badwordsids = vars.badwordsids_opt
if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
loadmodelsettings()
loadsettings()
print("{0}Looking for GPU support...{1}".format(colors.PURPLE, colors.END), end="")
vars.hascuda = torch.cuda.is_available()
vars.bmsupported = vars.model_type in ("gpt_neo", "gptj", "xglm") and not vars.nobreakmodel
vars.bmsupported = vars.model_type in ("gpt_neo", "gptj", "xglm", "opt") and not vars.nobreakmodel
if(args.breakmodel is not None and args.breakmodel):
print("WARNING: --breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).", file=sys.stderr)
if(args.breakmodel_layers is not None):
@ -1111,17 +1194,36 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
globals()[m] = getattr(__import__("transformers"), m)
except:
pass
try:
from transformers.models.opt.modeling_opt import OPTDecoder
except:
pass
import transformers.generation_utils
from transformers import __version__ as transformers_version
from transformers import PreTrainedModel
from transformers import modeling_utils
old_from_pretrained = PreTrainedModel.from_pretrained.__func__
@classmethod
def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
vars.fp32_model = False
utils.num_shards = None
utils.current_shard = 0
utils.from_pretrained_model_name = pretrained_model_name_or_path
utils.from_pretrained_index_filename = None
utils.from_pretrained_kwargs = kwargs
utils.bar = None
if not args.no_aria2:
utils.aria2_hook(pretrained_model_name_or_path, **kwargs)
return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
PreTrainedModel.from_pretrained = new_from_pretrained
if(hasattr(modeling_utils, "get_checkpoint_shard_files")):
old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files
def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs):
utils.num_shards = utils.get_num_shards(index_filename)
utils.from_pretrained_index_filename = index_filename
return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs)
modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files
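# utils.get_num_shards(index_filename), used by the patch above, is not shown in
# this diff; a plausible reconstruction (hypothetical), based on how the
# weight_map index is read later in this file when sharded checkpoints are
# copied out of the cache:
#
#     def get_num_shards(index_filename):
#         with open(index_filename) as f:
#             return len(set(json.load(f)["weight_map"].values()))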
# Lazy loader
import torch_lazy_loader
@ -1139,6 +1241,10 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
ram_blocks = gpu_blocks = cumulative_gpu_blocks = None
def lazy_load_callback(model_dict, f, **_):
if lazy_load_callback.nested:
return
lazy_load_callback.nested = True
device_map = {}
for _key, spec in lazy_load_spec.get("layer_weights", {}).items():
@ -1153,12 +1259,22 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
if isinstance(value, torch_lazy_loader.LazyTensor) and key not in device_map:
device_map[key] = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu"
if utils.num_shards is None or utils.current_shard == 0:
if utils.num_shards is not None:
num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
else:
num_tensors = len(device_map)
print(flush=True)
utils.bar = tqdm(total=num_tensors, desc="Loading model tensors")
with zipfile.ZipFile(f, "r") as z:
try:
last_storage_key = None
f = None
current_offset = 0
for key in tqdm(sorted(device_map.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)), desc="Loading model tensors"):
if utils.num_shards is not None:
utils.current_shard += 1
for key in sorted(device_map.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
storage_key = model_dict[key].key
if storage_key != last_storage_key or model_dict[key].seek_offset < current_offset:
last_storage_key = storage_key
@ -1175,6 +1291,8 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
nbytes = size if dtype is torch.bool else size * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3)
#print(f"Transferring <{key}> to {'(CPU)' if device == 'cpu' else '[device ' + str(device) + ']'} ... ", end="", flush=True)
model_dict[key] = model_dict[key].materialize(f, map_location="cpu")
if model_dict[key].dtype is torch.float32:
vars.fp32_model = True
if convert_to_float16 and vars.hascuda and (vars.breakmodel or vars.usegpu) and model_dict[key].dtype is torch.float32:
model_dict[key] = model_dict[key].to(torch.float16)
if not vars.usegpu and not vars.breakmodel and model_dict[key].dtype is torch.float16:
@ -1182,10 +1300,16 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
model_dict[key] = model_dict[key].to(device)
#print("OK", flush=True)
current_offset += nbytes
utils.bar.update(1)
finally:
if utils.num_shards is None or utils.current_shard >= utils.num_shards:
utils.bar.close()
utils.bar = None
lazy_load_callback.nested = False
if isinstance(f, zipfile.ZipExtFile):
f.close()
lazy_load_callback.nested = False
return lazy_load_callback
lazy_load_config_path = os.path.join("maps", vars.model_type + ".json")
@ -1231,8 +1355,10 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
input_ids.clamp_(max=self.config.vocab_size-1)
if(hasattr(self, "transformer")):
inputs_embeds = self.transformer.wte(input_ids)
else:
elif(not hasattr(self.model, "decoder")):
inputs_embeds = self.model.embed_tokens(input_ids)
else:
inputs_embeds = self.model.decoder.embed_tokens(input_ids)
if(vars.sp is not None):
vars.sp = vars.sp.to(inputs_embeds.dtype).to(inputs_embeds.device)
inputs_embeds = torch.where(
@ -1240,23 +1366,42 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
vars.sp[shifted_input_ids.clamp(min=0)],
inputs_embeds,
)
if(not hasattr(self, "transformer")):
if(hasattr(self, "model") and hasattr(self.model, "embed_scale")):
inputs_embeds *= self.model.embed_scale
kwargs['inputs_embeds'] = inputs_embeds
return old_forward(self, *args, **kwargs)
cls.forward = new_causallm_forward
for cls in (GPT2LMHeadModel, GPTNeoForCausalLM):
patch_causallm(cls)
for c in ("GPTJForCausalLM", "XGLMForCausalLM"):
for c in ("GPTJForCausalLM", "XGLMForCausalLM", "OPTForCausalLM"):
try:
patch_causallm(getattr(__import__("transformers"), c))
except:
pass
# Fix a bug in OPTForCausalLM where self.lm_head is the wrong size
if(packaging.version.parse("4.19.0.dev0") <= packaging.version.parse(transformers_version) < packaging.version.parse("4.20.0")):
try:
from transformers import OPTForCausalLM, OPTModel
except ImportError:
pass
else:
# This is the same as the original __init__ but with
# config.hidden_size
# replaced with
# config.word_embed_proj_dim
def new_init(self, config):
super(OPTForCausalLM, self).__init__(config)
self.model = OPTModel(config)
self.lm_head = torch.nn.Linear(config.word_embed_proj_dim, config.vocab_size, bias=False)
self.post_init()
OPTForCausalLM.__init__ = new_init
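# A sketch of why the replacement __init__ sizes lm_head from word_embed_proj_dim
# rather than hidden_size (the numbers are assumptions about facebook/opt-350m,
# the OPT size that uses project_in/project_out between the two dimensions):
#
#     from transformers import OPTConfig
#     cfg = OPTConfig(hidden_size=1024, word_embed_proj_dim=512, vocab_size=50272)
#     torch.nn.Linear(cfg.word_embed_proj_dim, cfg.vocab_size, bias=False)
#     # matches decoder.project_out's output width; Linear(cfg.hidden_size, ...) would not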
# Patch transformers to use our custom logit warpers
from transformers import LogitsProcessorList, LogitsWarper, LogitsProcessor, TopKLogitsWarper, TopPLogitsWarper, TemperatureLogitsWarper, RepetitionPenaltyLogitsProcessor
from warpers import AdvancedRepetitionPenaltyLogitsProcessor, TailFreeLogitsWarper, TypicalLogitsWarper
from warpers import AdvancedRepetitionPenaltyLogitsProcessor, TailFreeLogitsWarper, TypicalLogitsWarper, TopALogitsWarper
def dynamic_processor_wrap(cls, field_name, var_name, cond=None):
old_call = cls.__call__
@ -1276,6 +1421,7 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
cls.__call__ = new_call
dynamic_processor_wrap(AdvancedRepetitionPenaltyLogitsProcessor, ("penalty", "penalty_slope", "penalty_range"), ("rep_pen", "rep_pen_slope", "rep_pen_range"), cond=lambda x: x[0] != 1.0)
dynamic_processor_wrap(TopKLogitsWarper, "top_k", "top_k", cond=lambda x: x > 0)
dynamic_processor_wrap(TopALogitsWarper, "top_a", "top_a", cond=lambda x: x > 0.0)
dynamic_processor_wrap(TopPLogitsWarper, "top_p", "top_p", cond=lambda x: x < 1.0)
dynamic_processor_wrap(TailFreeLogitsWarper, "tfs", "tfs", cond=lambda x: x < 1.0)
dynamic_processor_wrap(TypicalLogitsWarper, "typical", "typical", cond=lambda x: x < 1.0)
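TopALogitsWarper is defined in the project's warpers.py and is not shown in this diff; top-a filtering is assumed here to drop every token whose probability falls below top_a times the square of the highest token probability. A minimal standalone sketch of that rule:

import torch

def top_a_filter(scores: torch.Tensor, top_a: float, filter_value: float = -float("inf")) -> torch.Tensor:
    # scores: raw logits for one generation step, shape (batch, vocab)
    probs = scores.softmax(dim=-1)
    limit = probs.max(dim=-1, keepdim=True).values.pow(2) * top_a
    return scores.masked_fill(probs < limit, filter_value)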
@ -1319,21 +1465,30 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
new_get_logits_processor.old_get_logits_processor = transformers.generation_utils.GenerationMixin._get_logits_processor
transformers.generation_utils.GenerationMixin._get_logits_processor = new_get_logits_processor
class KoboldLogitsWarperList(LogitsProcessorList):
def __init__(self, beams: int = 1, **kwargs):
self.__warper_list: List[LogitsWarper] = []
self.__warper_list.append(TopKLogitsWarper(top_k=1, min_tokens_to_keep=1 + (beams > 1)))
self.__warper_list.append(TopALogitsWarper(top_a=0.5, min_tokens_to_keep=1 + (beams > 1)))
self.__warper_list.append(TopPLogitsWarper(top_p=0.5, min_tokens_to_keep=1 + (beams > 1)))
self.__warper_list.append(TailFreeLogitsWarper(tfs=0.5, min_tokens_to_keep=1 + (beams > 1)))
self.__warper_list.append(TypicalLogitsWarper(typical=0.5, min_tokens_to_keep=1 + (beams > 1)))
self.__warper_list.append(TemperatureLogitsWarper(temperature=0.5))
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, *args, **kwargs):
for k in vars.sampler_order:
scores = self.__warper_list[k](input_ids, scores, *args, **kwargs)
return scores
def new_get_logits_warper(beams: int = 1,) -> LogitsProcessorList:
warper_list = LogitsProcessorList()
warper_list.append(TopKLogitsWarper(top_k=1, min_tokens_to_keep=1 + (beams > 1)))
warper_list.append(TopPLogitsWarper(top_p=0.5, min_tokens_to_keep=1 + (beams > 1)))
warper_list.append(TailFreeLogitsWarper(tfs=0.5, min_tokens_to_keep=1 + (beams > 1)))
warper_list.append(TypicalLogitsWarper(typical=0.5, min_tokens_to_keep=1 + (beams > 1)))
warper_list.append(TemperatureLogitsWarper(temperature=0.5))
return warper_list
return KoboldLogitsWarperList(beams=beams)
def new_sample(self, *args, **kwargs):
assert kwargs.pop("logits_warper", None) is not None
kwargs["logits_warper"] = new_get_logits_warper(
beams=1,
)
if(vars.newlinemode == "s"):
if(vars.newlinemode == "s") or (vars.newlinemode == "ns"):
kwargs["eos_token_id"] = -1
kwargs.setdefault("pad_token_id", 2)
return new_sample.old_sample(self, *args, **kwargs)
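vars.sampler_order holds indices into the fixed warper list that KoboldLogitsWarperList builds above, so reordering it changes which sampler is applied first. The index assignment implied by the append order (utils.default_sampler_order is presumed to list all six, 0 through 5):

SAMPLER_INDEX = {
    0: "top_k",
    1: "top_a",
    2: "top_p",
    3: "tfs",
    4: "typical",
    5: "temperature",
}
# e.g. sampler_order = [5, 0, 1, 2, 3, 4] applies temperature before the truncation samplers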
@ -1408,12 +1563,18 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
def get_hidden_size_from_model(model):
try:
return int(model.transformer.hidden_size)
return int(model.model.decoder.project_in.in_features)
except:
try:
return int(model.transformer.embed_dim)
return int(model.model.decoder.embed_tokens.out_features)
except:
return int(model.lm_head.in_features)
try:
return int(model.transformer.hidden_size)
except:
try:
return int(model.transformer.embed_dim)
except:
return int(model.lm_head.in_features)
def maybe_low_cpu_mem_usage() -> Dict[str, Any]:
if(packaging.version.parse(transformers_version) < packaging.version.parse("4.11.0")):
@ -1468,12 +1629,16 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
import shutil
shutil.move(vars.model.replace('/', '_'), "models/{}".format(vars.model.replace('/', '_')))
print("\n", flush=True)
with maybe_use_float16(), torch_lazy_loader.use_lazy_torch_load(enable=vars.lazy_load, callback=get_lazy_load_callback(model_config.num_layers if hasattr(model_config, "num_layers") else model_config.n_layer) if vars.lazy_load else None, dematerialized_modules=True):
with maybe_use_float16(), torch_lazy_loader.use_lazy_torch_load(enable=vars.lazy_load, callback=get_lazy_load_callback(utils.num_layers(model_config)) if vars.lazy_load else None, dematerialized_modules=True):
if(vars.lazy_load): # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time
lowmem = {}
if(os.path.isdir(vars.custmodpth)):
try:
tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@ -1486,6 +1651,10 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
try:
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@ -1496,8 +1665,25 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
except Exception as e:
model = GPTNeoForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", **lowmem)
else:
old_rebuild_tensor = torch._utils._rebuild_tensor
def new_rebuild_tensor(storage: Union[torch_lazy_loader.LazyTensor, torch.Storage], storage_offset, shape, stride):
if(not isinstance(storage, torch_lazy_loader.LazyTensor)):
dtype = storage.dtype
else:
dtype = storage.storage_type.dtype
if(not isinstance(dtype, torch.dtype)):
dtype = storage.storage_type(0).dtype
if(dtype is torch.float32 and len(shape) >= 2):
vars.fp32_model = True
return old_rebuild_tensor(storage, storage_offset, shape, stride)
torch._utils._rebuild_tensor = new_rebuild_tensor
try:
tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
@ -1508,11 +1694,32 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
except Exception as e:
model = GPTNeoForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", **lowmem)
torch._utils._rebuild_tensor = old_rebuild_tensor
if not args.colab or args.savemodel:
import shutil
model = model.half()
model.save_pretrained("models/{}".format(vars.model.replace('/', '_')), max_shard_size="500MiB")
tokenizer.save_pretrained("models/{}".format(vars.model.replace('/', '_')))
if(vars.fp32_model): # Use save_pretrained to convert fp32 models to fp16
model = model.half()
model.save_pretrained("models/{}".format(vars.model.replace('/', '_')), max_shard_size="500MiB")
else: # For fp16 models, we can just copy the model files directly
import transformers.configuration_utils
import transformers.modeling_utils
import transformers.file_utils
# Save the config.json
shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, transformers.configuration_utils.CONFIG_NAME, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.configuration_utils.CONFIG_NAME))
if(utils.num_shards is None):
# Save the pytorch_model.bin of an unsharded model
shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, transformers.modeling_utils.WEIGHTS_NAME, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_NAME))
else:
with open(utils.from_pretrained_index_filename) as f:
map_data = json.load(f)
filenames = set(map_data["weight_map"].values())
# Save the pytorch_model.bin.index.json of a sharded model
shutil.move(utils.from_pretrained_index_filename, os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_INDEX_NAME))
# Then save the pytorch_model-#####-of-#####.bin files
for filename in filenames:
shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, filename, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), filename))
shutil.rmtree("cache/")
if(vars.hascuda):
@ -1548,13 +1755,28 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
else:
from transformers import PreTrainedModel
from transformers import modeling_utils
old_from_pretrained = PreTrainedModel.from_pretrained.__func__
@classmethod
def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
vars.fp32_model = False
utils.num_shards = None
utils.current_shard = 0
utils.from_pretrained_model_name = pretrained_model_name_or_path
utils.from_pretrained_index_filename = None
utils.from_pretrained_kwargs = kwargs
utils.bar = None
if not args.no_aria2:
utils.aria2_hook(pretrained_model_name_or_path, **kwargs)
return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
PreTrainedModel.from_pretrained = new_from_pretrained
if(hasattr(modeling_utils, "get_checkpoint_shard_files")):
old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files
def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs):
utils.num_shards = utils.get_num_shards(index_filename)
utils.from_pretrained_index_filename = index_filename
return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs)
modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files
def tpumtjgetsofttokens():
soft_tokens = None
@ -1562,14 +1784,14 @@ else:
global np
if 'np' not in globals():
import numpy as np
tensor = np.zeros((1, tpu_mtj_backend.params["d_model"]), dtype=np.float32)
tensor = np.zeros((1, tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"])), dtype=np.float32)
rows = tensor.shape[0]
padding_amount = tpu_mtj_backend.params["seq"] - (tpu_mtj_backend.params["seq"] % -tpu_mtj_backend.params["cores_per_replica"]) - rows
tensor = np.pad(tensor, ((0, padding_amount), (0, 0)))
tensor = tensor.reshape(
tpu_mtj_backend.params["cores_per_replica"],
-1,
tpu_mtj_backend.params["d_model"],
tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]),
)
vars.sp = tpu_mtj_backend.shard_xmap(tensor)
soft_tokens = np.arange(
@ -1631,11 +1853,13 @@ else:
def tpumtjgenerate_settings_callback() -> dict:
return {
"sampler_order": vars.sampler_order,
"top_p": float(vars.top_p),
"temp": float(vars.temp),
"top_k": int(vars.top_k),
"tfs": float(vars.tfs),
"typical": float(vars.typical),
"top_a": float(vars.top_a),
"repetition_penalty": float(vars.rep_pen),
"rpslope": float(vars.rep_pen_slope),
"rprange": int(vars.rep_pen_range),
@ -1658,7 +1882,7 @@ else:
if vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and (not vars.custmodpth or not os.path.isdir(vars.custmodpth)):
raise FileNotFoundError(f"The specified model path {repr(vars.custmodpth)} is not the path to a valid folder")
import tpu_mtj_backend
if(vars.model == "TPUMeshTransformerGPTNeoX"):
if(vars.model == "TPUMeshTransformerGPTNeoX" or vars.model_type == "opt"):
tpu_mtj_backend.pad_token_id = 1
tpu_mtj_backend.vars = vars
tpu_mtj_backend.warper_callback = tpumtjgenerate_warper_callback
@ -1670,7 +1894,7 @@ else:
loadmodelsettings()
loadsettings()
tpu_mtj_backend.load_model(vars.custmodpth, hf_checkpoint=vars.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and vars.use_colab_tpu, **vars.modelconfig)
vars.modeldim = int(tpu_mtj_backend.params["d_model"])
vars.modeldim = int(tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]))
tokenizer = tpu_mtj_backend.tokenizer
else:
loadsettings()
@ -1998,6 +2222,7 @@ def lua_has_setting(setting):
"settopk",
"settfs",
"settypical",
"settopa",
"setreppen",
"setreppenslope",
"setreppenrange",
@ -2017,6 +2242,7 @@ def lua_has_setting(setting):
"top_k",
"tfs",
"typical",
"topa",
"reppen",
"reppenslope",
"reppenrange",
@ -2051,6 +2277,7 @@ def lua_get_setting(setting):
if(setting in ("settopk", "topk", "top_k")): return vars.top_k
if(setting in ("settfs", "tfs")): return vars.tfs
if(setting in ("settypical", "typical")): return vars.typical
if(setting in ("settopa", "topa")): return vars.top_a
if(setting in ("setreppen", "reppen")): return vars.rep_pen
if(setting in ("setreppenslope", "reppenslope")): return vars.rep_pen_slope
if(setting in ("setreppenrange", "reppenrange")): return vars.rep_pen_range
@ -2086,6 +2313,7 @@ def lua_set_setting(setting, v):
if(setting in ("settopk", "topk")): vars.top_k = v
if(setting in ("settfs", "tfs")): vars.tfs = v
if(setting in ("settypical", "typical")): vars.typical = v
if(setting in ("settopa", "topa")): vars.top_a = v
if(setting in ("setreppen", "reppen")): vars.rep_pen = v
if(setting in ("setreppenslope", "reppenslope")): vars.rep_pen_slope = v
if(setting in ("setreppenrange", "reppenrange")): vars.rep_pen_range = v
@ -2510,6 +2738,11 @@ def get_message(msg):
emit('from_server', {'cmd': 'setlabeltypical', 'data': msg['data']}, broadcast=True)
settingschanged()
refresh_settings()
elif(msg['cmd'] == 'settopa'):
vars.top_a = float(msg['data'])
emit('from_server', {'cmd': 'setlabeltopa', 'data': msg['data']}, broadcast=True)
settingschanged()
refresh_settings()
elif(msg['cmd'] == 'setreppen'):
vars.rep_pen = float(msg['data'])
emit('from_server', {'cmd': 'setlabelreppen', 'data': msg['data']}, broadcast=True)
@ -2663,6 +2896,8 @@ def get_message(msg):
elif(msg['cmd'] == 'uslistrequest'):
unloaded, loaded = getuslist()
emit('from_server', {'cmd': 'buildus', 'data': {"unloaded": unloaded, "loaded": loaded}})
elif(msg['cmd'] == 'samplerlistrequest'):
emit('from_server', {'cmd': 'buildsamplers', 'data': vars.sampler_order})
elif(msg['cmd'] == 'usloaded'):
vars.userscripts = []
for userscript in msg['data']:
@ -2676,6 +2911,16 @@ def get_message(msg):
load_lua_scripts()
unloaded, loaded = getuslist()
sendUSStatItems()
elif(msg['cmd'] == 'samplers'):
sampler_order = msg["data"]
if(not isinstance(sampler_order, list)):
raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
if(len(sampler_order) != len(vars.sampler_order)):
raise ValueError(f"Sampler order must be a list of length {len(vars.sampler_order)}, but got a list of length {len(sampler_order)}")
if(not all(isinstance(e, int) for e in sampler_order)):
raise ValueError(f"Sampler order must be a list of ints, but got a list with at least one non-int element")
vars.sampler_order = sampler_order
settingschanged()
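# A sketch of the client message this 'samplers' branch accepts (values are
# illustrative; the list length must match len(vars.sampler_order) and every
# element must be an int):
#
#     {'cmd': 'samplers', 'data': [5, 0, 1, 2, 3, 4]}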
elif(msg['cmd'] == 'loadselect'):
vars.loadselect = msg["data"]
elif(msg['cmd'] == 'spselect'):
@ -3104,24 +3349,26 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
global tokenizer
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
lnheader = len(tokenizer._koboldai_header)
# Calculate token budget
prompttkns = tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', vars.prompt)), max_length=int(2e9), truncation=True)
lnprompt = len(prompttkns)
memtokens = tokenizer.encode(utils.encodenewlines(mem), max_length=int(2e9), truncation=True)
lnmem = len(memtokens)
if(lnmem > vars.max_length - lnsp - vars.genamt - budget_deduction):
if(lnmem > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
raise OverflowError("The memory in your story is too long. Please either write a shorter memory text or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
witokens = tokenizer.encode(utils.encodenewlines(winfo), max_length=int(2e9), truncation=True)
lnwi = len(witokens)
if(lnmem + lnwi > vars.max_length - lnsp - vars.genamt - budget_deduction):
if(lnmem + lnwi > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
raise OverflowError("The current active world info keys take up too many tokens. Please either write shorter world info, decrease World Info Depth or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
if(anotetxt != ""):
anotetkns = tokenizer.encode(utils.encodenewlines(anotetxt), max_length=int(2e9), truncation=True)
lnanote = len(anotetkns)
if(lnmem + lnwi + lnanote > vars.max_length - lnsp - vars.genamt - budget_deduction):
if(lnmem + lnwi + lnanote > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
raise OverflowError("The author's note in your story is too long. Please either write a shorter author's note or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
if(vars.useprompt):
@ -3132,14 +3379,14 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
lnsubmission = len(tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', submission)), max_length=int(2e9), truncation=True)) if submission is not None else 0
maybe_lnprompt = lnprompt if vars.useprompt and actionlen > 0 else 0
if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnsp - vars.genamt - budget_deduction):
if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
raise OverflowError("Your submission is too long. Please either write a shorter submission or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt. If you are using the Always Add Prompt setting, turning it off may help.")
assert budget >= 0
if(actionlen == 0):
# First/Prompt action
tokens = memtokens + witokens + anotetkns + prompttkns
tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns
assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
ln = len(tokens) + lnsp
return tokens, ln+1, ln+vars.genamt
@ -3187,12 +3434,12 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
# Did we get to add the A.N.? If not, do it here
if(anotetxt != ""):
if((not anoteadded) or forceanote):
tokens = memtokens + witokens + anotetkns + prompttkns + tokens
tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns + tokens
else:
tokens = memtokens + witokens + prompttkns + tokens
tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens
else:
# Prepend Memory, WI, and Prompt before action tokens
tokens = memtokens + witokens + prompttkns + tokens
tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens
# Send completed bundle to generator
assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
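With the _koboldai_header length included, the budget checks above enforce lnheader + lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission <= vars.max_length - lnsp - vars.genamt - budget_deduction. A worked sketch using this commit's defaults (assuming a 1-token OPT header, no soft prompt and no deduction):

max_length, genamt, lnheader, lnsp, budget_deduction = 2048, 80, 1, 0, 0
context_budget = max_length - lnsp - genamt - budget_deduction - lnheader  # 1967 tokens left for story context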
@ -3570,6 +3817,7 @@ def sendtocolab(txt, min, max):
'top_k': vars.top_k,
'tfs': vars.tfs,
'typical': vars.typical,
'topa': vars.top_a,
'numseqs': vars.numseqs,
'retfultxt': False
}
@ -3707,12 +3955,14 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None):
top_k=vars.top_k,
tfs=vars.tfs,
typical=vars.typical,
top_a=vars.top_a,
numseqs=vars.numseqs,
repetition_penalty=vars.rep_pen,
rpslope=vars.rep_pen_slope,
rprange=vars.rep_pen_range,
soft_embeddings=vars.sp,
soft_tokens=soft_tokens,
sampler_order=vars.sampler_order,
)
past = genout
for i in range(vars.numseqs):
@ -3893,6 +4143,7 @@ def refresh_settings():
emit('from_server', {'cmd': 'updatetopk', 'data': vars.top_k}, broadcast=True)
emit('from_server', {'cmd': 'updatetfs', 'data': vars.tfs}, broadcast=True)
emit('from_server', {'cmd': 'updatetypical', 'data': vars.typical}, broadcast=True)
emit('from_server', {'cmd': 'updatetopa', 'data': vars.top_a}, broadcast=True)
emit('from_server', {'cmd': 'updatereppen', 'data': vars.rep_pen}, broadcast=True)
emit('from_server', {'cmd': 'updatereppenslope', 'data': vars.rep_pen_slope}, broadcast=True)
emit('from_server', {'cmd': 'updatereppenrange', 'data': vars.rep_pen_range}, broadcast=True)
@ -4469,6 +4720,7 @@ def oairequest(txt, min, max):
'prompt': txt,
'max_tokens': vars.genamt,
'temperature': vars.temp,
'top_a': vars.top_a,
'top_p': vars.top_p,
'top_k': vars.top_k,
'tfs': vars.tfs,

breakmodel.py

@ -633,11 +633,11 @@ def new_forward_xglm(
layer_outputs = decoder_layer(
hidden_states.to(device) if breakmodel and hidden_states is not None else hidden_states,
attention_mask=attention_mask.to(device) if breakmodel and attention_mask is not None else attention_mask,
encoder_hidden_states=encoder_hidden_states.to(device) if encoder_hidden_states is not None else None,
encoder_attention_mask=encoder_attention_mask.to(device) if encoder_attention_mask is not None else None,
layer_head_mask=((head_mask[idx].to(device) if head_mask[idx] is not None else None) if head_mask is not None else None),
encoder_hidden_states=encoder_hidden_states.to(device) if breakmodel and encoder_hidden_states is not None else encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask.to(device) if breakmodel and encoder_attention_mask is not None else encoder_attention_mask,
layer_head_mask=((head_mask[idx].to(device) if breakmodel and head_mask[idx] is not None else head_mask[idx]) if head_mask is not None else None),
cross_attn_layer_head_mask=(
(cross_attn_head_mask[idx].to(device) if cross_attn_head_mask[idx] is not None else None) if cross_attn_head_mask is not None else None
(cross_attn_head_mask[idx].to(device) if breakmodel and cross_attn_head_mask[idx] is not None else cross_attn_head_mask[idx]) if cross_attn_head_mask is not None else None
),
past_key_value=tuple(v.to(device) for v in past_key_value if v is not None) if breakmodel and past_key_value is not None and i >= ram_blocks and len(past_key_value) and past_key_value[0].device.index != device else past_key_value,
output_attentions=output_attentions,
@ -686,3 +686,177 @@ def new_forward_xglm(
attentions=all_self_attns,
cross_attentions=all_cross_attentions,
)
def new_forward_opt(
self,
input_ids=None,
attention_mask=None,
head_mask=None,
past_key_values=None,
inputs_embeds=None,
use_cache=None,
output_attentions=None,
output_hidden_states=None,
return_dict=None,
):
assert len(gpu_blocks) <= torch.cuda.device_count()
assert sum(gpu_blocks) <= len(self.layers)
ram_blocks = len(self.layers) - sum(gpu_blocks)
cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
use_cache = use_cache if use_cache is not None else self.config.use_cache
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
# retrieve input_ids and inputs_embeds
if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
elif input_ids is not None:
input_shape = input_ids.size()
input_ids = input_ids.view(-1, input_shape[-1])
elif inputs_embeds is not None:
input_shape = inputs_embeds.size()[:-1]
else:
raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0
if inputs_embeds is None:
if breakmodel:
input_ids = input_ids.to(primary_device)
inputs_embeds = self.embed_tokens(input_ids)
# embed positions
if breakmodel:
inputs_embeds = inputs_embeds.to(primary_device)
if attention_mask is None:
attention_mask = torch.ones(inputs_embeds.shape[:2], dtype=torch.bool, device=inputs_embeds.device)
positions = self.embed_positions(attention_mask)[:, past_key_values_length:, :]
if breakmodel:
positions = positions.to(primary_device)
attention_mask = self._prepare_decoder_attention_mask(
attention_mask, input_shape, inputs_embeds, past_key_values_length
)
if self.project_in is not None:
inputs_embeds = self.project_in(inputs_embeds)
hidden_states = inputs_embeds + positions
hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
# decoder layers
all_hidden_states = () if output_hidden_states else None
all_self_attns = () if output_attentions else None
next_decoder_cache = () if use_cache else None
if breakmodel and ram_blocks:
copystream = torch.cuda.Stream(device=primary_device, priority=-1)
# check if head_mask has a correct number of layers specified if desired
for attn_mask, mask_name in zip([head_mask], ["head_mask"]):
if attn_mask is not None:
if attn_mask.size()[0] != (len(self.layers)):
raise ValueError(
f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for"
f" {head_mask.size()[0]}."
)
for idx, decoder_layer in enumerate(self.layers):
i = idx
if breakmodel:
if i in range(ram_blocks):
index1 = (i+1)%ram_blocks
for param1,param2 in zip(self.layers[index1].parameters(),self.layers[(i-1)%ram_blocks].parameters()):
param1.data = param2.data
for param1,param2 in zip(self.layers[index1].parameters(),self.extrastorage[index1].parameters()):
with torch.cuda.stream(copystream):
torch.cuda.comm.broadcast(param2.data,out = [param1.data])
# add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
if output_hidden_states:
all_hidden_states += (hidden_states,)
dropout_probability = random.uniform(0, 1)
if self.training and (dropout_probability < self.layerdrop):
continue
past_key_value = past_key_values[idx] if past_key_values is not None else None
if self.gradient_checkpointing and self.training:
if use_cache:
logger.warning(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache = False
def create_custom_forward(module):
def custom_forward(*inputs):
# None for past_key_value
return module(*inputs, output_attentions, None)
return custom_forward
layer_outputs = torch.utils.checkpoint.checkpoint(
create_custom_forward(decoder_layer),
hidden_states,
attention_mask,
head_mask[idx] if head_mask is not None else None,
None,
)
else:
if breakmodel:
device = primary_device if i < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, i - ram_blocks)
layer_outputs = decoder_layer(
hidden_states.to(device) if breakmodel and hidden_states is not None else hidden_states,
attention_mask=attention_mask.to(device) if breakmodel and attention_mask is not None else attention_mask,
layer_head_mask=((head_mask[idx].to(device) if breakmodel and head_mask[idx] is not None else head_mask[idx]) if head_mask is not None else None),
past_key_value=tuple(v.to(device) for v in past_key_value if v is not None) if breakmodel and past_key_value is not None and i >= ram_blocks and len(past_key_value) and past_key_value[0].device.index != device else past_key_value,
output_attentions=output_attentions,
use_cache=use_cache,
)
hidden_states = layer_outputs[0]
if use_cache:
next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)
if output_attentions:
all_self_attns += (layer_outputs[1],)
if breakmodel:
if i in range(ram_blocks):
torch.cuda.synchronize()
torch.cuda.empty_cache()
if breakmodel:
if ram_blocks:
del copystream
torch.cuda.empty_cache()
hidden_states = hidden_states.to(primary_device)
if self.project_out is not None:
hidden_states = self.project_out(hidden_states)
if breakmodel:
hidden_states = hidden_states.to(primary_device)
# add hidden states from the last decoder layer
if output_hidden_states:
all_hidden_states += (hidden_states,)
next_cache = next_decoder_cache if use_cache else None
if not return_dict:
return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
return BaseModelOutputWithPast(
last_hidden_state=hidden_states,
past_key_values=next_cache,
hidden_states=all_hidden_states,
attentions=all_self_attns,
)
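The heart of this breakmodel forward pass is the layer-to-device assignment: the first `ram_blocks` layers are streamed through the primary GPU while the rest are pinned to GPUs according to `gpu_blocks`, exactly as in the `bisect.bisect_right` expression above. A standalone sketch of that mapping (device indices only, no tensors; the layer count and block split are made-up example values):

```python
import bisect
import itertools

def layer_devices(n_layers, gpu_blocks, primary_device=0):
    # Mirrors the per-layer device choice in new_forward_opt / new_forward_xglm:
    # layers not covered by gpu_blocks stay in RAM and run on the primary GPU,
    # the rest are assigned by bisecting the cumulative block counts.
    ram_blocks = n_layers - sum(gpu_blocks)
    cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))
    devices = []
    for i in range(n_layers):
        if i < ram_blocks:
            devices.append(primary_device)
        else:
            devices.append(bisect.bisect_right(cumulative_gpu_blocks, i - ram_blocks))
    return devices

# A 32-layer model with 8 layers on GPU 0 and 20 on GPU 1 leaves 4 streamed from RAM.
print(layer_devices(32, (8, 20)))
```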

View File

@ -867,6 +867,7 @@ return function(_python, _bridged)
---@field settopk integer
---@field settfs number
---@field settypical number
---@field settopa number
---@field setreppen number
---@field setreppenslope number
---@field setreppenrange number
@ -884,6 +885,7 @@ return function(_python, _bridged)
---@field top_k integer
---@field tfs number
---@field typical number
---@field topa number
---@field reppen number
---@field reppenslope number
---@field reppenrange number

View File

@ -7,7 +7,7 @@
"private_outputs": true,
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyOKIa/NDLlYI5j63GXPtkXv",
"authorship_tag": "ABX9TyPbwW79K9/RkYH9i9rkYFyj",
"include_colab_link": true
},
"kernelspec": {
@ -68,14 +68,20 @@
"#@title <b><-- Click this to start KoboldAI</b>\n",
"#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
"\n",
"Model = \"KoboldAI/GPT-Neo-2.7B-Janeway\" #@param [\"KoboldAI/GPT-Neo-2.7B-Janeway\", \"KoboldAI/GPT-Neo-2.7B-AID\", \"KoboldAI/GPT-Neo-2.7B-Picard\", \"KoboldAI/GPT-Neo-2.7B-Horni-LN\", \"KoboldAI/GPT-Neo-2.7B-Horni\", \"KoboldAI/GPT-Neo-2.7B-Shinen\", \"EleutherAI/gpt-neo-2.7B\"] {allow-input: true}\n",
"Model = \"KoboldAI/fairseq-dense-2.7B-Nerys\" #@param [\"KoboldAI/fairseq-dense-2.7B-Nerys\", \"KoboldAI/GPT-Neo-2.7B-Janeway\", \"KoboldAI/GPT-Neo-2.7B-AID\", \"KoboldAI/GPT-Neo-2.7B-Picard\", \"KoboldAI/GPT-Neo-2.7B-Horni-LN\", \"KoboldAI/GPT-Neo-2.7B-Horni\", \"KoboldAI/GPT-Neo-2.7B-Shinen\", \"EleutherAI/gpt-neo-2.7B\"] {allow-input: true}\n",
"Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
"Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
"\n",
"!nvidia-smi\n",
"from google.colab import drive\n",
"drive.mount('/content/drive/')\n",
"\n",
"!wget https://henk.tech/ckds -O - | bash /dev/stdin -m $Model -g $Version"
"if Provider == \"Localtunnel\":\n",
" tunnel = \"--localtunnel yes\"\n",
"else:\n",
" tunnel = \"\"\n",
"\n",
"!wget https://henk.tech/ckds -O - | bash /dev/stdin -m $Model -g $Version $tunnel"
],
"execution_count": null,
"outputs": []
@ -84,27 +90,32 @@
"cell_type": "markdown",
"source": [
"# GPU Edition Model Descriptions\n",
"| Model | Size | Style | Description |\n",
"| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ |\n",
"| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B GPU | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n",
"| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n",
"| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n",
"| Model | Size | Style | Description |\n",
"| --- | --- | --- | --- |\n",
"| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
"| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n",
"| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n",
"| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n",
"\n",
"# [TPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)\n",
"\n",
"| Model | Size | Style | Drive Space | Description |\n",
"| ------------------------------ | ------ | --------- | ----------- | ------------------------------------------------------------ |\n",
"| Skein 6B by VE_FORBDRYDERNE | 6B TPU | Hybrid | 0 GB | Skein is our flagship 6B model, it is a hybrid between a Adventure model and a Novel model. Best used with either Adventure mode or the You Bias userscript enabled. Skein has been trained on high quality Novels along with CYOA adventure stories and is not as wackey as the Adventure model. It also has tagging support. |\n",
"| Janeway 6B by Mr Seeker | 6B TPU | Novel | 0 GB | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| Adventure 6B by VE_FORBRYDERNE | 6B TPU | Adventure | 0 GB | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n",
"| Lit 6B by Haru | 6B TPU | NSFW | 8 GB / 12 GB | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n",
"| Shinen 6B by Mr Seeker | 6B TPU | NSFW | 0 GB | Shinen is an alternative to the Lit model designed to be more explicit. If Lit is to tame for you Shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
"| Generic 6B by EleutherAI | 6B TPU | Generic | 10 GB / 12 GB | GPT-J-6B is what all other models are based on, if you need something that has no specific bias towards any particular subject this is the model for you. Best used when the other models are not suitable for what you wish to do. Such as homework assistance, blog writing, coding and more. It needs more hand holding than other models and is more prone to undesirable formatting changes. |\n",
"| C1 6B by Haru | 6B TPU | Chatbot | 8 GB / 12 GB | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n",
"| Model | Size | Style | Description |\n",
"| --- | --- | --- | --- |\n",
"| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
"| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n",
"| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n",
"| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n",
"| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n",
"| [Convo](https://huggingface.co/hitomi-team/convo-6B) by Hitomi Team | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). |\n",
"| [C1](https://huggingface.co/hakurei/c1-6B) by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n",
"| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n",
"| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n",
"| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n",
"\n",
"\n",
"| Style | Description |\n",
@ -113,7 +124,6 @@
"| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n",
"| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |\n",
"| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n",
"| Hybrid | Hybrid models are a blend between different styles, for example they are trained on both Novel stories and Adventure stories. These models are great variety models that you can use for multiple different playstyles and modes, but depending on your usage you may need to enable Adventure Mode or the You bias (in userscripts). |\n",
"| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n",
"\n",
"# How to start KoboldAI in 7 simple steps\n",

View File

@ -7,7 +7,7 @@
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/henk717/KoboldAI/blob/united/colab/TPU.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
"<a href=\"https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
@ -65,8 +65,8 @@
"#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
"\n",
"#@title <b><-- Click this to start KoboldAI</b>\n",
"Model = \"Janeway 13B\" #@param [\"Janeway 13B\", \"Shinen 13B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit 6B\", \"Convo 6B\", \"C1 6B\", \"NeoX 20B\", \"KoboldAI/fairseq-dense-13B\", \"EleutherAI/gpt-j-6B\"] {allow-input: true}\n",
"Version = \"United\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
"Model = \"Nerys 13B\" #@param [\"Nerys 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit 6B\", \"NeoX 20B\", \"facebook/opt-13b\", \"KoboldAI/fairseq-dense-13B\", \"EleutherAI/gpt-j-6B\"] {allow-input: true}\n",
"Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
"Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
"\n",
"import os\n",
@ -84,6 +84,10 @@
" Model = \"KoboldAI/fairseq-dense-13B-Janeway\"\n",
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"Nerys 13B\":\n",
" Model = \"KoboldAI/fairseq-dense-13B-Nerys\"\n",
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"Shinen 13B\":\n",
" Model = \"KoboldAI/fairseq-dense-13B-Shinen\"\n",
" path = \"\"\n",
@ -97,41 +101,25 @@
" Drive = \"Unextracted (Less Space)\"\n",
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-neox-20b-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.03,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-neox-20b-jax.settings\n",
"elif Model == \"Skein 6B\":\n",
" Model = \"TPUMeshTransformerGPTJ\"\n",
" path = \" -p gpt-j-6b-skein-jax\"\n",
" location = \"colab\"\n",
" download = \" -a https://storage.henk.tech/KoboldAI/skein-jax.txt\"\n",
" extract = \"\"\n",
" Drive = \"Unextracted (Less Space)\"\n",
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-skein-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-skein-jax.settings\n",
" Model = \"KoboldAI/GPT-J-6B-Skein\"\n",
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"Janeway 6B\":\n",
" Model = \"TPUMeshTransformerGPTJ\"\n",
" path = \" -p gpt-j-6b-janeway-jax\"\n",
" location = \"colab\"\n",
" download = \" -a https://storage.henk.tech/KoboldAI/janeway-jax.txt\"\n",
" extract = \"\"\n",
" Drive = \"Unextracted (Less Space)\"\n",
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-janeway-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"rep_pen_slope\\\": 0.7,\\n \\\"rep_pen_range\\\": 1024.0,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false,\\n \\\"singleline\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false,\\n \\\"chatmode\\\": false,\\n \\\"chatname\\\": \\\"You\\\",\\n \\\"dynamicscan\\\": false,\\n \\\"nopromptgen\\\": false,\\n \\\"rngpersist\\\": false,\\n \\\"nogenmod\\\": false,\\n \\\"autosave\\\": false,\\n \\\"welcome\\\": false,\\n \\\"newlinemode\\\": \\\"n\\\",\\n \\\"antemplate\\\": \\\"[Genre: <|>]\\\",\\n \\\"userscripts\\\": [],\\n \\\"corescript\\\": \\\"default.lua\\\",\\n \\\"softprompt\\\": \\\"\\\"\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-janeway-jax.settings\n",
" Model = \"KoboldAI/GPT-J-6B-Janeway\"\n",
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"Adventure 6B\":\n",
" Model = \"TPUMeshTransformerGPTJ\"\n",
" path = \" -p gpt-j-6b-adventure-jax\"\n",
" location = \"colab\"\n",
" download = \" -a https://api.wandb.ai/files/ve-forbryderne/adventure/carol-data/models/gpt-j-6b-adventure-jax/aria2.txt\"\n",
" extract = \"\"\n",
" Drive = \"Unextracted (Less Space)\"\n",
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-adventure-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": true\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-adventure-jax.settings\n",
" Model = \"KoboldAI/GPT-J-6B-Adventure\"\n",
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"Lit 6B\":\n",
" Model = \"hakurei/lit-6B\"\n",
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"Shinen 6B\":\n",
" Model = \"TPUMeshTransformerGPTJ\"\n",
" path = \" -p gpt-j-6b-shinen-jax\"\n",
" location = \"colab\"\n",
" download = \" -a https://storage.henk.tech/KoboldAI/shinen-jax.txt\"\n",
" extract = \"\"\n",
" Drive = \"Unextracted (Less Space)\"\n",
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-shinen-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"rep_pen_slope\\\": 0.7,\\n \\\"rep_pen_range\\\": 1024.0,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false,\\n \\\"singleline\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false,\\n \\\"chatmode\\\": false,\\n \\\"chatname\\\": \\\"You\\\",\\n \\\"dynamicscan\\\": false,\\n \\\"nopromptgen\\\": false,\\n \\\"rngpersist\\\": false,\\n \\\"nogenmod\\\": false,\\n \\\"autosave\\\": false,\\n \\\"welcome\\\": false,\\n \\\"newlinemode\\\": \\\"n\\\",\\n \\\"antemplate\\\": \\\"[Genre: <|>]\\\",\\n \\\"userscripts\\\": [],\\n \\\"corescript\\\": \\\"default.lua\\\",\\n \\\"softprompt\\\": \\\"\\\"\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-shinen-jax.settings\n",
" Model = \"KoboldAI/GPT-J-6B-Shinen\"\n",
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"Convo 6B\":\n",
" Model = \"hitomi-team/convo-6B\"\n",
" path = \"\"\n",
@ -159,37 +147,37 @@
"\n",
"| Model | Size | Style | Description |\n",
"| --- | --- | --- | --- |\n",
"| Janeway by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| Shinen by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n",
"| Skein by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n",
"| Adventure by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n",
"| Lit by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n",
"| Convo | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). |\n",
"| C1 by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n",
"| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
"| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n",
"| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n",
"| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n",
"| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n",
"| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n",
"| Fairseq Dense | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n",
"| GPT-J-6B by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n",
"| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n",
"| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n",
"\n",
"\n",
"# [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)\n",
"\n",
"| Model | Size | Style | Description |\n",
"| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ |\n",
"| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B GPU | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n",
"| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n",
"| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n",
"| Model | Size | Style | Description |\n",
"| --- | --- | --- | --- |\n",
"| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
"| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n",
"| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n",
"| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n",
"\n",
"| Style | Description |\n",
"| --------- | ------------------------------------------------------------ |\n",
"| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |\n",
"| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n",
"| Style | Description |\n",
"| --- | --- |\n",
"| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |\n",
"| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n",
"| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |\n",
"| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n",
"| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n",
"| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n",
"| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n",
"\n",
"---\n",
"## Tips to get the most out of Google Colab\n",

View File

@ -20,4 +20,5 @@ dependencies:
- flask-cloudflared
- flask-ngrok
- lupa==1.10
- transformers>=4.17
- transformers>=4.20.1
- accelerate

View File

@ -20,4 +20,5 @@ dependencies:
- flask-cloudflared
- flask-ngrok
- lupa==1.10
- transformers>=4.17
- transformers>=4.20.1
- accelerate

View File

@ -64,6 +64,17 @@ gensettingstf = [
"step": 0.05,
"default": 1.0,
"tooltip": "Alternative sampling method described in the paper \"Typical Decoding for Natural Language Generation\" (10.48550/ARXIV.2202.00666). The paper suggests 0.2 as a good value for this setting. Set this setting to 1 to disable its effect."
},
{
"uitype": "slider",
"unit": "float",
"label": "Top a Sampling",
"id": "settopa",
"min": 0.0,
"max": 1.0,
"step": 0.01,
"default": 0.0,
"tooltip": "Alternative sampling method that reduces the randomness of the AI whenever the probability of one token is much higher than all the others. Higher values have a stronger effect. Set this setting to 0 to disable its effect."
},
{
"uitype": "slider",

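The new "Top a Sampling" slider above only documents the behaviour in prose. As a rough illustration, top-a filtering is usually implemented by discarding tokens whose probability falls below `top_a * max_prob**2`; the snippet below is a hedged sketch of that common formulation, not the exact warper code used by KoboldAI (which is not part of this diff):

```python
import numpy as np

def top_a_filter(probs, top_a):
    # The more confident the single best token is, the more aggressively the
    # low-probability alternatives are removed; top_a == 0 disables the effect.
    if top_a <= 0.0:
        return probs
    limit = np.max(probs) ** 2 * top_a
    filtered = np.where(probs < limit, 0.0, probs)
    return filtered / filtered.sum()

print(top_a_filter(np.array([0.70, 0.15, 0.10, 0.05]), top_a=0.5))  # only 0.70 survives
```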
BIN
koboldai.ico Normal file

Binary file not shown.

Size: 150 KiB

BIN
koboldaiblue.ico Normal file

Binary file not shown.

Size: 152 KiB

BIN
koboldaigreen.ico Normal file

Binary file not shown.

Size: 151 KiB

37
maps/opt.json Normal file
View File

@ -0,0 +1,37 @@
{
"mtj_compat": "opt",
"mtj_pe": "fixed",
"mtj_config_map": {
"do_layer_norm_before": ["do_layer_norm_before", true],
"d_embed": "word_embed_proj_dim",
"d_model": "hidden_size",
"n_heads": "num_attention_heads",
"layers": "num_hidden_layers"
},
"static_weights": {
"decoder.embed_tokens.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}},
"decoder.project_in.weight": {"mtj": {"module": "embedding_shard", "param": "project_in"}},
"decoder.embed_positions.weight": {"mtj": {"module": "embedding_shard", "param": "pos_embs", "transforms": ["no_transpose", "remove_first_two_rows"]}},
"decoder.final_layer_norm.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}},
"decoder.final_layer_norm.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}},
"decoder.project_out.weight": {"mtj": {"module": "projection_shard", "param": "project_out"}}
},
"layer_weights": {
"decoder.layers.{layer}.self_attn.q_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear", "param": "w"}},
"decoder.layers.{layer}.self_attn.q_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear", "param": "b"}},
"decoder.layers.{layer}.self_attn.v_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "w"}},
"decoder.layers.{layer}.self_attn.v_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "b"}},
"decoder.layers.{layer}.self_attn.k_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "w"}},
"decoder.layers.{layer}.self_attn.k_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "b"}},
"decoder.layers.{layer}.self_attn.out_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "w"}},
"decoder.layers.{layer}.self_attn.out_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "b", "transforms": ["divide_by_shards"]}},
"decoder.layers.{layer}.fc1.weight": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "w"}},
"decoder.layers.{layer}.fc1.bias": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "b"}},
"decoder.layers.{layer}.fc2.weight": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "w"}},
"decoder.layers.{layer}.fc2.bias": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "b", "transforms": ["divide_by_shards"]}},
"decoder.layers.{layer}.self_attn_layer_norm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "scale"}},
"decoder.layers.{layer}.self_attn_layer_norm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "offset"}},
"decoder.layers.{layer}.final_layer_norm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "scale"}},
"decoder.layers.{layer}.final_layer_norm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "offset"}}
}
}
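The map above pairs each Hugging Face OPT parameter with the Mesh Transformer JAX module and parameter it should be copied into, with `{layer}` acting as a template slot for the layer index. The code that consumes these maps is not shown in this diff, so the loader below is purely a hypothetical sketch of how the `layer_weights` templates could be expanded into concrete name pairs:

```python
import json

def expand_layer_weights(map_path, n_layers):
    # Expand the "{layer}" placeholders into one
    # (huggingface_name, mtj_module, mtj_param) triple per layer.
    with open(map_path) as f:
        model_map = json.load(f)
    triples = []
    for hf_template, spec in model_map["layer_weights"].items():
        for layer in range(n_layers):
            mtj = spec["mtj"]
            triples.append((
                hf_template.format(layer=layer),
                mtj["module"].format(layer=layer),
                mtj["param"],
            ))
    return triples

# e.g. expand_layer_weights("maps/opt.json", n_layers=32)[:2]
```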

View File

@ -50,49 +50,50 @@ Each edition features different models and requires different hardware to run, t
### [Click here for the TPU Edition Colab](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)
| Model | Size | Style | Description |
| --- | --- | --- | --- |
| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (a newer Janeway); on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model too. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing is best done from the first or third person. |
| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focused on SFW, romantic scenes might involve a degree of nudity. |
| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories, it contains many different kinks. |
| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled; it was trained on an adventure dataset four times larger than the Adventure model's, making it excellent for text adventure gaming. On top of that it also includes light novel training, further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |
| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimic the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wacky adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |
| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support, creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |
| [Convo](https://huggingface.co/hitomi-team/convo-6B) by Hitomi Team | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). |
| [C1](https://huggingface.co/hakurei/c1-6B) by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms; it forms the basis for an interesting chatbot model and has been optimized to be used in Chatmode. |
| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available; being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is comparable to that of the 13B models and it is worth trying both, especially if you have a task that does not involve English writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |
| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook researchers, this model stems from the MoE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and is considered better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |
| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular, it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better results with larger generic models or finetuned models. |
## [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)
| Model | Size | Style | Description |
| --- | --- | --- | --- |
| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (a newer Janeway); on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown into the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model too. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |
| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focused on SFW, romantic scenes might involve a degree of nudity. |
| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model, this model is designed for Novels of a variety of genres. It is meant to be used in KoboldAI's regular mode. |
| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also known as Adventure 2.7B, this is a clone of the AI Dungeon Classic model and is best known for the epic and wacky adventures that AI Dungeon Classic players love. |
| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between an SFW Novel model and an NSFW model, this model should be a good choice. |
| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |
| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is too tame for you, Shinen might produce better results. While it is a Novel model, it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |
| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |
| Style | Description |
| --- | --- |
| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |
| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |
| Adventure | These models are excellent for people who want to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use them as Novel style models you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and, without Adventure mode enabled, will break the story flow and write actions on your behalf. |
| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |
| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |
---
## Tips to get the most out of Google Colab
- Google will occasionally show a Captcha, typically after it has been open for 30 minutes, but it can be more frequent if you often use Colab. Make sure to do these properly, or you risk getting your instance shut down and getting a lower priority towards the TPUs.
- KoboldAI uses Google Drive to store your files and settings, if you wish to upload a softprompt or userscript this can be done directly on the Google Drive website. You can also use this to download backups of your KoboldAI related files or upload models of your own.
- Don't want to save your stories on Google Drive for privacy reasons? Do not use KoboldAI's save function and instead click Download as .json; this will automatically download the story to your own computer without ever touching Google's hard drives. You can load this back through the Load from file option.
- Google shut your instance down unexpectedly? You can still make use of the Download as .json button to recover your story as long as you did not close the KoboldAI window. You can then load this back up in your next session.
- Done with KoboldAI? Go to the Runtime menu, click on Manage Sessions and terminate your open sessions that you no longer need. This trick can help you maintain higher priority towards getting a TPU.
- Models stored on Google Drive typically load faster than models we need to download from the internet.
View File
@ -1,4 +1,4 @@
transformers>=4.17
transformers>=4.20.1
Flask
Flask-SocketIO
requests
@ -11,3 +11,4 @@ markdown
bleach==4.1.0
sentencepiece
protobuf
accelerate
View File
@ -5,7 +5,7 @@ requests
optax >= 0.0.5, <= 0.0.9
dm-haiku == 0.0.5
jax == 0.2.21
transformers >= 4.17
transformers >= 4.19
progressbar2
git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck
flask
View File
@ -20,6 +20,7 @@ var button_settings;
var button_format;
var button_softprompt;
var button_userscripts;
var button_samplers;
var button_mode;
var button_mode_label;
var button_send;
@ -106,6 +107,12 @@ var using_webkit_patch = true;
var shift_down = false;
var do_clear_ent = false;
// Whether or not an entry in the Userscripts menu is being dragged
var us_dragging = false;
// Whether or not an entry in the Samplers menu is being dragged
var samplers_dragging = false;
// Display vars
var allowtoggle = false;
var formatcount = 0;
@ -173,20 +180,36 @@ function addSetting(ob) {
window["setting_"+ob.id] = refin; // Is this still needed?
window["label_"+ob.id] = reflb; // Is this still needed?
// Add event function to input
var updateLabelColor = function () {
var value = (ob.unit === "float" ? parseFloat : parseInt)(reflb.val());
if(value > ob.max || value < ob.min) {
reflb.addClass("setting-value-warning");
} else {
reflb.removeClass("setting-value-warning");
}
}
var send = function () {
sliders_throttle(ob.id, function () {
socket.send({'cmd': $(refin).attr('id'), 'data': $(refin).val()});
socket.send({'cmd': $(refin).attr('id'), 'data': $(reflb).val()});
});
reflb.val($(refin).val());
}
refin.on("input", send);
refin.on("input", function (event) {
reflb.val(refin.val());
updateLabelColor();
send();
}).on("change", updateLabelColor);
reflb.on("change", function (event) {
var value = (ob.unit === "float" ? parseFloat : parseInt)(event.target.value);
if(Number.isNaN(value) || value > ob.max || value < ob.min) {
if(Number.isNaN(value) || (ob.min >= 0 && value < 0)) {
event.target.value = refin.val();
return;
}
if (ob.unit === "float") {
value = parseFloat(value.toFixed(3)); // Round to 3 decimal places to help avoid the number being too long to fit in the box
}
refin.val(value);
reflb.val(value);
updateLabelColor();
send();
});
} else if(ob.uitype == "toggle"){
@ -957,6 +980,16 @@ function hideUSPopup() {
spcontent.html("");
}
function showSamplersPopup() {
samplerspopup.removeClass("hidden");
samplerspopup.addClass("flex");
}
function hideSamplersPopup() {
samplerspopup.removeClass("flex");
samplerspopup.addClass("hidden");
}
function buildLoadList(ar) {
disableButtons([load_accept]);
loadcontent.html("");
@ -1090,6 +1123,29 @@ function buildUSList(unloaded, loaded) {
}
}
function buildSamplerList(samplers) {
samplerslist.html("");
showSamplersPopup();
var i;
var samplers_lookup_table = [
"Top-k Sampling",
"Top-a Sampling",
"Top-p Sampling",
"Tail-free Sampling",
"Typical Sampling",
"Temperature",
]
for(i=0; i<samplers.length; i++) {
samplerslist.append("<div class=\"flex\">\
<div class=\"samplerslistitem flex-row-container\" sid=\""+samplers[i]+"\">\
<div class=\"flex-row\">\
<div>"+samplers_lookup_table[samplers[i]]+"</div>\
</div>\
</div>\
</div>");
}
}
function highlightLoadLine(ref) {
$("#loadlistcontent > div > div.popuplistselected").removeClass("popuplistselected");
ref.addClass("popuplistselected");
@ -1819,6 +1875,7 @@ $(document).ready(function(){
button_format = $('#btn_format');
button_softprompt = $("#btn_softprompt");
button_userscripts= $("#btn_userscripts");
button_samplers = $("#btn_samplers");
button_mode = $('#btnmode')
button_mode_label = $('#btnmode_label')
button_send = $('#btnsend');
@ -1867,6 +1924,10 @@ $(document).ready(function(){
usloaded = $("#uslistloaded");
us_accept = $("#btn_usaccept");
us_close = $("#btn_usclose");
samplerspopup = $("#samplerscontainer");
samplerslist = $("#samplerslist");
samplers_accept = $("#btn_samplersaccept");
samplers_close = $("#btn_samplersclose");
nspopup = $("#newgamecontainer");
ns_accept = $("#btn_nsaccept");
ns_close = $("#btn_nsclose");
@ -1889,7 +1950,7 @@ $(document).ready(function(){
modelname = msg.modelname;
}
refreshTitle();
connect_status.html("<b>Connected to KoboldAI Process!</b>");
connect_status.html("<b>Connected to KoboldAI!</b>");
connect_status.removeClass("color_orange");
connect_status.addClass("color_green");
// Reset Menus
@ -2059,48 +2120,52 @@ $(document).ready(function(){
newTextHighlight($("#n"+msg.data))
} else if(msg.cmd == "updatetemp") {
// Send current temp value to input
$("#settemp").val(parseFloat(msg.data));
$("#settempcur").val(msg.data);
$("#settemp").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatetopp") {
// Send current top p value to input
$("#settopp").val(parseFloat(msg.data));
$("#settoppcur").val(msg.data);
$("#settopp").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatetopk") {
// Send current top k value to input
$("#settopk").val(parseFloat(msg.data));
$("#settopkcur").val(msg.data);
$("#settopk").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatetfs") {
// Send current tfs value to input
$("#settfs").val(parseFloat(msg.data));
$("#settfscur").val(msg.data);
$("#settfs").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatetypical") {
// Send current typical value to input
$("#settypical").val(parseFloat(msg.data));
$("#settypicalcur").val(msg.data);
$("#settypical").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatetopa") {
// Send current top a value to input
$("#settopacur").val(msg.data);
$("#settopa").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatereppen") {
// Send current rep pen value to input
$("#setreppen").val(parseFloat(msg.data));
$("#setreppencur").val(msg.data);
$("#setreppen").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatereppenslope") {
// Send current rep pen value to input
$("#setreppenslope").val(parseFloat(msg.data));
$("#setreppenslopecur").val(msg.data);
$("#setreppenslope").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatereppenrange") {
// Send current rep pen value to input
$("#setreppenrange").val(parseFloat(msg.data));
$("#setreppenrangecur").val(msg.data);
$("#setreppenrange").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updateoutlen") {
// Send current output amt value to input
$("#setoutput").val(parseInt(msg.data));
$("#setoutputcur").val(msg.data);
$("#setoutput").val(parseInt(msg.data)).trigger("change");
} else if(msg.cmd == "updatetknmax") {
// Send current max tokens value to input
$("#settknmax").val(parseInt(msg.data));
$("#settknmaxcur").val(msg.data);
$("#settknmax").val(parseInt(msg.data)).trigger("change");
} else if(msg.cmd == "updateikgen") {
// Send current max tokens value to input
$("#setikgen").val(parseInt(msg.data));
$("#setikgencur").val(msg.data);
$("#setikgen").val(parseInt(msg.data)).trigger("change");
} else if(msg.cmd == "setlabeltemp") {
// Update setting label with value from server
$("#settempcur").val(msg.data);
@ -2116,6 +2181,9 @@ $(document).ready(function(){
} else if(msg.cmd == "setlabeltypical") {
// Update setting label with value from server
$("#settypicalcur").val(msg.data);
} else if(msg.cmd == "setlabeltypical") {
// Update setting label with value from server
$("#settopa").val(msg.data);
} else if(msg.cmd == "setlabelreppen") {
// Update setting label with value from server
$("#setreppencur").val(msg.data);
@ -2284,6 +2352,8 @@ $(document).ready(function(){
buildSPList(msg.data);
} else if(msg.cmd == "buildus") {
buildUSList(msg.data.unloaded, msg.data.loaded);
} else if(msg.cmd == "buildsamplers") {
buildSamplerList(msg.data);
} else if(msg.cmd == "askforoverwrite") {
// Show overwrite warning
show([$(".saveasoverwrite")]);
@ -2304,15 +2374,15 @@ $(document).ready(function(){
$("#setnumseqcur").html(msg.data);
} else if(msg.cmd == "updatenumseq") {
// Send current max tokens value to input
$("#setnumseq").val(parseInt(msg.data));
$("#setnumseqcur").html(msg.data);
$("#setnumseq").val(parseInt(msg.data)).trigger("change");
} else if(msg.cmd == "setlabelwidepth") {
// Update setting label with value from server
$("#setwidepthcur").html(msg.data);
} else if(msg.cmd == "updatewidepth") {
// Send current max tokens value to input
$("#setwidepth").val(parseInt(msg.data));
$("#setwidepthcur").html(msg.data);
$("#setwidepth").val(parseInt(msg.data)).trigger("change");
} else if(msg.cmd == "updateuseprompt") {
// Update toggle state
$("#setuseprompt").prop('checked', msg.data).change();
@ -2396,9 +2466,39 @@ $(document).ready(function(){
}, 2);
});
var us_click_handler = function(ev) {
setTimeout(function() {
if (us_dragging) {
return;
}
var target = $(ev.target).closest(".uslistitem")[0];
if ($.contains(document.getElementById("uslistunloaded"), target)) {
document.getElementById("uslistloaded").appendChild(target);
} else {
document.getElementById("uslistunloaded").appendChild(target);
}
}, 10);
}
var samplers_click_handler = function(ev) {
setTimeout(function() {
if (samplers_dragging) {
return;
}
var target = $(ev.target).closest(".samplerslistitem");
var next = target.parent().next().find(".samplerslistitem");
if (!next.length) {
return;
}
next.parent().after(target.parent());
}, 10);
}
// Make the userscripts menu sortable
var us_sortable_settings = {
placeholder: "ussortable-placeholder",
start: function() { us_dragging = true; },
stop: function() { us_dragging = false; },
delay: 2,
cursor: "move",
tolerance: "pointer",
@ -2407,12 +2507,28 @@ $(document).ready(function(){
scrollSensitivity: 64,
scrollSpeed: 10,
}
$(usunloaded).sortable($.extend({
usunloaded.sortable($.extend({
connectWith: "#uslistloaded",
}, us_sortable_settings));
$(usloaded).sortable($.extend({
}, us_sortable_settings)).on("click", ".uslistitem", us_click_handler);
usloaded.sortable($.extend({
connectWith: "#uslistunloaded",
}, us_sortable_settings));
}, us_sortable_settings)).on("click", ".uslistitem", us_click_handler);
// Make the samplers menu sortable
var samplers_sortable_settings = {
placeholder: "samplerssortable-placeholder",
start: function() { samplers_dragging = true; },
stop: function() { samplers_dragging = false; },
delay: 2,
cursor: "move",
tolerance: "pointer",
opacity: 0.21,
revert: 173,
scrollSensitivity: 64,
scrollSpeed: 10,
}
samplerslist.sortable($.extend({
}, samplers_sortable_settings)).on("click", ".samplerslistitem", samplers_click_handler);
// Bind actions to UI buttons
button_send.on("click", function(ev) {
@ -2548,6 +2664,10 @@ $(document).ready(function(){
button_userscripts.on("click", function(ev) {
socket.send({'cmd': 'uslistrequest', 'data': ''});
});
button_samplers.on("click", function(ev) {
socket.send({'cmd': 'samplerlistrequest', 'data': ''});
});
load_close.on("click", function(ev) {
hideLoadPopup();
@ -2581,6 +2701,16 @@ $(document).ready(function(){
socket.send({'cmd': 'usload', 'data': ''});
hideUSPopup();
});
samplers_close.on("click", function(ev) {
hideSamplersPopup();
});
samplers_accept.on("click", function(ev) {
hideMessage();
socket.send({'cmd': 'samplers', 'data': samplerslist.find(".samplerslistitem").map(function() { return parseInt($(this).attr("sid")); }).toArray()});
hideSamplersPopup();
});
button_newgame.on("click", function(ev) {
if(connected) {
View File
@ -22,6 +22,14 @@ chunk.editing, chunk.editing * {
font-style: normal !important;
}
.setting-value-warning {
color: #ff7777;
}
.setting-value-warning:focus {
color: #ffaaaa !important;
}
.settinglabel input {
width: 5ch;
background-color: inherit;
@ -449,6 +457,26 @@ body.connected #popupfooter, #popupfooter.always-available {
overflow-wrap: anywhere;
}
#samplerspopup {
width: 300px;
background-color: #262626;
margin-top: 100px;
}
@media (max-width: 768px) {
#samplerspopup {
width: 100%;
background-color: #262626;
margin-top: 100px;
}
}
#samplerslist {
height: 300px;
overflow-y: scroll;
overflow-wrap: anywhere;
}
#nspopup {
width: 350px;
background-color: #262626;
@ -742,7 +770,7 @@ body.connected .dropdown-item:hover, .dropdown-item.always-available:hover {
background-color: #3bf723;
}
.ussortable-placeholder {
.ussortable-placeholder, .samplerssortable-placeholder {
height: 4px;
background-color: #3bf723;
}
@ -1332,7 +1360,7 @@ body.connected .popupfooter, .popupfooter.always-available {
background-color: #688f1f;
}
.uslistitem {
.uslistitem, .samplerslistitem {
padding: 12px 10px 12px 10px;
display: flex;
flex-grow: 1;
@ -1344,11 +1372,11 @@ body.connected .popupfooter, .popupfooter.always-available {
transition: background-color 0.25s ease-in;
}
.uslistitemsub {
.uslistitemsub, .samplerslistitemsub {
color: #ba9;
}
.uslistitem:hover {
.uslistitem:hover, .samplerslistitem:hover {
cursor: move;
background-color: #688f1f;
}
View File
@ -9,7 +9,7 @@
<link rel="stylesheet" href="static/bootstrap.min.css">
<link rel="stylesheet" href="static/bootstrap-toggle.min.css">
<link rel="stylesheet" href="static/open-iconic-bootstrap.min.css">
<link rel="stylesheet" href="static/custom.css?ver=1.17a">
<link rel="stylesheet" href="static/custom.css?ver=1.18.1a">
<script src="static/jquery-3.6.0.min.js"></script>
<script src="static/jquery-ui.sortable.min.js"></script>
@ -17,7 +17,7 @@
<script src="static/bootstrap.min.js"></script>
<script src="static/bootstrap-toggle.min.js"></script>
<script src="static/rangy-core.min.js"></script>
<script src="static/application.js?ver=1.17e"></script>
<script src="static/application.js?ver=1.18.1a"></script>
</head>
<body>
<input type="file" id="remote-save-select" accept="application/json" style="display:none">
@ -71,6 +71,9 @@
<li class="nav-item">
<a class="nav-link" href="#" id="btn_format">Formatting</a>
</li>
<li class="nav-item">
<a class="nav-link" href="#" id="btn_samplers">Samplers</a>
</li>
<li class="nav-item">
<a class="nav-link" href="#" id="btn_userscripts">Userscripts</a>
</li>
@ -299,6 +302,19 @@
</div>
</div>
</div>
<div class="popupcontainer hidden" id="samplerscontainer">
<div id="samplerspopup">
<div class="popuptitlebar">
<div class="popuptitletext">Drag-and-drop to change the order in which the samplers are applied</div>
</div>
<div id="samplerslist">
</div>
<div class="popupfooter">
<button type="button" class="btn btn-primary" id="btn_samplersaccept">Save</button>
<button type="button" class="btn btn-primary" id="btn_samplersclose">Cancel</button>
</div>
</div>
</div>
<div class="popupcontainer hidden" id="loadcontainerdelete">
<div id="loadpopupdelete">
<div class="popuptitlebar">

View File

@ -27,6 +27,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''
import utils
import multiprocessing
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar
import progressbar
@ -63,11 +65,13 @@ def stopping_callback(generated, n_generated, excluded_world_info) -> Tuple[List
def settings_callback() -> dict:
return {
"sampler_order": utils.default_sampler_order.copy(),
"top_p": 0.9,
"temp": 0.5,
"top_k": 0,
"tfs": 1.0,
"typical": 1.0,
"top_a": 0.0,
"repetition_penalty": 1.0,
"rpslope": 0.0,
"rprange": 0,
@ -156,10 +160,10 @@ def apply_repetition_penalty_dynamic(logits, tokens, repetition_penalty, generat
logits[tokens] = penalty_logits
return logits
def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0):
def kobold_sample_dynamic(key, logits, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
'''
This gets called by generate_loop_fn to apply a series of 5 filters
to the logits (top-k, then top-p, then TFS, then typical, then temperature)
This gets called by generate_loop_fn to apply a series of 6 filters
to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
before picking one token using the modified logits
'''
# Top-k (keep only the k tokens with the highest logits and remove
@ -178,8 +182,18 @@ def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, ty
sorted_indices_to_remove,
)
return np.where(indices_to_remove, -np.inf, logits)
if top_k > 0:
logits = top_k_filter(logits)
# Top-a (remove all tokens that have softmax probability less than
# a*m^2 where m is the maximum softmax probability)
def top_a_filter(logits):
# Replace every element in the logits array
# with e (Euler's number) to the power of that element, and divide
# each element of the new array by the sum of the elements in the
# new array
probabilities = np.array(jax.nn.softmax(logits), copy=True)
# Find the largest probability
probs_max = probabilities.max()
# Remove tokens
return np.where(probabilities < probs_max * probs_max * top_a, -np.inf, logits)
# Top-p (after sorting the remaining tokens again in descending order of
# logit, remove the ones that have cumulative softmax probability
# greater than p)
@ -205,8 +219,6 @@ def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, ty
sorted_indices_to_remove,
)
return np.where(indices_to_remove, -np.inf, logits)
if top_p < 1.0:
logits = top_p_filter(logits)
# Tail free sampling (basically top-p a second time on remaining tokens
# except it's the "cumulative normalized absolute second finite
# differences of the softmax probabilities" instead of just the
@ -245,8 +257,6 @@ def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, ty
sorted_indices_to_remove,
)
return np.where(indices_to_remove, -np.inf, logits)
if tfs < 1.0:
logits = tail_free_filter(logits)
# Typical sampling (https://arxiv.org/pdf/2202.00666.pdf)
def typical_filter(logits):
# Compute softmax probabilities and the natural logarithms of them
@ -276,10 +286,16 @@ def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, ty
sorted_indices_to_remove,
)
return np.where(indices_to_remove, -jnp.inf, logits)
if typical < 1.0:
logits = typical_filter(logits)
# Temperature (just divide the logits by the temperature)
logits /= temp
def temp_filter(logits):
return logits / temp
for k in sampler_order:
if k == 0 and top_k > 0: logits = top_k_filter(logits)
if k == 1 and top_a > 0.0: logits = top_a_filter(logits)
if k == 2 and top_p < 1.0: logits = top_p_filter(logits)
if k == 3 and tfs < 1.0: logits = tail_free_filter(logits)
if k == 4 and typical < 1.0: logits = typical_filter(logits)
if k == 5 and temp != 1.0: logits = temp_filter(logits)
# Finally, pick one token using the softmax thingy again (it gives
# an array whose elements sum to 1 so it can be used nicely as a
# probability distribution)
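# A rough NumPy-only check (not the project's code) of the Top-a rule used above: tokens whose
# softmax probability falls below top_a * m**2, where m is the highest probability, are masked.
import numpy as np

def top_a_demo(logits, top_a):
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()                      # softmax
    threshold = top_a * probs.max() ** 2      # a * m^2
    return np.where(probs < threshold, -np.inf, logits)

print(top_a_demo(np.array([3.0, 2.5, 0.0, -4.0]), top_a=0.5))  # the last two tokens get masked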
@ -330,10 +346,10 @@ def apply_repetition_penalty_static(logits, tokens, repetition_penalty, generate
# positions in the logits array
return logits.at[tokens].set(penalty_logits)
def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0):
def kobold_sample_static(key, logits, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
'''
This gets called by generate_loop_fn to apply a series of 5 filters
to the logits (top-k, then top-p, then TFS, then typical, then temperature)
This gets called by generate_loop_fn to apply a series of 6 filters
to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
before picking one token using the modified logits
'''
# Top-k (keep only the k tokens with the highest logits and remove
@ -352,7 +368,18 @@ def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typ
sorted_indices_to_remove,
)
return jnp.where(indices_to_remove, -jnp.inf, logits)
logits = jax.lax.cond(top_k > 0, top_k_filter, lambda x: x, logits)
# Top-a (remove all tokens that have softmax probability less than
# a*m^2 where m is the maximum softmax probability)
def top_a_filter(logits):
# Replace every element in the logits array
# with e (Euler's number) to the power of that element, and divide
# each element of the new array by the sum of the elements in the
# new array
probabilities = jax.nn.softmax(logits)
# Find the largest probability
probs_max = probabilities.max()
# Remove tokens
return jnp.where(probabilities < probs_max * probs_max * top_a, -jnp.inf, logits)
# Top-p (after sorting the remaining tokens again in descending order of
# logit, remove the ones that have cumulative softmax probability
# greater than p)
@ -378,7 +405,6 @@ def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typ
sorted_indices_to_remove,
)
return jnp.where(indices_to_remove, -jnp.inf, logits)
logits = jax.lax.cond(top_p < 1.0, top_p_filter, lambda x: x, logits)
# Tail free sampling (basically top-p a second time on remaining tokens
# except it's the "cumulative normalized absolute second finite
# differences of the softmax probabilities" instead of just the
@ -417,7 +443,6 @@ def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typ
sorted_indices_to_remove,
)
return jnp.where(indices_to_remove, -jnp.inf, logits)
logits = jax.lax.cond(tfs < 1.0, tail_free_filter, lambda x: x, logits)
# Typical sampling (https://arxiv.org/pdf/2202.00666.pdf)
def typical_filter(logits):
# Compute softmax probabilities and the natural logarithms of them
@ -446,11 +471,16 @@ def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typ
sorted_indices_to_remove,
)
return jnp.where(indices_to_remove, -jnp.inf, logits)
logits = jax.lax.cond(typical < 1.0, typical_filter, lambda x: x, logits)
# Temperature (just divide the logits by the temperature)
def temp_filter(logits):
return logits / temp
logits = jax.lax.cond(True, temp_filter, lambda x: x, logits)
for k in sampler_order:
logits = jax.lax.cond(jnp.logical_and(k == 0, top_k > 0), top_k_filter, lambda x: x, logits)
logits = jax.lax.cond(jnp.logical_and(k == 1, top_a > 0.0), top_a_filter, lambda x: x, logits)
logits = jax.lax.cond(jnp.logical_and(k == 2, top_p < 1.0), top_p_filter, lambda x: x, logits)
logits = jax.lax.cond(jnp.logical_and(k == 3, tfs < 1.0), tail_free_filter, lambda x: x, logits)
logits = jax.lax.cond(jnp.logical_and(k == 4, typical < 1.0), typical_filter, lambda x: x, logits)
logits = jax.lax.cond(jnp.logical_and(k == 5, temp != 1.0), temp_filter, lambda x: x, logits)
# Finally, pick one token using the softmax thingy again (it gives
# an array whose elements sum to 1 so it can be used nicely as a
# probability distribution)
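# The static path runs under jax.jit, so samplers cannot be toggled with Python `if` statements
# on traced values; jax.lax.cond keeps both branches traceable. A minimal sketch of the pattern
# (assumes JAX is installed; toy values, not the project's code):
import jax
import jax.numpy as jnp

def maybe_temperature(logits, enabled, temp):
    # Divide by the temperature only when `enabled` is true, without a Python branch.
    return jax.lax.cond(enabled, lambda x: x / temp, lambda x: x, logits)

print(maybe_temperature(jnp.array([1.0, 2.0]), True, 0.5))   # [2. 4.]
print(maybe_temperature(jnp.array([1.0, 2.0]), False, 0.5))  # [1. 2.]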
@ -804,6 +834,7 @@ def infer_static(
top_k=0,
tfs=1.0,
typical=1.0,
top_a=0.0,
repetition_penalty=1.0,
rpslope=0.0,
rprange=0,
@ -811,8 +842,12 @@ def infer_static(
gen_len=80,
soft_embeddings: Optional[np.array] = None,
soft_tokens: Optional[np.array] = None,
sampler_order: Optional[List[int]] = None,
) -> List[np.array]:
maps.thread_resources.env = thread_resources_env
if sampler_order is None:
sampler_order = utils.default_sampler_order.copy()
sampler_order = np.uint32(sampler_order)
total_batch = 1
tokens = context
if(soft_tokens is not None):
@ -823,10 +858,12 @@ def infer_static(
batched_tokens = np.array([padded_tokens] * total_batch)
samples = []
batched_generator_params = {
"sampler_order": np.repeat(sampler_order[np.newaxis], total_batch, axis=0),
"temp": temp * np.ones(total_batch),
"top_p": top_p * np.ones(total_batch),
"tfs": tfs * np.ones(total_batch),
"typical": typical * np.ones(total_batch),
"top_a": top_a * np.ones(total_batch),
"repetition_penalty": repetition_penalty * np.ones(total_batch),
"rpslope": rpslope * np.ones(total_batch),
"rprange": np.full(total_batch, rprange, dtype=np.uint32),
@ -983,6 +1020,9 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2):
def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpoint=False, **kwargs) -> None:
global thread_resources_env, seq, tokenizer, network, params
if not hasattr(vars, "sampler_order") or not vars.sampler_order:
vars.sampler_order = utils.default_sampler_order.copy()
default_params = {
"compat": "j",
"layers": 28,
@ -1054,7 +1094,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
# by the number of TPU cores, and fall back to one core if an even
# number of TPU cores is not possible.
for c in (8, 6, 4, 2, 1):
if 0 == params["n_heads"] % c == params["d_model"] % c:
if 0 == params["n_heads"] % c == params.get("d_embed", params["d_model"]) % c:
params["cores_per_replica"] = c
break
@ -1079,6 +1119,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
return old_encode(s).ids
return encode
tokenizer.encode = new_encode(tokenizer.encode)
tokenizer._koboldai_header = []
elif not hf_checkpoint:
if not isinstance(params["tokenizer_class"], str) or not any(params["tokenizer_class"].endswith(s) for s in ("Tokenizer", "TokenizerFast")):
raise ValueError("`tokenizer_class` must be a string ending in 'Tokenizer' or 'TokenizerFast'")
@ -1092,13 +1133,18 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
print("Connecting to your Colab instance's TPU", flush=True)
spinner = multiprocessing.Process(target=show_spinner, args=())
spinner.start()
colab_tpu_addr = os.environ['COLAB_TPU_ADDR'].split(':')[0]
url = f'http://{colab_tpu_addr}:8475/requestversion/{driver_version}'
if os.environ.get('COLAB_TPU_ADDR', '') != '':
tpu_address = os.environ['COLAB_TPU_ADDR'] # Colab
else:
tpu_address = os.environ['TPU_NAME'] # Kaggle
tpu_address = tpu_address.replace("grpc://", "")
tpu_address_without_port = tpu_address.split(':', 1)[0]
url = f'http://{tpu_address_without_port}:8475/requestversion/{driver_version}'
config.FLAGS.jax_xla_backend = "tpu_driver"
config.FLAGS.jax_backend_target = "grpc://" + tpu_address
requests.post(url)
spinner.terminate()
print()
config.FLAGS.jax_xla_backend = "tpu_driver"
config.FLAGS.jax_backend_target = "grpc://" + os.environ['COLAB_TPU_ADDR']
cores_per_replica = params["cores_per_replica"]
seq = params["seq"]
@ -1158,13 +1204,27 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
import functools
def callback(model_dict, f, **_):
if callback.nested:
return
callback.nested = True
with zipfile.ZipFile(f, "r") as z:
try:
last_storage_key = None
f = None
current_offset = 0
print("\n\n\nThis model has ", f"{hk.data_structures.tree_size(network.state['params']):,d}".replace(",", " "), " parameters.\n")
for key in tqdm(sorted(model_dict.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)), desc="Loading model tensors"):
if utils.current_shard == 0:
print("\n\n\nThis model has ", f"{hk.data_structures.tree_size(network.state['params']):,d}".replace(",", " "), " parameters.\n")
if utils.num_shards is None or utils.current_shard == 0:
if utils.num_shards is not None:
num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
else:
num_tensors = len(model_dict)
utils.bar = tqdm(total=num_tensors, desc="Loading model tensors")
if utils.num_shards is not None:
utils.current_shard += 1
for key in sorted(model_dict.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
# Some model weights are used by transformers but not by MTJ.
# We have to materialize these weights anyways because
@ -1173,6 +1233,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
# tensors, which don't take up any actual CPU or TPU memory.
if key not in model_spec:
model_dict[key] = torch.empty(model_dict[key].shape, dtype=model_dict[key].dtype, device="meta")
utils.bar.update(1)
continue
storage_key = model_dict[key].key
@ -1200,6 +1261,8 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
# MTJ requires certain mathematical operations to be performed
# on tensors in order for them to be in the correct format
if "remove_first_two_rows" in transforms:
tensor = tensor[2:]
if "divide_by_shards" in transforms:
tensor /= params["cores_per_replica"]
if "vocab_pad" in transforms:
@ -1223,6 +1286,11 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
np.empty(params["cores_per_replica"]),
)
utils.bar.update(1)
if utils.num_shards is not None and utils.current_shard < utils.num_shards:
return
# Check for tensors that MTJ needs that were not provided in the
# HF model
for mk, mv in network.state["params"].items():
@ -1241,8 +1309,13 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
print("\n\nERROR: " + error, file=sys.stderr)
raise RuntimeError(error)
finally:
if utils.num_shards is None or utils.current_shard >= utils.num_shards:
utils.bar.close()
utils.bar = None
callback.nested = False
if isinstance(f, zipfile.ZipExtFile):
f.close()
callback.nested = False
if os.path.isdir(vars.model.replace('/', '_')):
import shutil
@ -1252,6 +1325,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
if(os.path.isdir(vars.custmodpth)):
try:
tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@ -1264,6 +1341,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
try:
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@ -1276,6 +1357,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
else:
try:
tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
View File
@ -50,4 +50,4 @@ git remote add origin %origin%
git fetch --all
git checkout %branch% -f
git reset --hard origin/%branch%
cmd /k
%windir%\system32\timeout -t 10
View File
@ -5,9 +5,22 @@ import json
import subprocess
import tempfile
import requests
import requests.adapters
import time
from tqdm.auto import tqdm
import os
import itertools
from typing import Optional
vars = None
num_shards: Optional[int] = None
current_shard = 0
from_pretrained_model_name = ""
from_pretrained_index_filename: Optional[str] = None
from_pretrained_kwargs = {}
bar = None
default_sampler_order = [0, 1, 2, 3, 4, 5]
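# For reference, a rough sketch (illustrative names, not project identifiers) of what the integer
# sampler order encodes; the indices match the lookup table in application.js and the dispatch in
# kobold_sample_dynamic/static: 0 top-k, 1 top-a, 2 top-p, 3 tail-free, 4 typical, 5 temperature.
SAMPLER_NAMES = ["top_k", "top_a", "top_p", "tail_free", "typical", "temperature"]

def describe_order(sampler_order):
    return " -> ".join(SAMPLER_NAMES[i] for i in sampler_order)

print(describe_order([0, 1, 2, 3, 4, 5]))  # the default order
print(describe_order([5, 0, 2, 3, 4, 1]))  # e.g. temperature applied first, top-a last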
#==================================================================#
# Decorator to prevent a function's actions from being run until
@ -130,10 +143,18 @@ def encodenewlines(txt):
def decodenewlines(txt):
if(vars.newlinemode == "s"):
return txt.replace("</s>", '\n')
if(vars.newlinemode == "ns"):
return txt.replace("</s>", '')
return txt
#==================================================================#
# Downloads sharded huggingface checkpoints using aria2c if possible
# Returns number of layers given an HF model config
#==================================================================#
def num_layers(config):
return config.num_layers if hasattr(config, "num_layers") else config.n_layer if hasattr(config, "n_layer") else config.num_hidden_layers
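# Quick illustrative check of the attribute fallback above; hypothetical config objects stand in
# for real Hugging Face config instances.
from types import SimpleNamespace
print(num_layers(SimpleNamespace(n_layer=28)))            # e.g. GPT-2 / GPT-J style configs
print(num_layers(SimpleNamespace(num_hidden_layers=24)))  # configs exposing num_hidden_layers only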
#==================================================================#
# Downloads huggingface checkpoints using aria2c if possible
#==================================================================#
def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_dir=None, proxies=None, resume_download=False, local_files_only=False, use_auth_token=None, user_agent=None, revision=None, mirror=None, **kwargs):
import transformers
@ -191,6 +212,7 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
if not urls:
return
etags = [h.get("X-Linked-Etag") or h.get("ETag") for u in urls for h in [requests.head(u, headers=headers, allow_redirects=False, proxies=proxies, timeout=10).headers]]
headers = [requests.head(u, headers=headers, allow_redirects=True, proxies=proxies, timeout=10).headers for u in urls]
filenames = [transformers.file_utils.url_to_filename(u, t) for u, t in zip(urls, etags)]
for n in filenames:
path = os.path.join(_cache_dir, "kai-tempfile." + n + ".aria2")
@ -206,22 +228,75 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
path = os.path.join(_cache_dir, n)
if os.path.exists(path):
os.remove(path)
total_length = sum(int(h["Content-Length"]) for h in headers)
lengths = {}
aria2_config = "\n".join(f"{u}\n out=kai-tempfile.{n}" for u, n in zip(urls, filenames)).encode()
with tempfile.NamedTemporaryFile("w+b", delete=False) as f:
f.write(aria2_config)
f.flush()
p = subprocess.Popen(["aria2c", "-x", "10", "-s", "10", "-j", "10", "--disable-ipv6", "--file-allocation=trunc", "--allow-overwrite", "--auto-file-renaming", "false", "-d", _cache_dir, "-i", f.name, "-U", transformers.file_utils.http_user_agent(user_agent)] + (["-c"] if not force_download else []) + ([f"--header='Authorization: Bearer {token}'"] if use_auth_token else []), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
for line in p.stdout:
print(line.decode(), end="", flush=True)
path = f.name
s = requests.Session()
s.mount("http://", requests.adapters.HTTPAdapter(max_retries=requests.adapters.Retry(total=120, backoff_factor=1)))
bar = None
done = False
secret = os.urandom(17).hex()
try:
os.remove(path)
except OSError:
pass
with tempfile.NamedTemporaryFile("w+b", delete=False) as f:
f.write(aria2_config)
f.flush()
p = subprocess.Popen(["aria2c", "-x", "10", "-s", "10", "-j", "10", "--enable-rpc=true", f"--rpc-secret={secret}", "--rpc-listen-port", str(vars.aria2_port), "--disable-ipv6", "--file-allocation=trunc", "--allow-overwrite", "--auto-file-renaming=false", "-d", _cache_dir, "-i", f.name, "-U", transformers.file_utils.http_user_agent(user_agent)] + (["-c"] if not force_download else []) + ([f"--header='Authorization: Bearer {token}'"] if use_auth_token else []), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
while p.poll() is None:
r = s.post(f"http://localhost:{vars.aria2_port}/jsonrpc", json={"jsonrpc": "2.0", "id": "kai", "method": "aria2.tellActive", "params": [f"token:{secret}"]}).json()["result"]
if not r:
s.close()
if bar is not None:
bar.n = bar.total
bar.close()
p.terminate()
done = True
break
if bar is None:
bar = tqdm(total=total_length, desc=f"[aria2] Downloading model", unit="B", unit_scale=True, unit_divisor=1000)
visited = set()
for x in r:
filename = x["files"][0]["path"]
lengths[filename] = (int(x["completedLength"]), int(x["totalLength"]))
visited.add(filename)
for k, v in lengths.items():
if k not in visited:
lengths[k] = (v[1], v[1])
bar.n = sum(v[0] for v in lengths.values())
bar.update()
time.sleep(0.1)
path = f.name
except Exception as e:
p.terminate()
raise e
finally:
try:
os.remove(path)
except OSError:
pass
code = p.wait()
if code:
if not done and code:
raise OSError(f"aria2 exited with exit code {code}")
for u, t, n in zip(urls, etags, filenames):
os.rename(os.path.join(_cache_dir, "kai-tempfile." + n), os.path.join(_cache_dir, n))
with open(os.path.join(_cache_dir, n + ".json"), "w") as f:
json.dump({"url": u, "etag": t}, f)
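# A self-contained sketch of the polling idea above (not the project's code): ask a local aria2c
# daemon over JSON-RPC which downloads are active and sum their completed bytes. The port and
# secret below are placeholders, not the values KoboldAI uses.
import requests

def aria2_completed_bytes(port=6800, secret="SECRET"):
    payload = {"jsonrpc": "2.0", "id": "demo",
               "method": "aria2.tellActive", "params": [f"token:{secret}"]}
    active = requests.post(f"http://localhost:{port}/jsonrpc", json=payload, timeout=10).json()["result"]
    return sum(int(x["completedLength"]) for x in active)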
#==================================================================#
# Given the path to a pytorch_model.bin.index.json, returns how many
# shards there are in the model
#==================================================================#
def get_num_shards(filename):
with open(filename) as f:
map_data = json.load(f)
return len(set(map_data["weight_map"].values()))
#==================================================================#
# Given the name/path of a sharded model and the path to a
# pytorch_model.bin.index.json, returns a list of weight names in the
# sharded model. Requires lazy loader to be enabled to work properly
#==================================================================#
def get_sharded_checkpoint_num_tensors(pretrained_model_name_or_path, filename, cache_dir=None, force_download=False, proxies=None, resume_download=False, local_files_only=False, use_auth_token=None, user_agent=None, revision=None, mirror=None, **kwargs):
import transformers.modeling_utils
import torch
shard_paths, _ = transformers.modeling_utils.get_checkpoint_shard_files(pretrained_model_name_or_path, filename, cache_dir=cache_dir, force_download=force_download, proxies=proxies, resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, user_agent=user_agent, revision=revision, mirror=mirror)
return list(itertools.chain(*(torch.load(p, map_location="cpu").keys() for p in shard_paths)))
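# Illustrative toy data: a pytorch_model.bin.index.json maps tensor names to shard files, so the
# number of distinct values in weight_map is the shard count that get_num_shards above returns.
index = {
    "weight_map": {
        "transformer.wte.weight": "pytorch_model-00001-of-00002.bin",
        "transformer.h.0.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
        "lm_head.weight": "pytorch_model-00002-of-00002.bin",
    }
}
print(len(set(index["weight_map"].values())))  # -> 2 shards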
View File
@ -148,3 +148,32 @@ class TypicalLogitsWarper(LogitsWarper):
indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
scores = scores.masked_fill(indices_to_remove, self.filter_value)
return scores
class TopALogitsWarper(LogitsWarper):
def __init__(self, top_a: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
top_a = float(top_a)
if top_a < 0 or top_a > 1.0:
raise ValueError(f"`top_a` has to be a float >= 0 and <= 1, but is {top_a}")
self.top_a = top_a
self.filter_value = filter_value
self.min_tokens_to_keep = min_tokens_to_keep
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
if self.filter_value >= 1.0:
return scores
sorted_logits, sorted_indices = torch.sort(scores, descending=True)
probs = sorted_logits.softmax(dim=-1)
# Remove tokens with probability less than top_a*(max(probs))^2 (token with 0 are kept)
probs_max = probs[..., 0, None]
sorted_indices_to_remove = probs < probs_max * probs_max * self.top_a
if self.min_tokens_to_keep > 1:
# Keep at least min_tokens_to_keep
sorted_indices_to_remove[..., : self.min_tokens_to_keep] = 0
indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
scores = scores.masked_fill(indices_to_remove, self.filter_value)
return scores
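# A hedged usage sketch of the warper defined above, called the way transformers invokes logits
# warpers (input_ids, scores); the numbers are toy values rather than real model output.
import torch

warper = TopALogitsWarper(top_a=0.5)
scores = torch.tensor([[4.0, 3.0, 0.0, -2.0]])
input_ids = torch.zeros((1, 1), dtype=torch.long)  # unused by this particular warper
print(warper(input_ids, scores))  # improbable tokens are replaced with -inf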