diff --git a/.gitignore b/.gitignore index d470fb4b..5b024bd8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,8 @@ stories /.project *.bak miniconda3 +runtime +bin *.settings __pycache__ *.log @@ -21,9 +23,14 @@ userscripts softprompts models !models/models go here.txt +Uninstall +.ipynb_checkpoints # Ignore PyCharm project files. .idea # Ignore compiled Python files. *.pyc + +# Don't ignore defaults +!defaults/* \ No newline at end of file diff --git a/notebook.bat b/Jupyter.bat similarity index 80% rename from notebook.bat rename to Jupyter.bat index e68e7848..df19a07f 100644 --- a/notebook.bat +++ b/Jupyter.bat @@ -7,23 +7,23 @@ IF %M%==2 GOTO subfolder IF %M%==3 GOTO drivemap_B :subfolder -umamba.exe install --no-shortcuts -r miniconda3 -n base -c conda-forge jupyter +umamba.exe install --no-shortcuts -r miniconda3 -n base -c conda-forge jupyterlab jupyterlab-git call miniconda3\condabin\activate -jupyter notebook +jupyter-lab cmd /k :drivemap subst K: miniconda3 >nul -umamba.exe install --no-shortcuts -r K:\python\ -n base -c conda-forge jupyter +umamba.exe install --no-shortcuts -r K:\python\ -n base -c conda-forge jupyterlab jupyterlab-git call K:\python\condabin\activate -jupyter notebook +jupyter-lab subst K: /D cmd /k :drivemap_B subst B: miniconda3 >nul -umamba.exe install --no-shortcuts -r B:\python\ -n base -c conda-forge jupyter +umamba.exe install --no-shortcuts -r B:\python\ -n base -c conda-forge jupyterlab jupyterlab-git call B:\python\condabin\activate -jupyter notebook +jupyter-lab subst B: /D cmd /k \ No newline at end of file diff --git a/Uninstall.bat b/Uninstall.bat new file mode 100644 index 00000000..ed143996 --- /dev/null +++ b/Uninstall.bat @@ -0,0 +1,32 @@ +@echo off +cd /D %~dp0 +TITLE KoboldAI Uninstall Helper +SET /P M=nul +IF %M%==1 subst /D K: >nul + +IF "%1" == "FORCE" GOTO UNINSTALL + +IF EXIST "Uninstall\unins000.exe" ( + start Uninstall\unins000.exe + exit +) ELSE ( + echo This will remove all KoboldAI folders that do not contain user data + pause + GOTO UNINSTALL +) + +:UNINSTALL +echo Uninstallation in progress, please wait... +set DM=Y +attrib -h .git >nul +for /d %%D in (*) do if not "%%~nxD"=="stories" if not "%%~nxD"=="userscripts" if not "%%~nxD"=="settings" if not "%%~nxD"=="softprompts" if not "%%~nxD"=="models" if not "%%~nxD"=="Uninstall" rmdir /S /Q %%~nxD +for %%i in (*) do if not "%%i"=="Uninstall.bat" del /q "%%i" +set /P DM=Would you like to delete the models folder? (Y/n) : +IF %DM%==Y rmdir models /s /q +IF %DM%==y rmdir models /s /q +set DM=N +set /P DM=Would you like to delete all other user folders? 
(y/N) : +IF %DM%==Y rmdir stories userscripts settings softprompts /s /q +IF %DM%==y rmdir stories userscripts settings softprompts /s /q +del Uninstall.bat \ No newline at end of file diff --git a/aiserver.py b/aiserver.py index 92c1442b..4e33332b 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1,7 +1,7 @@ #!/usr/bin/python3 #==================================================================# # KoboldAI -# Version: 1.17.0 +# Version: 1.18.0 # By: KoboldAIDev and the KoboldAI Community #==================================================================# @@ -10,10 +10,15 @@ import eventlet eventlet.monkey_patch(all=True, thread=False) import os os.system("") +__file__ = os.path.dirname(os.path.realpath(__file__)) +os.chdir(__file__) os.environ['EVENTLET_THREADPOOL_SIZE'] = '1' os.environ['TOKENIZERS_PARALLELISM'] = 'false' from eventlet import tpool +import logging +logging.getLogger("urllib3").setLevel(logging.ERROR) + from os import path, getcwd import time import re @@ -21,11 +26,15 @@ import json import collections import zipfile import packaging +import packaging.version import contextlib import traceback import threading import markdown import bleach +import itertools +import bisect +import functools from collections.abc import Iterable from typing import Any, Callable, TypeVar, Tuple, Union, Dict, Set, List @@ -49,6 +58,16 @@ if lupa.LUA_VERSION[:2] != (5, 4): print(f"Please install lupa==1.10. You have lupa {lupa.__version__}.", file=sys.stderr) +# Make sure tqdm progress bars display properly in Colab +from tqdm.auto import tqdm +old_init = tqdm.__init__ +def new_init(self, *args, **kwargs): + old_init(self, *args, **kwargs) + if(self.ncols == 0 and kwargs.get("ncols") != 0): + self.ncols = 99 +tqdm.__init__ = new_init + + #==================================================================# # Variables & Storage #==================================================================# @@ -65,35 +84,115 @@ class colors: UNDERLINE = '\033[4m' # AI models -modellist = [ +mainmenu = [ ["Load a model from its directory", "NeoCustom", ""], ["Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom", ""], - ["Skein 6B (Hybrid)", "KoboldAI/GPT-J-6B-Skein", "16GB"], - ["Janeway 6B (Novel)", "KoboldAI/GPT-J-6B-Janeway", "16GB"], + ["Adventure Models", "adventurelist", ""], + ["Novel Models", "novellist", ""], + ["NSFW Models", "nsfwlist", ""], + ["Chatbot Models", "chatlist", ""], + ["Untuned GPT-Neo/J", "gptneolist", ""], + ["Untuned Fairseq Dense", "fsdlist", ""], + ["Untuned OPT", "optlist", ""], + ["Untuned XGLM", "xglmlist", ""], + ["Untuned GPT2", "gpt2list", ""], + ["Online Services", "apilist", ""], + ["Read Only (No AI)", "ReadOnly", ""] + ] + +adventurelist= [ + ["Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"], + ["Skein 6B", "KoboldAI/GPT-J-6B-Skein", "16GB"], ["Adventure 6B", "KoboldAI/GPT-J-6B-Adventure", "16GB"], + ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB"], + ["Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "8GB"], + ["Adventure 1.3B", "KoboldAI/GPT-Neo-1.3B-Adventure", "6GB"], + ["Adventure 125M (Mia)", "Merry/AID-Neo-125M", "2GB"], + ["Return to Main Menu", "Return", ""], +] + +novellist= [ + ["Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"], + ["Janeway FSD 13B", "KoboldAI/fairseq-dense-13B-Janeway", "32GB"], + ["Janeway FSD 6.7B", "KoboldAI/fairseq-dense-6.7B-Janeway", "16GB"], + ["Janeway Neo 6B", "KoboldAI/GPT-J-6B-Janeway", "16GB"], + ["Janeway Neo 2.7B", "KoboldAI/GPT-Neo-2.7B-Janeway", "8GB"], + 
["Janeway FSD 2.7B", "KoboldAI/fairseq-dense-2.7B-Janeway", "8GB"], + ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB"], + ["Horni-LN 2.7B", "KoboldAI/GPT-Neo-2.7B-Horni-LN", "8GB"], + ["Picard 2.7B (Older Janeway)", "KoboldAI/GPT-Neo-2.7B-Picard", "8GB"], + ["Return to Main Menu", "Return", ""], +] + +nsfwlist= [ + ["Shinen FSD 13B (NSFW)", "KoboldAI/fairseq-dense-13B-Shinen", "32GB"], + ["Shinen FSD 6.7B (NSFW)", "KoboldAI/fairseq-dense-6.7B-Shinen", "16GB"], ["Lit 6B (NSFW)", "hakurei/lit-6B", "16GB"], ["Shinen 6B (NSFW)", "KoboldAI/GPT-J-6B-Shinen", "16GB"], + ["Horni 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Horni", "8GB"], + ["Shinen 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Shinen", "8GB"], + ["Return to Main Menu", "Return", ""], +] + +chatlist= [ ["Convo 6B (Chatbot)", "hitomi-team/convo-6B", "16GB"], ["C1 6B (Chatbot)", "hakurei/c1-6B", "16GB"], - ["Janeway 2.7B (Novel)", "KoboldAI/GPT-Neo-2.7B-Janeway", "8GB"], - ["Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "8GB"], - ["Picard 2.7B (Novel)", "KoboldAI/GPT-Neo-2.7B-Picard", "8GB"], - ["Horni 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Horni", "8GB"], - ["Horni-LN 2.7B (Novel)", "KoboldAI/GPT-Neo-2.7B-Horni-LN", "8GB"], - ["Shinen 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Shinen", "8GB"], + ["C1 1.3B (Chatbot)", "iokru/c1-1.3B", "6GB"], + ["Return to Main Menu", "Return", ""], +] +gptneolist = [ ["GPT-J 6B", "EleutherAI/gpt-j-6B", "16GB"], ["GPT-Neo 2.7B", "EleutherAI/gpt-neo-2.7B", "8GB"], ["GPT-Neo 1.3B", "EleutherAI/gpt-neo-1.3B", "6GB"], + ["GPT-Neo 125M", "EleutherAI/gpt-neo-125M", "2GB"], + ["Return to Main Menu", "Return", ""], +] + +gpt2list = [ ["GPT-2 XL", "gpt2-xl", "6GB"], ["GPT-2 Large", "gpt2-large", "4GB"], ["GPT-2 Med", "gpt2-medium", "2GB"], ["GPT-2", "gpt2", "2GB"], + ["Return to Main Menu", "Return", ""], + ] + +optlist = [ + ["OPT 30B", "facebook/opt-30b", "64GB"], + ["OPT 13B", "facebook/opt-13b", "32GB"], + ["OPT 6.7B", "facebook/opt-6.7b", "16GB"], + ["OPT 2.7B", "facebook/opt-2.7b", "8GB"], + ["OPT 1.3B", "facebook/opt-1.3b", "4GB"], + ["OPT 350M", "facebook/opt-350m", "2GB"], + ["OPT 125M", "facebook/opt-125m", "1GB"], + ["Return to Main Menu", "Return", ""], + ] + +fsdlist = [ + ["Fairseq Dense 13B", "KoboldAI/fairseq-dense-13B", "32GB"], + ["Fairseq Dense 6.7B", "KoboldAI/fairseq-dense-6.7B", "16GB"], + ["Fairseq Dense 2.7B", "KoboldAI/fairseq-dense-2.7B", "8GB"], + ["Fairseq Dense 1.3B", "KoboldAI/fairseq-dense-1.3B", "4GB"], + ["Fairseq Dense 355M", "KoboldAI/fairseq-dense-355M", "2GB"], + ["Fairseq Dense 125M", "KoboldAI/fairseq-dense-125M", "1GB"], + ["Return to Main Menu", "Return", ""], + ] + +xglmlist = [ + ["XGLM 4.5B (Larger Dataset)", "facebook/xglm-4.5B", "12GB"], + ["XGLM 7.5B", "facebook/xglm-7.5B", "18GB"], + ["XGLM 2.9B", "facebook/xglm-2.9B", "10GB"], + ["XGLM 1.7B", "facebook/xglm-1.7B", "6GB"], + ["XGLM 564M", "facebook/xglm-564M", "4GB"], + ["Return to Main Menu", "Return", ""], + ] + +apilist = [ + ["GooseAI API (requires API key)", "GooseAI", ""], ["OpenAI API (requires API key)", "OAI", ""], ["InferKit API (requires API key)", "InferKit", ""], ["KoboldAI Server API (Old Google Colab)", "Colab", ""], - ["Read Only (No AI)", "ReadOnly", ""] - ] - + ["Return to Main Menu", "Return", ""], +] # Variables class vars: lastact = "" # The last action received from the user @@ -103,17 +202,18 @@ class vars: model_type = "" # Model Type (Automatically taken from the model config) noai = False # Runs the script without starting up the transformers pipeline aibusy = False # Stops submissions 
while the AI is working - max_length = 1024 # Maximum number of tokens to submit per action + max_length = 2048 # Maximum number of tokens to submit per action ikmax = 3000 # Maximum number of characters to submit to InferKit genamt = 80 # Amount of text for each action to generate ikgen = 200 # Number of characters for InferKit to generate rep_pen = 1.1 # Default generator repetition_penalty - rep_pen_slope = 1.0 # Default generator repetition penalty slope - rep_pen_range = 512 # Default generator repetition penalty range + rep_pen_slope = 0.7 # Default generator repetition penalty slope + rep_pen_range = 1024 # Default generator repetition penalty range temp = 0.5 # Default generator temperature top_p = 0.9 # Default generator top_p top_k = 0 # Default generator top_k tfs = 1.0 # Default generator tfs (tail-free sampling) + typical = 1.0 # Default generator typical sampling threshold numseqs = 1 # Number of sequences to ask the generator to create gamestarted = False # Whether the game has started (disables UI elements) gamesaved = True # Whether or not current game is saved @@ -125,12 +225,19 @@ class vars: setauthornotetemplate = authornotetemplate # Saved author's note template in settings andepth = 3 # How far back in history to append author's note actions = structures.KoboldStoryRegister() # Actions submitted by user and AI + actions_metadata = {} # List of dictionaries, one dictionary for every action, containing information about the action such as alternative options. + # Contains at least the same number of items as actions. The Back action will remove an item from actions, but not from actions_metadata + # Dictionary keys are: + # Selected Text: (text the user had selected. None when this is a newly generated action) + # Alternative Generated Text: {Text, Pinned, Previous Selection, Edited} + # worldinfo = [] # List of World Info key/value objects worldinfo_i = [] # List of World Info key/value objects sans uninitialized entries worldinfo_u = {} # Dictionary of World Info UID - key/value pairs wifolders_d = {} # Dictionary of World Info folder UID-info pairs wifolders_l = [] # List of World Info folder UIDs wifolders_u = {} # Dictionary of pairs of folder UID - list of WI UID + modelconfig = {} # Raw contents of the model's config.json, or empty dictionary if none found lua_state = None # Lua state of the Lua scripting system lua_koboldbridge = None # `koboldbridge` from bridge.lua lua_kobold = None # `kobold` from bridge.lua @@ -143,12 +250,16 @@ class vars: abort = False # Whether or not generation was aborted by clicking on the submit button during generation compiling = False # If using a TPU Colab, this will be set to True when the TPU backend starts compiling and then set to False again checking = False # Whether or not we are actively checking to see if TPU backend is compiling or not + sp_changed = False # This gets set to True whenever a userscript changes the soft prompt so that check_for_sp_change() can alert the browser that the soft prompt has changed spfilename = "" # Filename of soft prompt to load, or an empty string if not using a soft prompt userscripts = [] # List of userscripts to load last_userscripts = [] # List of previous userscript filenames from the previous time userscripts were sent via usstatitems corescript = "default.lua" # Filename of corescript to load # badwords = [] # Array of str/chr values that should be removed from output badwordsids = [[13460], [6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913],
[24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting + badwordsids_neox = [[0], [1], [44162], [9502], [12520], [31841], [36320], [49824], [34417], [6038], [34494], [24815], [26635], [24345], [3455], [28905], [44270], [17278], [32666], [46880], [7086], [43189], [37322], [17778], [20879], [49821], [3138], [14490], [4681], [21391], [26786], [43134], [9336], [683], [48074], [41256], [19181], [29650], [28532], [36487], [45114], [46275], [16445], [15104], [11337], [1168], [5647], [29], [27482], [44965], [43782], [31011], [42944], [47389], [6334], [17548], [38329], [32044], [35487], [2239], [34761], [7444], [1084], [12399], [18990], [17636], [39083], [1184], [35830], [28365], [16731], [43467], [47744], [1138], [16079], [40116], [45564], [18297], [42368], [5456], [18022], [42696], [34476], [23505], [23741], [39334], [37944], [45382], [38709], [33440], [26077], [43600], [34418], [36033], [6660], [48167], [48471], [15775], [19884], [41533], [1008], [31053], [36692], [46576], [20095], [20629], [31759], [46410], [41000], [13488], [30952], [39258], [16160], [27655], [22367], [42767], [43736], [49694], [13811], [12004], [46768], [6257], [37471], [5264], [44153], [33805], [20977], [21083], [25416], [14277], [31096], [42041], [18331], [33376], [22372], [46294], [28379], [38475], [1656], [5204], [27075], [50001], [16616], [11396], [7748], [48744], [35402], [28120], [41512], [4207], [43144], [14767], [15640], [16595], [41305], [44479], [38958], [18474], [22734], [30522], [46267], [60], [13976], [31830], [48701], [39822], [9014], [21966], [31422], [28052], [34607], [2479], [3851], [32214], [44082], [45507], [3001], [34368], [34758], [13380], [38363], [4299], [46802], [30996], [12630], [49236], [7082], [8795], [5218], [44740], [9686], [9983], [45301], [27114], [40125], [1570], [26997], [544], [5290], [49193], [23781], [14193], [40000], [2947], [43781], [9102], [48064], [42274], [18772], [49384], [9884], [45635], [43521], [31258], [32056], [47686], [21760], [13143], [10148], [26119], [44308], [31379], [36399], [23983], [46694], [36134], [8562], [12977], [35117], [28591], [49021], [47093], [28653], [29013], [46468], [8605], [7254], [25896], [5032], [8168], [36893], [38270], [20499], [27501], [34419], [29547], [28571], [36586], [20871], [30537], [26842], [21375], [31148], [27618], [33094], [3291], [31789], [28391], [870], [9793], [41361], [47916], [27468], [43856], [8850], [35237], [15707], [47552], [2730], [41449], [45488], [3073], [49806], [21938], [24430], [22747], [20924], [46145], [20481], [20197], [8239], [28231], [17987], [42804], [47269], [29972], [49884], [21382], [46295], [36676], [34616], [3921], [26991], [27720], [46265], [654], [9855], [40354], [5291], [34904], [44342], [2470], 
[14598], [880], [19282], [2498], [24237], [21431], [16369], [8994], [44524], [45662], [13663], [37077], [1447], [37786], [30863], [42854], [1019], [20322], [4398], [12159], [44072], [48664], [31547], [18736], [9259], [31], [16354], [21810], [4357], [37982], [5064], [2033], [32871], [47446], [62], [22158], [37387], [8743], [47007], [17981], [11049], [4622], [37916], [36786], [35138], [29925], [14157], [18095], [27829], [1181], [22226], [5709], [4725], [30189], [37014], [1254], [11380], [42989], [696], [24576], [39487], [30119], [1092], [8088], [2194], [9899], [14412], [21828], [3725], [13544], [5180], [44679], [34398], [3891], [28739], [14219], [37594], [49550], [11326], [6904], [17266], [5749], [10174], [23405], [9955], [38271], [41018], [13011], [48392], [36784], [24254], [21687], [23734], [5413], [41447], [45472], [10122], [17555], [15830], [47384], [12084], [31350], [47940], [11661], [27988], [45443], [905], [49651], [16614], [34993], [6781], [30803], [35869], [8001], [41604], [28118], [46462], [46762], [16262], [17281], [5774], [10943], [5013], [18257], [6750], [4713], [3951], [11899], [38791], [16943], [37596], [9318], [18413], [40473], [13208], [16375]] + badwordsids_opt = [[44717], [46613], [48513], [49923], [50185], [48755], [8488], [43303], [49659], [48601], [49817], [45405], [48742], [49925], [47720], [11227], [48937], [48784], [50017], [42248], [49310], [48082], [49895], [50025], [49092], [49007], [8061], [44226], [0], [742], [28578], [15698], [49784], [46679], [39365], [49281], [49609], [48081], [48906], [46161], [48554], [49670], [48677], [49721], [49632], [48610], [48462], [47457], [10975], [46077], [28696], [48709], [43839], [49798], [49154], [48203], [49625], [48395], [50155], [47161], [49095], [48833], [49420], [49666], [48443], [22176], [49242], [48651], [49138], [49750], [40389], [48021], [21838], [49070], [45333], [40862], [1], [49915], [33525], [49858], [50254], [44403], [48992], [48872], [46117], [49853], [47567], [50206], [41552], [50068], [48999], [49703], [49940], [49329], [47620], [49868], [49962], [2], [44082], [50236], [31274], [50260], [47052], [42645], [49177], [17523], [48691], [49900], [49069], [49358], [48794], [47529], [46479], [48457], [646], [49910], [48077], [48935], [46386], [48902], [49151], [48759], [49803], [45587], [48392], [47789], [48654], [49836], [49230], [48188], [50264], [46844], [44690], [48505], [50161], [27779], [49995], [41833], [50154], [49097], [48520], [50018], [8174], [50084], [49366], [49526], [50193], [7479], [49982], [3]] + fp32_model = False # Whether or not the most recently loaded HF model was in fp32 format deletewi = None # Temporary storage for UID to delete wirmvwhtsp = False # Whether to remove leading whitespace from WI entries widepth = 3 # How many historical actions to scan for WI hits @@ -183,7 +294,7 @@ class vars: recentrngm = None # If a new random game was recently generated without Submitting after, this is the memory used (as a string), otherwise this is None useprompt = False # Whether to send the full prompt with every submit action breakmodel = False # For GPU users, whether to use both system RAM and VRAM to conserve VRAM while offering speedup compared to CPU-only - bmsupported = False # Whether the breakmodel option is supported (GPT-Neo/GPT-J only, currently) + bmsupported = False # Whether the breakmodel option is supported (GPT-Neo/GPT-J/XGLM/OPT only, currently) nobreakmodel = False # Something specifically requested Breakmodel to be disabled (For example a models config) smandelete = False # Whether 
stories can be deleted from inside the browser smanrename = False # Whether stories can be renamed from inside the browser @@ -200,21 +311,27 @@ class vars: adventure = False actionmode = 1 dynamicscan = False - remote = False + host = False nopromptgen = False rngpersist = False nogenmod = False welcome = False # Custom Welcome Text (False is default) newlinemode = "n" + quiet = False # If set will suppress any story text from being printed to the console (will only be seen on the client web page) + debug = False # If set to true, will send debug information to the client for display + lazy_load = True # Whether or not to use torch_lazy_loader.py for transformers models in order to reduce CPU memory usage + use_colab_tpu = os.environ.get("COLAB_TPU_ADDR", "") != "" or os.environ.get("TPU_NAME", "") != "" # Whether or not we're in a Colab TPU instance or Kaggle TPU instance and are going to use the TPU rather than the CPU + +utils.vars = vars #==================================================================# # Function to get model selection at startup #==================================================================# -def getModelSelection(): - print(" # Model VRAM\n =========================================") +def getModelSelection(modellist): + print(" # Model\t\t\t\t\t\tVRAM\n ========================================================") i = 1 for m in modellist: - print(" {0} - {1}\t\t{2}".format("{:<2}".format(i), m[0].ljust(15), m[2])) + print(" {0} - {1}\t\t\t{2}".format("{:<2}".format(i), m[0].ljust(25), m[2])) i += 1 print(" "); modelsel = 0 @@ -226,30 +343,36 @@ def getModelSelection(): else: print("{0}Please enter a valid selection.{1}".format(colors.RED, colors.END)) - # If custom model was selected, get the filesystem location and store it - if(vars.model == "NeoCustom" or vars.model == "GPT2Custom"): - print("{0}Please choose the folder where pytorch_model.bin is located:{1}\n".format(colors.CYAN, colors.END)) + # Model Lists + try: + getModelSelection(eval(vars.model)) + except Exception as e: + if(vars.model == "Return"): + getModelSelection(mainmenu) + + # If custom model was selected, get the filesystem location and store it + if(vars.model == "NeoCustom" or vars.model == "GPT2Custom"): + print("{0}Please choose the folder where pytorch_model.bin is located:{1}\n".format(colors.CYAN, colors.END)) + modpath = fileops.getdirpath(getcwd() + "/models", "Select Model Folder") - modpath = fileops.getdirpath(getcwd(), "Select Model Folder") - - if(modpath): - # Save directory to vars - vars.custmodpth = modpath - else: - # Print error and retry model selection - print("{0}Model select cancelled!{1}".format(colors.RED, colors.END)) - print("{0}Select an AI model to continue:{1}\n".format(colors.CYAN, colors.END)) - getModelSelection() + if(modpath): + # Save directory to vars + vars.custmodpth = modpath + else: + # Print error and retry model selection + print("{0}Model select cancelled!{1}".format(colors.RED, colors.END)) + print("{0}Select an AI model to continue:{1}\n".format(colors.CYAN, colors.END)) + getModelSelection(mainmenu) #==================================================================# # Return all keys in tokenizer dictionary containing char #==================================================================# -def gettokenids(char): - keys = [] - for key in vocab_keys: - if(key.find(char) != -1): - keys.append(key) - return keys +#def gettokenids(char): +# keys = [] +# for key in vocab_keys: +# if(key.find(char) != -1): +# keys.append(key) +# return keys 
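The model menu above is now a set of nested lists: each entry's second field is either a Hugging Face model id, the name of another menu list defined in this file, or the sentinel "Return". The following is only a rough sketch of that dispatch, with the menus abbreviated and a plain dict lookup standing in for the eval(vars.model) call the real getModelSelection() flow relies on:

# Sketch only (not the shipped code): a menu pick whose second field names
# another menu descends into it, "Return" goes back to the main menu, and
# anything else is treated as the final model selection.
mainmenu = [
    ["Adventure Models", "adventurelist", ""],
    ["Read Only (No AI)", "ReadOnly", ""],
]
adventurelist = [
    ["Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "8GB"],
    ["Return to Main Menu", "Return", ""],
]
menus = {"adventurelist": adventurelist}

def resolve(picks, menu=mainmenu):
    # Follow a sequence of 0-based menu selections until a model id is reached.
    for pick in picks:
        target = menu[pick][1]
        if target in menus:        # submenu name -> descend into that menu
            menu = menus[target]
        elif target == "Return":   # sentinel -> back to the top-level menu
            menu = mainmenu
        else:                      # anything else is the chosen model
            return target
    return None

assert resolve([0, 0]) == "KoboldAI/GPT-Neo-2.7B-AID"
assert resolve([0, 1, 1]) == "ReadOnly"  # descend, return, then pick Read Only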
#==================================================================# # Return Model Name @@ -258,7 +381,7 @@ def getmodelname(): if(args.configname): modelname = args.configname return modelname - if(vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ")): + if(vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")): modelname = os.path.basename(os.path.normpath(vars.custmodpth)) return modelname else: @@ -285,10 +408,10 @@ def device_list(n_layers, primary=None, selected=None): sep_color = colors.YELLOW print(f"{row_color} {' '*9} N/A {sep_color}|{row_color} {n_layers:3} {sep_color}|{row_color} (CPU){colors.END}") -def device_config(model): +def device_config(config): global breakmodel, generator import breakmodel - n_layers = model.config.num_layers if hasattr(model.config, "num_layers") else model.config.n_layer + n_layers = utils.num_layers(config) if(args.breakmodel_gpulayers is not None): try: breakmodel.gpu_blocks = list(map(int, args.breakmodel_gpulayers.split(','))) @@ -361,49 +484,85 @@ def device_config(model): # If all layers are on the same device, use the old GPU generation mode while(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0): breakmodel.gpu_blocks.pop() - if(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (-1, model.config.num_layers if hasattr(model.config, "num_layers") else model.config.n_layer)): + if(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (-1, utils.num_layers(config))): vars.breakmodel = False vars.usegpu = True vars.gpu_device = len(breakmodel.gpu_blocks)-1 - model = model.half().to(vars.gpu_device) - generator = model.generate return if(not breakmodel.gpu_blocks): print("Nothing assigned to a GPU, reverting to CPU only mode") vars.breakmodel = False vars.usegpu = False - model = model.to('cpu').float() + return + +def move_model_to_devices(model): + global generator + + if(not vars.breakmodel): + if(vars.usegpu): + model = model.half().to(vars.gpu_device) + else: + model = model.to('cpu').float() generator = model.generate return - model.half().to('cpu') + + model.half() gc.collect() - model.transformer.wte.to(breakmodel.primary_device) - model.transformer.ln_f.to(breakmodel.primary_device) - if(hasattr(model, 'lm_head')): + if(hasattr(model, "transformer")): + model.transformer.wte.to(breakmodel.primary_device) + model.transformer.ln_f.to(breakmodel.primary_device) + if(hasattr(model, 'lm_head')): + model.lm_head.to(breakmodel.primary_device) + if(hasattr(model.transformer, 'wpe')): + model.transformer.wpe.to(breakmodel.primary_device) + elif(not hasattr(model.model, "decoder")): + model.model.embed_tokens.to(breakmodel.primary_device) + model.model.layer_norm.to(breakmodel.primary_device) model.lm_head.to(breakmodel.primary_device) - if(hasattr(model.transformer, 'wpe')): - model.transformer.wpe.to(breakmodel.primary_device) + model.model.embed_positions.to(breakmodel.primary_device) + else: + model.model.decoder.embed_tokens.to(breakmodel.primary_device) + if(model.model.decoder.project_in is not None): + model.model.decoder.project_in.to(breakmodel.primary_device) + if(model.model.decoder.project_out is not None): + model.model.decoder.project_out.to(breakmodel.primary_device) + model.model.decoder.embed_positions.to(breakmodel.primary_device) gc.collect() - GPTNeoModel.forward = breakmodel.new_forward + GPTNeoModel.forward = breakmodel.new_forward_neo if("GPTJModel" in globals()): - GPTJModel.forward = breakmodel.new_forward + GPTJModel.forward = 
breakmodel.new_forward_neo # type: ignore + if("XGLMModel" in globals()): + XGLMModel.forward = breakmodel.new_forward_xglm # type: ignore + if("OPTDecoder" in globals()): + OPTDecoder.forward = breakmodel.new_forward_opt # type: ignore generator = model.generate - breakmodel.move_hidden_layers(model.transformer) + if(hasattr(model, "transformer")): + breakmodel.move_hidden_layers(model.transformer) + elif(not hasattr(model.model, "decoder")): + breakmodel.move_hidden_layers(model.model, model.model.layers) + else: + breakmodel.move_hidden_layers(model.model.decoder, model.model.decoder.layers) #==================================================================# # Allow the models to override some settings #==================================================================# def loadmodelsettings(): try: - model_js_config = str(model_config).partition(' ')[2] - js = json.loads(model_js_config) + js = json.loads(str(model_config).partition(' ')[2]) except Exception as e: try: - model_js_config = open(vars.custmodpth + "/config.json", "r") + try: + js = json.load(open(vars.custmodpth + "/config.json", "r")) + except Exception as e: + js = json.load(open(vars.custmodpth.replace('/', '_') + "/config.json", "r")) except Exception as e: - model_js_config = open(vars.custmodpth.replace('/', '_') + "/config.json", "r") - js = json.load(model_js_config) + js = {} + if vars.model_type == "xglm" or js.get("compat", "j") == "fairseq_lm": + vars.newlinemode = "s" # Default to newline mode if using XGLM + if vars.model_type == "opt": + vars.newlinemode = "ns" # Handle but don't convert newlines if using Fairseq models that have newlines trained in them + vars.modelconfig = js if("badwordsids" in js): vars.badwordsids = js["badwordsids"] if("nobreakmodel" in js): @@ -416,6 +575,8 @@ def loadmodelsettings(): vars.top_k = js["top_k"] if("tfs" in js): vars.tfs = js["tfs"] + if("typical" in js): + vars.typical = js["typical"] if("rep_pen" in js): vars.rep_pen = js["rep_pen"] if("rep_pen_slope" in js): @@ -439,6 +600,232 @@ def loadmodelsettings(): if(not vars.gamestarted): vars.authornotetemplate = vars.setauthornotetemplate +#==================================================================# +# Take settings from vars and write them to client settings file +#==================================================================# +def savesettings(): + # Build json to write + js = {} + js["apikey"] = vars.apikey + js["andepth"] = vars.andepth + js["temp"] = vars.temp + js["top_p"] = vars.top_p + js["top_k"] = vars.top_k + js["tfs"] = vars.tfs + js["typical"] = vars.typical + js["rep_pen"] = vars.rep_pen + js["rep_pen_slope"] = vars.rep_pen_slope + js["rep_pen_range"] = vars.rep_pen_range + js["genamt"] = vars.genamt + js["max_length"] = vars.max_length + js["ikgen"] = vars.ikgen + js["formatoptns"] = vars.formatoptns + js["numseqs"] = vars.numseqs + js["widepth"] = vars.widepth + js["useprompt"] = vars.useprompt + js["adventure"] = vars.adventure + js["chatmode"] = vars.chatmode + js["chatname"] = vars.chatname + js["dynamicscan"] = vars.dynamicscan + js["nopromptgen"] = vars.nopromptgen + js["rngpersist"] = vars.rngpersist + js["nogenmod"] = vars.nogenmod + js["autosave"] = vars.autosave + js["welcome"] = vars.welcome + js["newlinemode"] = vars.newlinemode + + js["antemplate"] = vars.setauthornotetemplate + + js["userscripts"] = vars.userscripts + js["corescript"] = vars.corescript + js["softprompt"] = vars.spfilename + + # Write it + if not os.path.exists('settings'): + os.mkdir('settings') + file = 
open("settings/" + getmodelname().replace('/', '_') + ".settings", "w") + try: + file.write(json.dumps(js, indent=3)) + finally: + file.close() + +#==================================================================# +# Don't save settings unless 2 seconds have passed without modification +#==================================================================# +@debounce(2) +def settingschanged(): + print("{0}Saving settings!{1}".format(colors.GREEN, colors.END)) + savesettings() + +#==================================================================# +# Read settings from client file JSON and send to vars +#==================================================================# + +def loadsettings(): + if(path.exists("defaults/" + getmodelname().replace('/', '_') + ".settings")): + # Read file contents into JSON object + file = open("defaults/" + getmodelname().replace('/', '_') + ".settings", "r") + js = json.load(file) + + processsettings(js) + file.close() + if(path.exists("settings/" + getmodelname().replace('/', '_') + ".settings")): + # Read file contents into JSON object + file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "r") + js = json.load(file) + + processsettings(js) + file.close() + +def processsettings(js): +# Copy file contents to vars + if("apikey" in js): + vars.apikey = js["apikey"] + if("andepth" in js): + vars.andepth = js["andepth"] + if("temp" in js): + vars.temp = js["temp"] + if("top_p" in js): + vars.top_p = js["top_p"] + if("top_k" in js): + vars.top_k = js["top_k"] + if("tfs" in js): + vars.tfs = js["tfs"] + if("typical" in js): + vars.typical = js["typical"] + if("rep_pen" in js): + vars.rep_pen = js["rep_pen"] + if("rep_pen_slope" in js): + vars.rep_pen_slope = js["rep_pen_slope"] + if("rep_pen_range" in js): + vars.rep_pen_range = js["rep_pen_range"] + if("genamt" in js): + vars.genamt = js["genamt"] + if("max_length" in js): + vars.max_length = js["max_length"] + if("ikgen" in js): + vars.ikgen = js["ikgen"] + if("formatoptns" in js): + vars.formatoptns = js["formatoptns"] + if("numseqs" in js): + vars.numseqs = js["numseqs"] + if("widepth" in js): + vars.widepth = js["widepth"] + if("useprompt" in js): + vars.useprompt = js["useprompt"] + if("adventure" in js): + vars.adventure = js["adventure"] + if("chatmode" in js): + vars.chatmode = js["chatmode"] + if("chatname" in js): + vars.chatname = js["chatname"] + if("dynamicscan" in js): + vars.dynamicscan = js["dynamicscan"] + if("nopromptgen" in js): + vars.nopromptgen = js["nopromptgen"] + if("rngpersist" in js): + vars.rngpersist = js["rngpersist"] + if("nogenmod" in js): + vars.nogenmod = js["nogenmod"] + if("autosave" in js): + vars.autosave = js["autosave"] + if("newlinemode" in js): + vars.newlinemode = js["newlinemode"] + if("welcome" in js): + vars.welcome = js["welcome"] + + if("antemplate" in js): + vars.setauthornotetemplate = js["antemplate"] + if(not vars.gamestarted): + vars.authornotetemplate = vars.setauthornotetemplate + + if("userscripts" in js): + vars.userscripts = [] + for userscript in js["userscripts"]: + if type(userscript) is not str: + continue + userscript = userscript.strip() + if len(userscript) != 0 and all(q not in userscript for q in ("..", ":")) and all(userscript[0] not in q for q in ("/", "\\")) and os.path.exists(fileops.uspath(userscript)): + vars.userscripts.append(userscript) + + if("corescript" in js and type(js["corescript"]) is str and all(q not in js["corescript"] for q in ("..", ":")) and all(js["corescript"][0] not in q for q in ("/", "\\"))): + 
vars.corescript = js["corescript"] + else: + vars.corescript = "default.lua" + +#==================================================================# +# Load a soft prompt from a file +#==================================================================# + +def check_for_sp_change(): + while(True): + time.sleep(0.1) + if(vars.sp_changed): + with app.app_context(): + emit('from_server', {'cmd': 'spstatitems', 'data': {vars.spfilename: vars.spmeta} if vars.allowsp and len(vars.spfilename) else {}}, namespace=None, broadcast=True) + vars.sp_changed = False + +def spRequest(filename): + if(not vars.allowsp): + raise RuntimeError("Soft prompts are not supported by your current model/backend") + + old_filename = vars.spfilename + + vars.spfilename = "" + settingschanged() + + if(len(filename) == 0): + vars.sp = None + vars.sp_length = 0 + if(old_filename != filename): + vars.sp_changed = True + return + + global np + if 'np' not in globals(): + import numpy as np + + z, version, shape, fortran_order, dtype = fileops.checksp(filename, vars.modeldim) + if not isinstance(z, zipfile.ZipFile): + raise RuntimeError(f"{repr(filename)} is not a valid soft prompt file") + with z.open('meta.json') as f: + vars.spmeta = json.load(f) + z.close() + + with np.load(fileops.sppath(filename), allow_pickle=False) as f: + tensor = f['tensor.npy'] + + # If the tensor is in bfloat16 format, convert it to float32 + if(tensor.dtype == 'V2'): + tensor.dtype = np.uint16 + tensor = np.uint32(tensor) << 16 + tensor.dtype = np.float32 + + if(tensor.dtype != np.float16): + tensor = np.float32(tensor) + assert not np.isinf(tensor).any() and not np.isnan(tensor).any() + + vars.sp_length = tensor.shape[-2] + vars.spmeta["n_tokens"] = vars.sp_length + + if(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")): + rows = tensor.shape[0] + padding_amount = tpu_mtj_backend.params["seq"] - (tpu_mtj_backend.params["seq"] % -tpu_mtj_backend.params["cores_per_replica"]) - rows + tensor = np.pad(tensor, ((0, padding_amount), (0, 0))) + tensor = tensor.reshape( + tpu_mtj_backend.params["cores_per_replica"], + -1, + tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]), + ) + vars.sp = tpu_mtj_backend.shard_xmap(np.float32(tensor)) + else: + vars.sp = torch.from_numpy(tensor) + + vars.spfilename = filename + settingschanged() + if(old_filename != filename): + vars.sp_changed = True + #==================================================================# # Startup #==================================================================# @@ -447,8 +834,13 @@ def loadmodelsettings(): parser = argparse.ArgumentParser(description="KoboldAI Server") parser.add_argument("--remote", action='store_true', help="Optimizes KoboldAI for Remote Play") parser.add_argument("--ngrok", action='store_true', help="Optimizes KoboldAI for Remote Play using Ngrok") +parser.add_argument("--localtunnel", action='store_true', help="Optimizes KoboldAI for Remote Play using Localtunnel") +parser.add_argument("--host", action='store_true', help="Optimizes KoboldAI for Remote Play without using a proxy service") +parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable") +parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") parser.add_argument("--path", help="Specify the Path for local models 
(For model NeoCustom or GPT2Custom)") +parser.add_argument("--revision", help="Specify the model revision for huggingface models (can be a git branch/tag name or a git commit hash)") parser.add_argument("--cpu", action='store_true', help="By default unattended launches are on the GPU use this option to force CPU usage.") parser.add_argument("--breakmodel", action='store_true', help=argparse.SUPPRESS) parser.add_argument("--breakmodel_layers", type=int, help=argparse.SUPPRESS) @@ -458,7 +850,11 @@ parser.add_argument("--override_rename", action='store_true', help="Renaming sto parser.add_argument("--configname", help="Force a fixed configuration name to aid with config management.") parser.add_argument("--colab", action='store_true', help="Optimize for Google Colab.") parser.add_argument("--nobreakmodel", action='store_true', help="Disables Breakmodel support completely.") - +parser.add_argument("--unblock", action='store_true', default=False, help="Unblocks the KoboldAI port to be accessible from other machines without optimizing for remote play (It is recommended to use --host instead)") +parser.add_argument("--quiet", action='store_true', default=False, help="If present will suppress any story related text from showing on the console") +parser.add_argument("--no_aria2", action='store_true', default=False, help="Prevents KoboldAI from using aria2 to download huggingface models more efficiently, in case aria2 is causing you issues") +parser.add_argument("--lowmem", action='store_true', help="Extra Low Memory loading for the GPU, slower but memory does not peak to twice the usage") +parser.add_argument("--savemodel", action='store_true', help="Saves the model to the models folder even if --colab is used (Allows you to save models to Google Drive)") args: argparse.Namespace = None if(os.environ.get("KOBOLDAI_ARGS") is not None): import shlex @@ -467,24 +863,41 @@ else: args = parser.parse_args() vars.model = args.model; +vars.revision = args.revision if args.colab: args.remote = True; args.override_rename = True; args.override_delete = True; args.nobreakmodel = True; + args.quiet = True; + args.lowmem = True; + +if args.quiet: + vars.quiet = True if args.nobreakmodel: vars.nobreakmodel = True; if args.remote: - vars.remote = True; + vars.host = True; if args.ngrok: - vars.remote = True; + vars.host = True; -vars.smandelete = vars.remote == args.override_delete -vars.smanrename = vars.remote == args.override_rename +if args.localtunnel: + vars.host = True; + +if args.host: + vars.host = True; + +if args.cpu: + vars.use_colab_tpu = False + +vars.smandelete = vars.host == args.override_delete +vars.smanrename = vars.host == args.override_rename + +vars.aria2_port = args.aria2_port or 6799 # Select a model to run if args.model: @@ -496,10 +909,10 @@ if args.model: else: print("{0}Welcome to the KoboldAI Server!\nListed RAM is the optimal VRAM and CPU ram can be up to twice the amount.\nMost models can run at less VRAM with reduced max tokens or less layers on the GPU.\nSelect an AI model to continue:{1}\n".format(colors.CYAN, colors.END)) - getModelSelection() + getModelSelection(mainmenu) # If transformers model was selected & GPU available, ask to use CPU or GPU -if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransformerGPTJ"]): +if(vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): vars.allowsp = True # Test for GPU support import torch @@ -515,13 +928,19 @@ if(not vars.model in ["InferKit", 
"Colab", "OAI", "ReadOnly", "TPUMeshTransforme from transformers import AutoConfig if(os.path.isdir(vars.custmodpth.replace('/', '_'))): try: - model_config = AutoConfig.from_pretrained(vars.custmodpth.replace('/', '_'), cache_dir="cache/") + model_config = AutoConfig.from_pretrained(vars.custmodpth.replace('/', '_'), revision=vars.revision, cache_dir="cache") + vars.model_type = model_config.model_type + except ValueError as e: + vars.model_type = "not_found" + elif(os.path.isdir("models/{}".format(vars.custmodpth.replace('/', '_')))): + try: + model_config = AutoConfig.from_pretrained("models/{}".format(vars.custmodpth.replace('/', '_')), revision=vars.revision, cache_dir="cache") vars.model_type = model_config.model_type except ValueError as e: vars.model_type = "not_found" else: try: - model_config = AutoConfig.from_pretrained(vars.custmodpth, cache_dir="cache/") + model_config = AutoConfig.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache") vars.model_type = model_config.model_type except ValueError as e: vars.model_type = "not_found" @@ -532,10 +951,16 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme elif(vars.model_type == "not_found"): print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)") vars.model_type = "gpt_neo" + + if(vars.model_type == "opt"): + vars.badwordsids = vars.badwordsids_opt + +if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): loadmodelsettings() + loadsettings() print("{0}Looking for GPU support...{1}".format(colors.PURPLE, colors.END), end="") vars.hascuda = torch.cuda.is_available() - vars.bmsupported = vars.model_type in ("gpt_neo", "gptj") and not vars.nobreakmodel + vars.bmsupported = vars.model_type in ("gpt_neo", "gptj", "xglm", "opt") and not vars.nobreakmodel if(args.breakmodel is not None and args.breakmodel): print("WARNING: --breakmodel is no longer supported. 
Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).", file=sys.stderr) if(args.breakmodel_layers is not None): @@ -629,12 +1054,20 @@ if(vars.model == "InferKit"): file.write(json.dumps(js, indent=3)) finally: file.close() + +# Swap OAI Server if GooseAI was selected +if(vars.model == "GooseAI"): + vars.oaiengines = "https://api.goose.ai/v1/engines" + vars.model = "OAI" + args.configname = "GooseAI" # Ask for API key if OpenAI was selected if(vars.model == "OAI"): + if not args.configname: + args.configname = "OAI" if(not path.exists("settings/" + getmodelname().replace('/', '_') + ".settings")): # If the client settings file doesn't exist, create it - print("{0}Please enter your OpenAI API key:{1}\n".format(colors.CYAN, colors.END)) + print("{0}Please enter your API key:{1}\n".format(colors.CYAN, colors.END)) vars.oaiapikey = input("Key> ") # Write API key to file os.makedirs('settings', exist_ok=True) @@ -655,7 +1088,7 @@ if(vars.model == "OAI"): file.close() else: # Get API key, add it to settings object, and write it to disk - print("{0}Please enter your OpenAI API key:{1}\n".format(colors.CYAN, colors.END)) + print("{0}Please enter your API key:{1}\n".format(colors.CYAN, colors.END)) vars.oaiapikey = input("Key> ") js["oaiapikey"] = vars.oaiapikey # Write API key to file @@ -665,38 +1098,44 @@ if(vars.model == "OAI"): finally: file.close() - # Get list of models from OAI - print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="") - req = requests.get( - vars.oaiengines, - headers = { - 'Authorization': 'Bearer '+vars.oaiapikey - } - ) - if(req.status_code == 200): - print("{0}OK!{1}".format(colors.GREEN, colors.END)) - print("{0}Please select an engine to use:{1}\n".format(colors.CYAN, colors.END)) - engines = req.json()["data"] - # Print list of engines - i = 0 - for en in engines: - print(" {0} - {1} ({2})".format(i, en["id"], "\033[92mready\033[0m" if en["ready"] == True else "\033[91mnot ready\033[0m")) - i += 1 - # Get engine to use - print("") - engselected = False - while(engselected == False): - engine = input("Engine #> ") - if(engine.isnumeric() and int(engine) < len(engines)): - vars.oaiurl = "https://api.openai.com/v1/engines/{0}/completions".format(engines[int(engine)]["id"]) - engselected = True - else: - print("{0}Please enter a valid selection.{1}".format(colors.RED, colors.END)) + if vars.custmodpth: + vars.oaiurl = vars.oaiengines + "/" + vars.custmodpth + "/completions" + args.configname = args.configname + "/" + vars.custmodpth + engselected = True else: - # Something went wrong, print the message and quit since we can't initialize an engine - print("{0}ERROR!{1}".format(colors.RED, colors.END)) - print(req.json()) - quit() + # Get list of models from OAI + print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="") + req = requests.get( + vars.oaiengines, + headers = { + 'Authorization': 'Bearer '+vars.oaiapikey + } + ) + if(req.status_code == 200): + print("{0}OK!{1}".format(colors.GREEN, colors.END)) + print("{0}Please select an engine to use:{1}\n".format(colors.CYAN, colors.END)) + engines = req.json()["data"] + # Print list of engines + i = 0 + for en in engines: + print(" {0} - {1} ({2})".format(i, en["id"], "\033[92mready\033[0m" if en["ready"] == True else "\033[91mnot ready\033[0m")) + i += 1 + # Get engine to use + print("") + engselected = False + while(engselected == False): + engine = input("Engine #> ") + if(engine.isnumeric() and int(engine) < 
len(engines)): + vars.oaiurl = vars.oaiengines + "/{0}/completions".format(engines[int(engine)]["id"]) + args.configname = args.configname + "/" + engines[int(engine)]["id"] + engselected = True + else: + print("{0}Please enter a valid selection.{1}".format(colors.RED, colors.END)) + else: + # Something went wrong, print the message and quit since we can't initialize an engine + print("{0}ERROR!{1}".format(colors.RED, colors.END)) + print(req.json()) + quit() # Ask for ngrok url if Google Colab was selected if(vars.model == "Colab"): @@ -717,23 +1156,171 @@ log.setLevel(logging.ERROR) print("{0}Initializing Flask... {1}".format(colors.PURPLE, colors.END), end="") from flask import Flask, render_template, Response, request, copy_current_request_context from flask_socketio import SocketIO, emit -app = Flask(__name__) +app = Flask(__name__, root_path=os.getcwd()) app.config['SECRET KEY'] = 'secret!' socketio = SocketIO(app, async_method="eventlet") +socketio.start_background_task(check_for_sp_change) print("{0}OK!{1}".format(colors.GREEN, colors.END)) # Start transformers and create pipeline -if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransformerGPTJ"]): +if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): if(not vars.noai): print("{0}Initializing transformers, please wait...{1}".format(colors.PURPLE, colors.END)) from transformers import StoppingCriteria, GPT2TokenizerFast, GPT2LMHeadModel, GPTNeoForCausalLM, GPTNeoModel, AutoModelForCausalLM, AutoTokenizer + for m in ("GPTJModel", "XGLMModel"): + try: + globals()[m] = getattr(__import__("transformers"), m) + except: + pass try: - from transformers import GPTJModel + from transformers.models.opt.modeling_opt import OPTDecoder except: pass import transformers.generation_utils from transformers import __version__ as transformers_version + from transformers import PreTrainedModel + from transformers import modeling_utils + old_from_pretrained = PreTrainedModel.from_pretrained.__func__ + @classmethod + def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): + vars.fp32_model = False + utils.num_shards = None + utils.current_shard = 0 + utils.from_pretrained_model_name = pretrained_model_name_or_path + utils.from_pretrained_index_filename = None + utils.from_pretrained_kwargs = kwargs + utils.bar = None + if not args.no_aria2: + utils.aria2_hook(pretrained_model_name_or_path, **kwargs) + return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs) + PreTrainedModel.from_pretrained = new_from_pretrained + if(hasattr(modeling_utils, "get_checkpoint_shard_files")): + old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files + def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs): + utils.num_shards = utils.get_num_shards(index_filename) + utils.from_pretrained_index_filename = index_filename + return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs) + modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files + + # Lazy loader + import torch_lazy_loader + def get_lazy_load_callback(n_layers, convert_to_float16=True): + if not vars.lazy_load: + return + + from tqdm.auto import tqdm + + if "breakmodel" in globals(): + gpu_blocks = breakmodel.gpu_blocks + ram_blocks = ram_blocks = n_layers - sum(gpu_blocks) + cumulative_gpu_blocks = 
tuple(itertools.accumulate(gpu_blocks)) + else: + ram_blocks = gpu_blocks = cumulative_gpu_blocks = None + + def lazy_load_callback(model_dict, f, **_): + if lazy_load_callback.nested: + return + lazy_load_callback.nested = True + + device_map = {} + + for _key, spec in lazy_load_spec.get("layer_weights", {}).items(): + for layer in range(n_layers): + key = _key.format(layer=layer) + if key not in model_dict: + continue + device = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu" if not vars.hascuda or not vars.breakmodel or layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks) + device_map[key] = device + + for key, value in model_dict.items(): + if isinstance(value, torch_lazy_loader.LazyTensor) and key not in device_map: + device_map[key] = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu" + + if utils.num_shards is None or utils.current_shard == 0: + if utils.num_shards is not None: + num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs)) + else: + num_tensors = len(device_map) + print(flush=True) + utils.bar = tqdm(total=num_tensors, desc="Loading model tensors") + + with zipfile.ZipFile(f, "r") as z: + try: + last_storage_key = None + f = None + current_offset = 0 + if utils.num_shards is not None: + utils.current_shard += 1 + for key in sorted(device_map.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)): + storage_key = model_dict[key].key + if storage_key != last_storage_key or model_dict[key].seek_offset < current_offset: + last_storage_key = storage_key + if isinstance(f, zipfile.ZipExtFile): + f.close() + f = z.open(f"archive/data/{storage_key}") + current_offset = 0 + if current_offset != model_dict[key].seek_offset: + f.read(model_dict[key].seek_offset - current_offset) + current_offset = model_dict[key].seek_offset + device = device_map[key] + size = functools.reduce(lambda x, y: x * y, model_dict[key].shape, 1) + dtype = model_dict[key].dtype + nbytes = size if dtype is torch.bool else size * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3) + #print(f"Transferring <{key}> to {'(CPU)' if device == 'cpu' else '[device ' + str(device) + ']'} ... 
", end="", flush=True) + model_dict[key] = model_dict[key].materialize(f, map_location="cpu") + if model_dict[key].dtype is torch.float32: + vars.fp32_model = True + if convert_to_float16 and vars.hascuda and (vars.breakmodel or vars.usegpu) and model_dict[key].dtype is torch.float32: + model_dict[key] = model_dict[key].to(torch.float16) + if not vars.usegpu and not vars.breakmodel and model_dict[key].dtype is torch.float16: + model_dict[key] = model_dict[key].to(torch.float32) + model_dict[key] = model_dict[key].to(device) + #print("OK", flush=True) + current_offset += nbytes + utils.bar.update(1) + finally: + if utils.num_shards is None or utils.current_shard >= utils.num_shards: + utils.bar.close() + utils.bar = None + lazy_load_callback.nested = False + if isinstance(f, zipfile.ZipExtFile): + f.close() + + lazy_load_callback.nested = False + return lazy_load_callback + + lazy_load_config_path = os.path.join("maps", vars.model_type + ".json") + if(vars.lazy_load and "model_config" in globals() and os.path.isfile(lazy_load_config_path)): + with open(lazy_load_config_path) as f: + lazy_load_spec = json.load(f) + + else: + vars.lazy_load = False + + # Some versions of transformers 4.17.0.dev0 are affected by + # https://github.com/huggingface/transformers/issues/15736 + # This is a workaround for those versions of transformers. + if(transformers_version == "4.17.0.dev0"): + try: + from transformers.models.xglm.modeling_xglm import XGLMSinusoidalPositionalEmbedding + except ImportError: + pass + else: + @torch.no_grad() + def new_forward(self, input_ids: torch.Tensor = None, inputs_embeds: torch.Tensor = None, past_key_values_length: int = 0): + bsz, seq_len = inputs_embeds.size()[:-1] + input_shape = inputs_embeds.size()[:-1] + sequence_length = input_shape[1] + position_ids = torch.arange( + past_key_values_length + self.padding_idx + 1, past_key_values_length + sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device + ).unsqueeze(0).expand(input_shape).contiguous() + max_pos = self.padding_idx + 1 + seq_len + past_key_values_length + if max_pos > self.weights.size(0): + self.make_weights(max_pos + self.offset, self.embedding_dim, self.padding_idx) + return self.weights.index_select(0, position_ids.view(-1)).view(bsz, seq_len, -1).detach() + XGLMSinusoidalPositionalEmbedding.forward = new_forward + # Patch transformers to use our soft prompt def patch_causallm(cls): old_forward = cls.forward @@ -744,7 +1331,12 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme if(vars.sp is not None): shifted_input_ids = input_ids - self.config.vocab_size input_ids.clamp_(max=self.config.vocab_size-1) - inputs_embeds = self.transformer.wte(input_ids) + if(hasattr(self, "transformer")): + inputs_embeds = self.transformer.wte(input_ids) + elif(not hasattr(self.model, "decoder")): + inputs_embeds = self.model.embed_tokens(input_ids) + else: + inputs_embeds = self.model.decoder.embed_tokens(input_ids) if(vars.sp is not None): vars.sp = vars.sp.to(inputs_embeds.dtype).to(inputs_embeds.device) inputs_embeds = torch.where( @@ -752,21 +1344,42 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme vars.sp[shifted_input_ids.clamp(min=0)], inputs_embeds, ) + if(hasattr(self, "model") and hasattr(self.model, "embed_scale")): + inputs_embeds *= self.model.embed_scale kwargs['inputs_embeds'] = inputs_embeds return old_forward(self, *args, **kwargs) cls.forward = new_causallm_forward for cls in (GPT2LMHeadModel, 
GPTNeoForCausalLM): patch_causallm(cls) - try: - from transformers import GPTJForCausalLM - patch_causallm(GPTJForCausalLM) - except: - pass + for c in ("GPTJForCausalLM", "XGLMForCausalLM", "OPTForCausalLM"): + try: + patch_causallm(getattr(__import__("transformers"), c)) + except: + pass + + + # Fix a bug in OPTForCausalLM where self.lm_head is the wrong size + if(packaging.version.parse("4.19.0.dev0") <= packaging.version.parse(transformers_version) <= packaging.version.parse("4.19.2")): + try: + from transformers import OPTForCausalLM, OPTModel + except ImportError: + pass + else: + # This is the same as the original __init__ but with + # config.hidden_size + # replaced with + # config.word_embed_proj_dim + def new_init(self, config): + super(OPTForCausalLM, self).__init__(config) + self.model = OPTModel(config) + self.lm_head = torch.nn.Linear(config.word_embed_proj_dim, config.vocab_size, bias=False) + self.post_init() + OPTForCausalLM.__init__ = new_init # Patch transformers to use our custom logit warpers from transformers import LogitsProcessorList, LogitsWarper, LogitsProcessor, TopKLogitsWarper, TopPLogitsWarper, TemperatureLogitsWarper, RepetitionPenaltyLogitsProcessor - from warpers import AdvancedRepetitionPenaltyLogitsProcessor, TailFreeLogitsWarper + from warpers import AdvancedRepetitionPenaltyLogitsProcessor, TailFreeLogitsWarper, TypicalLogitsWarper def dynamic_processor_wrap(cls, field_name, var_name, cond=None): old_call = cls.__call__ @@ -788,6 +1401,7 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme dynamic_processor_wrap(TopKLogitsWarper, "top_k", "top_k", cond=lambda x: x > 0) dynamic_processor_wrap(TopPLogitsWarper, "top_p", "top_p", cond=lambda x: x < 1.0) dynamic_processor_wrap(TailFreeLogitsWarper, "tfs", "tfs", cond=lambda x: x < 1.0) + dynamic_processor_wrap(TypicalLogitsWarper, "typical", "typical", cond=lambda x: x < 1.0) dynamic_processor_wrap(TemperatureLogitsWarper, "temperature", "temp", cond=lambda x: x != 1.0) RepetitionPenaltyLogitsProcessor.__init__ = AdvancedRepetitionPenaltyLogitsProcessor.__init__ RepetitionPenaltyLogitsProcessor.__call__ = AdvancedRepetitionPenaltyLogitsProcessor.__call__ @@ -833,6 +1447,7 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme warper_list.append(TopKLogitsWarper(top_k=1, min_tokens_to_keep=1 + (beams > 1))) warper_list.append(TopPLogitsWarper(top_p=0.5, min_tokens_to_keep=1 + (beams > 1))) warper_list.append(TailFreeLogitsWarper(tfs=0.5, min_tokens_to_keep=1 + (beams > 1))) + warper_list.append(TypicalLogitsWarper(typical=0.5, min_tokens_to_keep=1 + (beams > 1))) warper_list.append(TemperatureLogitsWarper(temperature=0.5)) return warper_list @@ -841,6 +1456,9 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme kwargs["logits_warper"] = new_get_logits_warper( beams=1, ) + if(vars.newlinemode == "s") or (vars.newlinemode == "ns"): + kwargs["eos_token_id"] = -1 + kwargs.setdefault("pad_token_id", 2) return new_sample.old_sample(self, *args, **kwargs) new_sample.old_sample = transformers.generation_utils.GenerationMixin.sample transformers.generation_utils.GenerationMixin.sample = new_sample @@ -892,7 +1510,7 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme return self.regeneration_required or self.halt tail = input_ids[..., -vars.generated_tkns:] for i, t in enumerate(tail): - decoded = tokenizer.decode(t) + decoded = utils.decodenewlines(tokenizer.decode(t)) _, found = 
checkworldinfo(decoded, force_use_txt=True, actions=vars._actions) found -= self.excluded_world_info[i] if(len(found) != 0): @@ -913,12 +1531,18 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme def get_hidden_size_from_model(model): try: - return int(model.transformer.hidden_size) + return int(model.model.decoder.project_in.in_features) except: try: - return int(model.transformer.embed_dim) + return int(model.model.decoder.embed_tokens.out_features) except: - return int(model.lm_head.in_features) + try: + return int(model.transformer.hidden_size) + except: + try: + return int(model.transformer.embed_dim) + except: + return int(model.lm_head.in_features) def maybe_low_cpu_mem_usage() -> Dict[str, Any]: if(packaging.version.parse(transformers_version) < packaging.version.parse("4.11.0")): @@ -928,7 +1552,7 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme @contextlib.contextmanager def maybe_use_float16(always_use=False): - if(always_use or (vars.hascuda and (vars.usegpu or vars.breakmodel))): + if(always_use or (vars.hascuda and args.lowmem and (vars.usegpu or vars.breakmodel))): original_dtype = torch.get_default_dtype() torch.set_default_dtype(torch.float16) yield True @@ -938,11 +1562,12 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme # If custom GPT2 model was chosen if(vars.model == "GPT2Custom"): + vars.lazy_load = False model_config = open(vars.custmodpth + "/config.json", "r") js = json.load(model_config) with(maybe_use_float16()): - model = GPT2LMHeadModel.from_pretrained(vars.custmodpth, cache_dir="cache/") - tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, cache_dir="cache/") + model = GPT2LMHeadModel.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache") + tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache") vars.modeldim = get_hidden_size_from_model(model) # Is CUDA available? 
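# Illustrative sketch (not part of the patch): the default-dtype pattern used
# by maybe_use_float16() above. Switching torch's default dtype to float16 for
# the duration of the block makes any weights materialized inside it (e.g. by
# from_pretrained) come out in half precision; the previous default is then
# restored. The helper name below is illustrative only.
import contextlib
import torch

@contextlib.contextmanager
def default_dtype(dtype):
    original = torch.get_default_dtype()
    torch.set_default_dtype(dtype)
    try:
        yield
    finally:
        torch.set_default_dtype(original)

# Example: tensors created inside the block are float16; the default is
# restored afterwards.
with default_dtype(torch.float16):
    assert torch.empty(2, 2).dtype is torch.float16
assert torch.empty(2, 2).dtype is not torch.float16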
If so, use GPU, otherwise fall back to CPU if(vars.hascuda and vars.usegpu): @@ -959,45 +1584,99 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme # feature yet if(vars.model_type == "gpt2"): lowmem = {} + + # If we're using torch_lazy_loader, we need to get breakmodel config + # early so that it knows where to load the individual model tensors + if(vars.lazy_load and vars.hascuda and vars.breakmodel): + device_config(model_config) # Download model from Huggingface if it does not exist, otherwise load locally - if(os.path.isdir(vars.custmodpth)): - with(maybe_use_float16()): - try: - tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, cache_dir="cache/") - except ValueError as e: - tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, cache_dir="cache/") - try: - model = AutoModelForCausalLM.from_pretrained(vars.custmodpth, cache_dir="cache/", **lowmem) - except ValueError as e: - model = GPTNeoForCausalLM.from_pretrained(vars.custmodpth, cache_dir="cache/", **lowmem) - elif(os.path.isdir(vars.model.replace('/', '_'))): - with(maybe_use_float16()): - try: - tokenizer = AutoTokenizer.from_pretrained(vars.model.replace('/', '_'), cache_dir="cache/") - except ValueError as e: - tokenizer = GPT2TokenizerFast.from_pretrained(vars.model.replace('/', '_'), cache_dir="cache/") - try: - model = AutoModelForCausalLM.from_pretrained(vars.model.replace('/', '_'), cache_dir="cache/", **lowmem) - except ValueError as e: - model = GPTNeoForCausalLM.from_pretrained(vars.model.replace('/', '_'), cache_dir="cache/", **lowmem) - else: - try: - tokenizer = AutoTokenizer.from_pretrained(vars.model, cache_dir="cache/") - except ValueError as e: - tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, cache_dir="cache/") - with(maybe_use_float16()): + + #If we specify a model and it's in the root directory, we need to move it to the models directory (legacy folder structure to new) + if os.path.isdir(vars.model.replace('/', '_')): + import shutil + shutil.move(vars.model.replace('/', '_'), "models/{}".format(vars.model.replace('/', '_'))) + print("\n", flush=True) + with maybe_use_float16(), torch_lazy_loader.use_lazy_torch_load(enable=vars.lazy_load, callback=get_lazy_load_callback(utils.num_layers(model_config)) if vars.lazy_load else None, dematerialized_modules=True): + if(vars.lazy_load): # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time + lowmem = {} + if(os.path.isdir(vars.custmodpth)): try: - model = AutoModelForCausalLM.from_pretrained(vars.model, cache_dir="cache/", **lowmem) - except ValueError as e: - model = GPTNeoForCausalLM.from_pretrained(vars.model, cache_dir="cache/", **lowmem) - - if not args.colab: - model = model.half() - import shutil - shutil.rmtree("cache/") - model.save_pretrained(vars.model.replace('/', '_')) - tokenizer.save_pretrained(vars.model.replace('/', '_')) + tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache") + except Exception as e: + try: + tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache") + except Exception as e: + tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache") + try: + model = AutoModelForCausalLM.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", **lowmem) + except Exception as e: + model = GPTNeoForCausalLM.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", **lowmem) + 
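# Illustrative sketch (not part of the patch): the loading branches in this
# hunk all use the same fallback ladder -- tokenizer: AutoTokenizer, then the
# checkpoint's own GPT2TokenizerFast files, then the stock "gpt2" tokenizer;
# model: AutoModelForCausalLM, then GPTNeoForCausalLM. The helper name and its
# plain arguments are illustrative only.
from transformers import (AutoTokenizer, GPT2TokenizerFast,
                          AutoModelForCausalLM, GPTNeoForCausalLM)

def load_with_fallbacks(path, revision=None, **lowmem):
    try:
        tokenizer = AutoTokenizer.from_pretrained(path, revision=revision, cache_dir="cache")
    except Exception:
        try:
            tokenizer = GPT2TokenizerFast.from_pretrained(path, revision=revision, cache_dir="cache")
        except Exception:
            tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=revision, cache_dir="cache")
    try:
        model = AutoModelForCausalLM.from_pretrained(path, revision=revision, cache_dir="cache", **lowmem)
    except Exception:
        model = GPTNeoForCausalLM.from_pretrained(path, revision=revision, cache_dir="cache", **lowmem)
    return tokenizer, model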
elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))): + try: + tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache") + except Exception as e: + try: + tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache") + except Exception as e: + tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache") + try: + model = AutoModelForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", **lowmem) + except Exception as e: + model = GPTNeoForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", **lowmem) + else: + old_rebuild_tensor = torch._utils._rebuild_tensor + def new_rebuild_tensor(storage: Union[torch_lazy_loader.LazyTensor, torch.Storage], storage_offset, shape, stride): + if(not isinstance(storage, torch_lazy_loader.LazyTensor)): + dtype = storage.dtype + else: + dtype = storage.storage_type.dtype + if(not isinstance(dtype, torch.dtype)): + dtype = storage.storage_type(0).dtype + if(dtype is torch.float32 and len(shape) >= 2): + vars.fp32_model = True + return old_rebuild_tensor(storage, storage_offset, shape, stride) + torch._utils._rebuild_tensor = new_rebuild_tensor + + try: + tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache") + except Exception as e: + try: + tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache") + except Exception as e: + tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache") + try: + model = AutoModelForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", **lowmem) + except Exception as e: + model = GPTNeoForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", **lowmem) + + torch._utils._rebuild_tensor = old_rebuild_tensor + + if not args.colab or args.savemodel: + import shutil + tokenizer.save_pretrained("models/{}".format(vars.model.replace('/', '_'))) + if(vars.fp32_model): # Use save_pretrained to convert fp32 models to fp16 + model = model.half() + model.save_pretrained("models/{}".format(vars.model.replace('/', '_')), max_shard_size="500MiB") + else: # For fp16 models, we can just copy the model files directly + import transformers.configuration_utils + import transformers.modeling_utils + import transformers.file_utils + # Save the config.json + shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, transformers.configuration_utils.CONFIG_NAME, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.configuration_utils.CONFIG_NAME)) + if(utils.num_shards is None): + # Save the pytorch_model.bin of an unsharded model + shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, transformers.modeling_utils.WEIGHTS_NAME, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_NAME)) + else: + with open(utils.from_pretrained_index_filename) as f: + map_data = json.load(f) + filenames = set(map_data["weight_map"].values()) + # Save the pytorch_model.bin.index.json 
of a sharded model + shutil.move(utils.from_pretrained_index_filename, os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_INDEX_NAME)) + # Then save the pytorch_model-#####-of-#####.bin files + for filename in filenames: + shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, filename, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), filename)) + shutil.rmtree("cache/") if(vars.hascuda): if(vars.usegpu): @@ -1006,7 +1685,9 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme generator = model.generate elif(vars.breakmodel): # Use both RAM and VRAM (breakmodel) vars.modeldim = get_hidden_size_from_model(model) - device_config(model) + if(not vars.lazy_load): + device_config(model.config) + move_model_to_devices(model) else: model = model.to('cpu').float() vars.modeldim = get_hidden_size_from_model(model) @@ -1027,22 +1708,46 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme else: from transformers import GPT2TokenizerFast - tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache/") + tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache") else: + from transformers import PreTrainedModel + from transformers import modeling_utils + old_from_pretrained = PreTrainedModel.from_pretrained.__func__ + @classmethod + def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): + vars.fp32_model = False + utils.num_shards = None + utils.current_shard = 0 + utils.from_pretrained_model_name = pretrained_model_name_or_path + utils.from_pretrained_index_filename = None + utils.from_pretrained_kwargs = kwargs + utils.bar = None + if not args.no_aria2: + utils.aria2_hook(pretrained_model_name_or_path, **kwargs) + return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs) + PreTrainedModel.from_pretrained = new_from_pretrained + if(hasattr(modeling_utils, "get_checkpoint_shard_files")): + old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files + def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs): + utils.num_shards = utils.get_num_shards(index_filename) + utils.from_pretrained_index_filename = index_filename + return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs) + modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files + def tpumtjgetsofttokens(): soft_tokens = None if(vars.sp is None): global np if 'np' not in globals(): import numpy as np - tensor = np.zeros((1, tpu_mtj_backend.params["d_model"]), dtype=np.float32) + tensor = np.zeros((1, tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"])), dtype=np.float32) rows = tensor.shape[0] padding_amount = tpu_mtj_backend.params["seq"] - (tpu_mtj_backend.params["seq"] % -tpu_mtj_backend.params["cores_per_replica"]) - rows tensor = np.pad(tensor, ((0, padding_amount), (0, 0))) tensor = tensor.reshape( tpu_mtj_backend.params["cores_per_replica"], -1, - tpu_mtj_backend.params["d_model"], + tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]), ) vars.sp = tpu_mtj_backend.shard_xmap(tensor) soft_tokens = np.arange( @@ -1087,7 +1792,7 @@ else: return excluded_world_info, regeneration_required, halt for i, t in enumerate(generated): - decoded = 
tokenizer.decode(past[i]) + tokenizer.decode(t[tpu_mtj_backend.params["seq"] : tpu_mtj_backend.params["seq"] + n_generated]) + decoded = utils.decodenewlines(tokenizer.decode(past[i])) + utils.decodenewlines(tokenizer.decode(t[tpu_mtj_backend.params["seq"] : tpu_mtj_backend.params["seq"] + n_generated])) _, found = checkworldinfo(decoded, force_use_txt=True, actions=vars._actions) found -= excluded_world_info[i] if(len(found) != 0): @@ -1108,6 +1813,7 @@ else: "temp": float(vars.temp), "top_k": int(vars.top_k), "tfs": float(vars.tfs), + "typical": float(vars.typical), "repetition_penalty": float(vars.rep_pen), "rpslope": float(vars.rep_pen_slope), "rprange": int(vars.rep_pen_range), @@ -1116,25 +1822,36 @@ else: # If we're running Colab or OAI, we still need a tokenizer. if(vars.model == "Colab"): from transformers import GPT2TokenizerFast - tokenizer = GPT2TokenizerFast.from_pretrained("EleutherAI/gpt-neo-2.7B", cache_dir="cache/") + tokenizer = GPT2TokenizerFast.from_pretrained("EleutherAI/gpt-neo-2.7B", revision=vars.revision, cache_dir="cache") + loadsettings() elif(vars.model == "OAI"): from transformers import GPT2TokenizerFast - tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache/") + tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache") + loadsettings() # Load the TPU backend if requested - elif(vars.model == "TPUMeshTransformerGPTJ"): + elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")): + if(vars.model == "TPUMeshTransformerGPTNeoX"): + vars.badwordsids = vars.badwordsids_neox print("{0}Initializing Mesh Transformer JAX, please wait...{1}".format(colors.PURPLE, colors.END)) - assert vars.model == "TPUMeshTransformerGPTJ" and vars.custmodpth and os.path.isdir(vars.custmodpth) + if vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and (not vars.custmodpth or not os.path.isdir(vars.custmodpth)): + raise FileNotFoundError(f"The specified model path {repr(vars.custmodpth)} is not the path to a valid folder") import tpu_mtj_backend + if(vars.model == "TPUMeshTransformerGPTNeoX" or vars.model_type == "opt"): + tpu_mtj_backend.pad_token_id = 1 tpu_mtj_backend.vars = vars tpu_mtj_backend.warper_callback = tpumtjgenerate_warper_callback tpu_mtj_backend.stopping_callback = tpumtjgenerate_stopping_callback tpu_mtj_backend.compiling_callback = tpumtjgenerate_compiling_callback tpu_mtj_backend.stopped_compiling_callback = tpumtjgenerate_stopped_compiling_callback tpu_mtj_backend.settings_callback = tpumtjgenerate_settings_callback - tpu_mtj_backend.load_model(vars.custmodpth) vars.allowsp = True - vars.modeldim = int(tpu_mtj_backend.params["d_model"]) + loadmodelsettings() + loadsettings() + tpu_mtj_backend.load_model(vars.custmodpth, hf_checkpoint=vars.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and vars.use_colab_tpu, **vars.modelconfig) + vars.modeldim = int(tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"])) tokenizer = tpu_mtj_backend.tokenizer + else: + loadsettings() # Set up Flask routes @app.route('/') @@ -1162,6 +1879,7 @@ def download(): js["authorsnote"] = vars.authornote js["anotetemplate"] = vars.authornotetemplate js["actions"] = tuple(vars.actions.values()) + js["actions_metadata"] = vars.actions_metadata js["worldinfo"] = [] # Extract only the important bits of WI @@ -1294,8 +2012,8 @@ def lua_decode(tokens): if("tokenizer" not in globals()): from transformers import GPT2TokenizerFast global tokenizer - 
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache/") - return tokenizer.decode(tokens) + tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache") + return utils.decodenewlines(tokenizer.decode(tokens)) #==================================================================# # Encode string into list of token IDs using current tokenizer @@ -1306,8 +2024,8 @@ def lua_encode(string): if("tokenizer" not in globals()): from transformers import GPT2TokenizerFast global tokenizer - tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache/") - return tokenizer.encode(string, max_length=int(4e9), truncation=True) + tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache") + return tokenizer.encode(utils.encodenewlines(string), max_length=int(4e9), truncation=True) #==================================================================# # Computes context given a submission, Lua array of entry UIDs and a Lua array @@ -1347,7 +2065,7 @@ def lua_compute_context(submission, entries, folders, kwargs): anotetxt, actions, ) - return tokenizer.decode(txt) + return utils.decodenewlines(tokenizer.decode(txt)) #==================================================================# # Get property of a world info entry given its UID and property name @@ -1457,6 +2175,7 @@ def lua_has_setting(setting): "settopp", "settopk", "settfs", + "settypical", "setreppen", "setreppenslope", "setreppenrange", @@ -1475,6 +2194,7 @@ def lua_has_setting(setting): "topk", "top_k", "tfs", + "typical", "reppen", "reppenslope", "reppenrange", @@ -1508,6 +2228,7 @@ def lua_get_setting(setting): if(setting in ("settopp", "topp", "top_p")): return vars.top_p if(setting in ("settopk", "topk", "top_k")): return vars.top_k if(setting in ("settfs", "tfs")): return vars.tfs + if(setting in ("settypical", "typical")): return vars.typical if(setting in ("setreppen", "reppen")): return vars.rep_pen if(setting in ("setreppenslope", "reppenslope")): return vars.rep_pen_slope if(setting in ("setreppenrange", "reppenrange")): return vars.rep_pen_range @@ -1542,6 +2263,7 @@ def lua_set_setting(setting, v): if(setting in ("settopp", "topp")): vars.top_p = v if(setting in ("settopk", "topk")): vars.top_k = v if(setting in ("settfs", "tfs")): vars.tfs = v + if(setting in ("settypical", "typical")): vars.typical = v if(setting in ("setreppen", "reppen")): vars.rep_pen = v if(setting in ("setreppenslope", "reppenslope")): vars.rep_pen_slope = v if(setting in ("setreppenrange", "reppenrange")): vars.rep_pen_range = v @@ -1629,7 +2351,11 @@ def lua_set_chunk(k, v): del vars._actions[chunk-1] vars.lua_deleted.add(chunk) if(not hasattr(vars, "_actions") or vars._actions is not vars.actions): - del vars.actions[chunk-1] + #Instead of deleting we'll blank out the text. 
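# The utils.encodenewlines/utils.decodenewlines wrappers introduced above (in
# lua_encode/lua_decode, and around every tokenizer.encode/decode call in this
# patch) centralize the newline translation previously done inline for
# newlinemode "s". Sketch of the assumed behaviour -- utils.py is not part of
# this hunk, and the separator token (taken here to be "</s>") is an
# assumption, as are the extra parameters:
def encodenewlines(txt, newlinemode="s", sep="</s>"):
    # In "s" mode, newlines are swapped for the model's sentence-separator
    # token before tokenization.
    return txt.replace("\n", sep) if newlinemode == "s" else txt

def decodenewlines(txt, newlinemode="s", sep="</s>"):
    # The inverse mapping, applied to tokenizer.decode() output.
    return txt.replace(sep, "\n") if newlinemode == "s" else txt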
This way our actions and actions_metadata stay in sync and we can restore the chunk on an undo + vars.actions[chunk-1] = "" + vars.actions_metadata[chunk-1]['Alternative Text'] = [{"Text": vars.actions_metadata[chunk-1]['Selected Text'], "Pinned": False, "Editted": True}] + vars.actions_metadata[chunk-1]['Alternative Text'] + vars.actions_metadata[chunk-1]['Selected Text'] = '' + send_debug() else: if(k == 0): print(colors.GREEN + f"{lua_log_format_name(vars.lua_koboldbridge.logging_name)} edited prompt chunk" + colors.END) @@ -1646,6 +2372,9 @@ def lua_set_chunk(k, v): vars._actions[chunk-1] = v vars.lua_edited.add(chunk) vars.actions[chunk-1] = v + vars.actions_metadata[chunk-1]['Alternative Text'] = [{"Text": vars.actions_metadata[chunk-1]['Selected Text'], "Pinned": False, "Editted": True}] + vars.actions_metadata[chunk-1]['Alternative Text'] + vars.actions_metadata[chunk-1]['Selected Text'] = v + send_debug() #==================================================================# # Get model type as "gpt-2-xl", "gpt-neo-2.7B", etc. @@ -1656,7 +2385,7 @@ def lua_get_modeltype(): return "readonly" if(vars.model in ("Colab", "OAI", "InferKit")): return "api" - if(vars.model not in ("TPUMeshTransformerGPTJ",) and (vars.model in ("GPT2Custom", "NeoCustom") or vars.model_type in ("gpt2", "gpt_neo", "gptj"))): + if(not vars.use_colab_tpu and vars.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and (vars.model in ("GPT2Custom", "NeoCustom") or vars.model_type in ("gpt2", "gpt_neo", "gptj"))): hidden_size = get_hidden_size_from_model(model) if(vars.model in ("gpt2",) or (vars.model_type == "gpt2" and hidden_size == 768)): return "gpt2" @@ -1672,7 +2401,7 @@ def lua_get_modeltype(): return "gpt-neo-1.3B" if(vars.model in ("EleutherAI/gpt-neo-2.7B",) or (vars.model_type == "gpt_neo" and hidden_size == 2560)): return "gpt-neo-2.7B" - if(vars.model in ("EleutherAI/gpt-j-6B",) or (vars.model == "TPUMeshTransformerGPTJ" and tpu_mtj_backend.params["d_model"] == 4096) or (vars.model_type in ("gpt_neo", "gptj") and hidden_size == 4096)): + if(vars.model in ("EleutherAI/gpt-j-6B",) or ((vars.use_colab_tpu or vars.model == "TPUMeshTransformerGPTJ") and tpu_mtj_backend.params["d_model"] == 4096) or (vars.model_type in ("gpt_neo", "gptj") and hidden_size == 4096)): return "gpt-j-6B" return "unknown" @@ -1685,7 +2414,7 @@ def lua_get_modelbackend(): return "readonly" if(vars.model in ("Colab", "OAI", "InferKit")): return "api" - if(vars.model in ("TPUMeshTransformerGPTJ",)): + if(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")): return "mtj" return "transformers" @@ -1694,7 +2423,30 @@ def lua_get_modelbackend(): #==================================================================# @bridged_kwarg() def lua_is_custommodel(): - return vars.model in ("GPT2Custom", "NeoCustom", "TPUMeshTransformerGPTJ") + return vars.model in ("GPT2Custom", "NeoCustom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") + +#==================================================================# +# Return the filename (as a string) of the current soft prompt, or +# None if no soft prompt is loaded +#==================================================================# +@bridged_kwarg() +def lua_get_spfilename(): + return vars.spfilename.strip() or None + +#==================================================================# +# When called with a string as argument, sets the current soft prompt; +# when called with None as argument, uses no soft prompt. 
+# Returns True if soft prompt changed, False otherwise. +#==================================================================# +@bridged_kwarg() +def lua_set_spfilename(filename: Union[str, None]): + if(filename is None): + filename = "" + filename = str(filename).strip() + changed = lua_get_spfilename() != filename + assert all(q not in filename for q in ("/", "\\")) + spRequest(filename) + return changed #==================================================================# # @@ -1753,16 +2505,16 @@ vars.lua_state = lupa.LuaRuntime(unpack_returned_tuples=True) # Load bridge.lua bridged = { - "corescript_path": os.path.join(os.path.dirname(os.path.realpath(__file__)), "cores"), - "userscript_path": os.path.join(os.path.dirname(os.path.realpath(__file__)), "userscripts"), - "config_path": os.path.join(os.path.dirname(os.path.realpath(__file__)), "userscripts"), - "lib_paths": vars.lua_state.table(os.path.join(os.path.dirname(os.path.realpath(__file__)), "lualibs"), os.path.join(os.path.dirname(os.path.realpath(__file__)), "extern", "lualibs")), + "corescript_path": "cores", + "userscript_path": "userscripts", + "config_path": "userscripts", + "lib_paths": vars.lua_state.table("lualibs", os.path.join("extern", "lualibs")), "vars": vars, } for kwarg in _bridged: bridged[kwarg] = _bridged[kwarg] try: - vars.lua_kobold, vars.lua_koboldcore, vars.lua_koboldbridge = vars.lua_state.globals().dofile(os.path.join(os.path.dirname(os.path.realpath(__file__)), "bridge.lua"))( + vars.lua_kobold, vars.lua_koboldcore, vars.lua_koboldbridge = vars.lua_state.globals().dofile("bridge.lua")( vars.lua_state.globals().python, bridged, ) @@ -1789,7 +2541,7 @@ def do_connect(): emit('from_server', {'cmd': 'setchatname', 'data': vars.chatname}) emit('from_server', {'cmd': 'setanotetemplate', 'data': vars.authornotetemplate}) emit('from_server', {'cmd': 'connected', 'smandelete': vars.smandelete, 'smanrename': vars.smanrename, 'modelname': getmodelname()}) - if(vars.remote): + if(vars.host): emit('from_server', {'cmd': 'runs_remotely'}) if(vars.allowsp): emit('from_server', {'cmd': 'allowsp', 'data': vars.allowsp}) @@ -1835,7 +2587,8 @@ def do_connect(): #==================================================================# @socketio.on('message') def get_message(msg): - print("{0}Data received:{1}{2}".format(colors.GREEN, msg, colors.END)) + if not vars.quiet: + print("{0}Data received:{1}{2}".format(colors.GREEN, msg, colors.END)) # Submit action if(msg['cmd'] == 'submit'): if(vars.mode == "play"): @@ -1873,7 +2626,10 @@ def get_message(msg): actionretry(msg['data']) # Back/Undo Action elif(msg['cmd'] == 'back'): - actionback() + ignore = actionback() + # Forward/Redo Action + elif(msg['cmd'] == 'redo'): + actionredo() # EditMode Action (old) elif(msg['cmd'] == 'edit'): if(vars.mode == "play"): @@ -1895,13 +2651,13 @@ def get_message(msg): deleterequest() elif(msg['cmd'] == 'memory'): togglememorymode() - elif(not vars.remote and msg['cmd'] == 'savetofile'): + elif(not vars.host and msg['cmd'] == 'savetofile'): savetofile() - elif(not vars.remote and msg['cmd'] == 'loadfromfile'): + elif(not vars.host and msg['cmd'] == 'loadfromfile'): loadfromfile() elif(msg['cmd'] == 'loadfromstring'): loadRequest(json.loads(msg['data']), filename=msg['filename']) - elif(not vars.remote and msg['cmd'] == 'import'): + elif(not vars.host and msg['cmd'] == 'import'): importRequest() elif(msg['cmd'] == 'newgame'): newGameRequest() @@ -1927,6 +2683,11 @@ def get_message(msg): emit('from_server', {'cmd': 'setlabeltfs', 'data': 
msg['data']}, broadcast=True) settingschanged() refresh_settings() + elif(msg['cmd'] == 'settypical'): + vars.typical = float(msg['data']) + emit('from_server', {'cmd': 'setlabeltypical', 'data': msg['data']}, broadcast=True) + settingschanged() + refresh_settings() elif(msg['cmd'] == 'setreppen'): vars.rep_pen = float(msg['data']) emit('from_server', {'cmd': 'setlabelreppen', 'data': msg['data']}, broadcast=True) @@ -2101,7 +2862,6 @@ def get_message(msg): loadRequest(fileops.storypath(vars.loadselect)) elif(msg['cmd'] == 'sprequest'): spRequest(vars.spselect) - emit('from_server', {'cmd': 'spstatitems', 'data': {vars.spfilename: vars.spmeta} if vars.allowsp and len(vars.spfilename) else {}}, broadcast=True) elif(msg['cmd'] == 'deletestory'): deletesave(msg['data']) elif(msg['cmd'] == 'renamestory'): @@ -2111,6 +2871,8 @@ def get_message(msg): vars.saveow = False elif(msg['cmd'] == 'seqsel'): selectsequence(msg['data']) + elif(msg['cmd'] == 'seqpin'): + pinsequence(msg['data']) elif(msg['cmd'] == 'setnumseq'): vars.numseqs = int(msg['data']) emit('from_server', {'cmd': 'setlabelnumseq', 'data': msg['data']}) @@ -2155,8 +2917,13 @@ def get_message(msg): vars.nogenmod = msg['data'] settingschanged() refresh_settings() - elif(not vars.remote and msg['cmd'] == 'importwi'): + elif(not vars.host and msg['cmd'] == 'importwi'): wiimportrequest() + elif(msg['cmd'] == 'debug'): + vars.debug = msg['data'] + emit('from_server', {'cmd': 'set_debug', 'data': msg['data']}, broadcast=True) + if vars.debug: + send_debug() #==================================================================# # Send userscripts list to client @@ -2210,152 +2977,6 @@ def sendsettings(): if(not frm["id"] in vars.formatoptns): vars.formatoptns[frm["id"]] = False; -#==================================================================# -# Take settings from vars and write them to client settings file -#==================================================================# -def savesettings(): - # Build json to write - js = {} - js["apikey"] = vars.apikey - js["andepth"] = vars.andepth - js["temp"] = vars.temp - js["top_p"] = vars.top_p - js["top_k"] = vars.top_k - js["tfs"] = vars.tfs - js["rep_pen"] = vars.rep_pen - js["rep_pen_slope"] = vars.rep_pen_slope - js["rep_pen_range"] = vars.rep_pen_range - js["genamt"] = vars.genamt - js["max_length"] = vars.max_length - js["ikgen"] = vars.ikgen - js["formatoptns"] = vars.formatoptns - js["numseqs"] = vars.numseqs - js["widepth"] = vars.widepth - js["useprompt"] = vars.useprompt - js["adventure"] = vars.adventure - js["chatmode"] = vars.chatmode - js["chatname"] = vars.chatname - js["dynamicscan"] = vars.dynamicscan - js["nopromptgen"] = vars.nopromptgen - js["rngpersist"] = vars.rngpersist - js["nogenmod"] = vars.nogenmod - js["autosave"] = vars.autosave - js["welcome"] = vars.welcome - js["newlinemode"] = vars.newlinemode - - js["antemplate"] = vars.setauthornotetemplate - - js["userscripts"] = vars.userscripts - js["corescript"] = vars.corescript - js["softprompt"] = vars.spfilename - - # Write it - if not os.path.exists('settings'): - os.mkdir('settings') - file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "w") - try: - file.write(json.dumps(js, indent=3)) - finally: - file.close() - -#==================================================================# -# Read settings from client file JSON and send to vars -#==================================================================# -def loadsettings(): - if(path.exists("settings/" + getmodelname().replace('/', '_') + 
".settings")): - # Read file contents into JSON object - file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "r") - js = json.load(file) - - # Copy file contents to vars - if("apikey" in js): - vars.apikey = js["apikey"] - if("andepth" in js): - vars.andepth = js["andepth"] - if("temp" in js): - vars.temp = js["temp"] - if("top_p" in js): - vars.top_p = js["top_p"] - if("top_k" in js): - vars.top_k = js["top_k"] - if("tfs" in js): - vars.tfs = js["tfs"] - if("rep_pen" in js): - vars.rep_pen = js["rep_pen"] - if("rep_pen_slope" in js): - vars.rep_pen_slope = js["rep_pen_slope"] - if("rep_pen_range" in js): - vars.rep_pen_range = js["rep_pen_range"] - if("genamt" in js): - vars.genamt = js["genamt"] - if("max_length" in js): - vars.max_length = js["max_length"] - if("ikgen" in js): - vars.ikgen = js["ikgen"] - if("formatoptns" in js): - vars.formatoptns = js["formatoptns"] - if("numseqs" in js): - vars.numseqs = js["numseqs"] - if("widepth" in js): - vars.widepth = js["widepth"] - if("useprompt" in js): - vars.useprompt = js["useprompt"] - if("adventure" in js): - vars.adventure = js["adventure"] - if("chatmode" in js): - vars.chatmode = js["chatmode"] - if("chatname" in js): - vars.chatname = js["chatname"] - if("dynamicscan" in js): - vars.dynamicscan = js["dynamicscan"] - if("nopromptgen" in js): - vars.nopromptgen = js["nopromptgen"] - if("rngpersist" in js): - vars.rngpersist = js["rngpersist"] - if("nogenmod" in js): - vars.nogenmod = js["nogenmod"] - if("autosave" in js): - vars.autosave = js["autosave"] - if("newlinemode" in js): - vars.newlinemode = js["newlinemode"] - if("welcome" in js): - vars.welcome = js["welcome"] - - if("antemplate" in js): - vars.setauthornotetemplate = js["antemplate"] - if(not vars.gamestarted): - vars.authornotetemplate = vars.setauthornotetemplate - - if("userscripts" in js): - vars.userscripts = [] - for userscript in js["userscripts"]: - if type(userscript) is not str: - continue - userscript = userscript.strip() - if len(userscript) != 0 and all(q not in userscript for q in ("..", ":")) and all(userscript[0] not in q for q in ("/", "\\")) and os.path.exists(fileops.uspath(userscript)): - vars.userscripts.append(userscript) - - if("corescript" in js and type(js["corescript"]) is str and all(q not in js["corescript"] for q in ("..", ":")) and all(js["corescript"][0] not in q for q in ("/", "\\"))): - vars.corescript = js["corescript"] - else: - vars.corescript = "default.lua" - - if(vars.allowsp and "softprompt" in js and type(js["softprompt"]) is str and all(q not in js["softprompt"] for q in ("..", ":")) and (len(js["softprompt"]) == 0 or all(js["softprompt"][0] not in q for q in ("/", "\\")))): - spRequest(js["softprompt"]) - else: - vars.spfilename = "" - - file.close() - - -#==================================================================# -# Don't save settings unless 2 seconds have passed without modification -#==================================================================# -@debounce(2) -def settingschanged(): - print("{0}Saving settings!{1}".format(colors.GREEN, colors.END)) - savesettings() - #==================================================================# # Set value of gamesaved #==================================================================# @@ -2408,10 +3029,6 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, if(len(data)): data = f"\n{vars.chatname}: {data}\n" - # mode - if(vars.newlinemode == "s"): - data = data.replace('\n', "") - # If we're not continuing, store a 
copy of the raw input if(data != ""): vars.lastact = data @@ -2485,7 +3102,25 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, vars.prompt = data else: vars.actions.append(data) + # we now need to update the actions_metadata + # we'll have two conditions. + # 1. This is totally new (user entered) + if vars.actions.get_last_key() not in vars.actions_metadata: + vars.actions_metadata[vars.actions.get_last_key()] = {"Selected Text": data, "Alternative Text": []} + else: + # 2. We've selected a chunk of text that is was presented previously + try: + alternatives = [item['Text'] for item in vars.actions_metadata[len(vars.actions)-1]["Alternative Text"]] + except: + print(len(vars.actions)) + print(vars.actions_metadata) + raise + if data in alternatives: + alternatives = [item for item in vars.actions_metadata[vars.actions.get_last_key() ]["Alternative Text"] if item['Text'] != data] + vars.actions_metadata[vars.actions.get_last_key()]["Alternative Text"] = alternatives + vars.actions_metadata[vars.actions.get_last_key()]["Selected Text"] = data update_story_chunk('last') + send_debug() if(not vars.noai and vars.lua_koboldbridge.generating): # Off to the tokenizer! @@ -2532,21 +3167,13 @@ def actionretry(data): if(vars.noai): emit('from_server', {'cmd': 'errmsg', 'data': "Retry function unavailable in Read Only mode."}) return - if(vars.aibusy): - return if(vars.recentrng is not None): - randomGameRequest(vars.recentrng, memory=vars.recentrngm) + if(not vars.aibusy): + randomGameRequest(vars.recentrng, memory=vars.recentrngm) return - # Remove last action if possible and resubmit - if(vars.gamestarted if vars.useprompt else len(vars.actions) > 0): - if(not vars.recentback and len(vars.actions) != 0 and len(vars.genseqs) == 0): # Don't pop if we're in the "Select sequence to keep" menu or if there are no non-prompt actions - last_key = vars.actions.get_last_key() - vars.actions.pop() - remove_story_chunk(last_key + 1) - vars.recentback = False - vars.recentedit = False - vars.lua_koboldbridge.feedback = None + if actionback(): actionsubmit("", actionmode=vars.actionmode, force_submit=True) + send_debug() elif(not vars.useprompt): emit('from_server', {'cmd': 'errmsg', 'data': "Please enable \"Always Add Prompt\" to retry with your prompt."}) @@ -2558,14 +3185,68 @@ def actionback(): return # Remove last index of actions and refresh game screen if(len(vars.genseqs) == 0 and len(vars.actions) > 0): + # We are going to move the selected text to alternative text in the actions_metadata variable so we can redo this action + vars.actions_metadata[vars.actions.get_last_key() ]['Alternative Text'] = [{'Text': vars.actions_metadata[vars.actions.get_last_key() ]['Selected Text'], + 'Pinned': False, + "Previous Selection": True, + "Edited": False}] + vars.actions_metadata[vars.actions.get_last_key() ]['Alternative Text'] + vars.actions_metadata[vars.actions.get_last_key() ]['Selected Text'] = "" + last_key = vars.actions.get_last_key() vars.actions.pop() vars.recentback = True remove_story_chunk(last_key + 1) + #for the redo to not get out of whack, need to reset the max # in the actions sequence + vars.actions.set_next_id(last_key) + success = True elif(len(vars.genseqs) == 0): emit('from_server', {'cmd': 'errmsg', 'data': "Cannot delete the prompt."}) + success = False else: vars.genseqs = [] + success = True + send_debug() + return success + +def actionredo(): + i = 0 + #First we need to find the next valid key + #We might have deleted text so we don't want to show a 
redo for that blank chunk + + restore_id = vars.actions.get_last_key()+1 + if restore_id in vars.actions_metadata: + ok_to_use = False + while not ok_to_use: + for item in vars.actions_metadata[restore_id]['Alternative Text']: + if item['Previous Selection'] and item['Text'] != "": + ok_to_use = True + if not ok_to_use: + restore_id+=1 + if restore_id not in vars.actions_metadata: + return + else: + vars.actions.set_next_id(restore_id) + + + if restore_id in vars.actions_metadata: + genout = [{"generated_text": item['Text']} for item in vars.actions_metadata[restore_id]['Alternative Text'] if (item["Previous Selection"]==True)] + if len(genout) > 0: + genout = genout + [{"generated_text": item['Text']} for item in vars.actions_metadata[restore_id]['Alternative Text'] if (item["Pinned"]==True) and (item["Previous Selection"]==False)] + if len(genout) == 1: + vars.actions_metadata[restore_id]['Alternative Text'] = [item for item in vars.actions_metadata[restore_id]['Alternative Text'] if (item["Previous Selection"]!=True)] + genresult(genout[0]['generated_text'], flash=True, ignore_formatting=True) + else: + # Store sequences in memory until selection is made + vars.genseqs = genout + + + # Send sequences to UI for selection + genout = [[item['Text'], "redo"] for item in vars.actions_metadata[restore_id]['Alternative Text'] if (item["Previous Selection"]==True)] + + emit('from_server', {'cmd': 'genseqs', 'data': genout}, broadcast=True) + else: + emit('from_server', {'cmd': 'popuperror', 'data': "There's nothing to undo"}, broadcast=True) + send_debug() #==================================================================# # @@ -2599,24 +3280,24 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None, if("tokenizer" not in globals()): from transformers import GPT2TokenizerFast global tokenizer - tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache/") + tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache") # Calculate token budget - prompttkns = tokenizer.encode(vars.comregex_ai.sub('', vars.prompt), max_length=int(2e9), truncation=True) + prompttkns = tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', vars.prompt)), max_length=int(2e9), truncation=True) lnprompt = len(prompttkns) - memtokens = tokenizer.encode(mem, max_length=int(2e9), truncation=True) + memtokens = tokenizer.encode(utils.encodenewlines(mem), max_length=int(2e9), truncation=True) lnmem = len(memtokens) if(lnmem > vars.max_length - lnsp - vars.genamt - budget_deduction): raise OverflowError("The memory in your story is too long. Please either write a shorter memory text or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.") - witokens = tokenizer.encode(winfo, max_length=int(2e9), truncation=True) + witokens = tokenizer.encode(utils.encodenewlines(winfo), max_length=int(2e9), truncation=True) lnwi = len(witokens) if(lnmem + lnwi > vars.max_length - lnsp - vars.genamt - budget_deduction): raise OverflowError("The current active world info keys take up too many tokens. Please either write shorter world info, decrease World Info Depth or increase the Max Tokens setting. 
If you are using a soft prompt, additionally consider using a smaller soft prompt.") if(anotetxt != ""): - anotetkns = tokenizer.encode(anotetxt, max_length=int(2e9), truncation=True) + anotetkns = tokenizer.encode(utils.encodenewlines(anotetxt), max_length=int(2e9), truncation=True) lnanote = len(anotetkns) if(lnmem + lnwi + lnanote > vars.max_length - lnsp - vars.genamt - budget_deduction): raise OverflowError("The author's note in your story is too long. Please either write a shorter author's note or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.") @@ -2626,7 +3307,7 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None, else: budget = vars.max_length - lnsp - lnmem - lnanote - lnwi - vars.genamt - budget_deduction - lnsubmission = len(tokenizer.encode(vars.comregex_ai.sub('', submission), max_length=int(2e9), truncation=True)) if submission is not None else 0 + lnsubmission = len(tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', submission)), max_length=int(2e9), truncation=True)) if submission is not None else 0 maybe_lnprompt = lnprompt if vars.useprompt and actionlen > 0 else 0 if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnsp - vars.genamt - budget_deduction): @@ -2655,7 +3336,7 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None, assert budget >= 0 if(budget <= 0): break - acttkns = tokenizer.encode(chunk, max_length=int(2e9), truncation=True) + acttkns = tokenizer.encode(utils.encodenewlines(chunk), max_length=int(2e9), truncation=True) tknlen = len(acttkns) if(tknlen < budget): tokens = acttkns + tokens @@ -2711,22 +3392,22 @@ def calcsubmit(txt): if(vars.model != "InferKit"): subtxt, min, max = calcsubmitbudget(actionlen, winfo, mem, anotetxt, vars.actions, submission=txt) if(actionlen == 0): - if(not vars.model in ["Colab", "OAI", "TPUMeshTransformerGPTJ"]): + if(not vars.use_colab_tpu and vars.model not in ["Colab", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): generate(subtxt, min, max, found_entries=found_entries) elif(vars.model == "Colab"): - sendtocolab(tokenizer.decode(subtxt), min, max) + sendtocolab(utils.decodenewlines(tokenizer.decode(subtxt)), min, max) elif(vars.model == "OAI"): - oairequest(tokenizer.decode(subtxt), min, max) - elif(vars.model == "TPUMeshTransformerGPTJ"): + oairequest(utils.decodenewlines(tokenizer.decode(subtxt)), min, max) + elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")): tpumtjgenerate(subtxt, min, max, found_entries=found_entries) else: - if(not vars.model in ["Colab", "OAI", "TPUMeshTransformerGPTJ"]): + if(not vars.use_colab_tpu and vars.model not in ["Colab", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): generate(subtxt, min, max, found_entries=found_entries) elif(vars.model == "Colab"): - sendtocolab(tokenizer.decode(subtxt), min, max) + sendtocolab(utils.decodenewlines(tokenizer.decode(subtxt)), min, max) elif(vars.model == "OAI"): - oairequest(tokenizer.decode(subtxt), min, max) - elif(vars.model == "TPUMeshTransformerGPTJ"): + oairequest(utils.decodenewlines(tokenizer.decode(subtxt)), min, max) + elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")): tpumtjgenerate(subtxt, min, max, found_entries=found_entries) # For InferKit web API @@ -2820,7 +3501,6 @@ def _generate(txt, minimum, maximum, found_entries): genout = generator( gen_in, 
do_sample=True, - min_length=minimum, max_length=int(2e9), repetition_penalty=1.1, bad_words_ids=vars.badwordsids, @@ -2843,7 +3523,7 @@ def _generate(txt, minimum, maximum, found_entries): genout[r][genout.shape[-1] - already_generated + c] = vars.lua_koboldbridge.generated[r+1][c+1] encoded = [] for i in range(vars.numseqs): - txt = tokenizer.decode(genout[i, -already_generated:]) + txt = utils.decodenewlines(tokenizer.decode(genout[i, -already_generated:])) winfo, mem, anotetxt, _found_entries = calcsubmitbudgetheader(txt, force_use_txt=True, actions=vars._actions) found_entries[i].update(_found_entries) txt, _, _ = calcsubmitbudget(len(vars._actions), winfo, mem, anotetxt, vars._actions, submission=txt) @@ -2881,10 +3561,11 @@ def generate(txt, minimum, maximum, found_entries=None): found_entries = set() found_entries = tuple(found_entries.copy() for _ in range(vars.numseqs)) - print("{0}Min:{1}, Max:{2}, Txt:{3}{4}".format(colors.YELLOW, minimum, maximum, tokenizer.decode(txt), colors.END)) + if not vars.quiet: + print("{0}Min:{1}, Max:{2}, Txt:{3}{4}".format(colors.YELLOW, minimum, maximum, utils.decodenewlines(tokenizer.decode(txt)), colors.END)) # Store context in memory to use it for comparison with generated content - vars.lastctx = tokenizer.decode(txt) + vars.lastctx = utils.decodenewlines(tokenizer.decode(txt)) # Clear CUDA cache if using GPU if(vars.hascuda and (vars.usegpu or vars.breakmodel)): @@ -2911,7 +3592,7 @@ def generate(txt, minimum, maximum, found_entries=None): for i in range(vars.numseqs): vars.lua_koboldbridge.generated[i+1][vars.generated_tkns] = int(genout[i, -1].item()) - vars.lua_koboldbridge.outputs[i+1] = tokenizer.decode(genout[i, -already_generated:]) + vars.lua_koboldbridge.outputs[i+1] = utils.decodenewlines(tokenizer.decode(genout[i, -already_generated:])) execute_outmod() if(vars.lua_koboldbridge.regeneration_required): @@ -2921,7 +3602,7 @@ def generate(txt, minimum, maximum, found_entries=None): genout.append({"generated_text": vars.lua_koboldbridge.outputs[i+1]}) assert type(genout[-1]["generated_text"]) is str else: - genout = [{"generated_text": tokenizer.decode(tokens[-already_generated:])} for tokens in genout] + genout = [{"generated_text": utils.decodenewlines(tokenizer.decode(tokens[-already_generated:]))} for tokens in genout] if(len(genout) == 1): genresult(genout[0]["generated_text"]) @@ -2942,11 +3623,13 @@ def generate(txt, minimum, maximum, found_entries=None): #==================================================================# # Deal with a single return sequence from generate() #==================================================================# -def genresult(genout, flash=True): - print("{0}{1}{2}".format(colors.CYAN, genout, colors.END)) +def genresult(genout, flash=True, ignore_formatting=False): + if not vars.quiet: + print("{0}{1}{2}".format(colors.CYAN, genout, colors.END)) # Format output before continuing - genout = applyoutputformatting(genout) + if not ignore_formatting: + genout = applyoutputformatting(genout) vars.lua_koboldbridge.feedback = genout @@ -2958,9 +3641,14 @@ def genresult(genout, flash=True): vars.prompt = genout else: vars.actions.append(genout) + if vars.actions.get_last_key() not in vars.actions_metadata: + vars.actions_metadata[vars.actions.get_last_key()] = {'Selected Text': genout, 'Alternative Text': []} + else: + vars.actions_metadata[vars.actions.get_last_key()]['Selected Text'] = genout update_story_chunk('last') if(flash): emit('from_server', {'cmd': 'texteffect', 'data': 
vars.actions.get_last_key() + 1 if len(vars.actions) else 0}, broadcast=True) + send_debug() #==================================================================# # Send generator sequences to the UI for selection @@ -2970,14 +3658,34 @@ def genselect(genout): for result in genout: # Apply output formatting rules to sequences result["generated_text"] = applyoutputformatting(result["generated_text"]) - print("{0}[Result {1}]\n{2}{3}".format(colors.CYAN, i, result["generated_text"], colors.END)) + if not vars.quiet: + print("{0}[Result {1}]\n{2}{3}".format(colors.CYAN, i, result["generated_text"], colors.END)) i += 1 + # Add the options to the actions metadata + # If we've already generated text for this action but haven't selected one we'll want to kill all non-pinned, non-previous selection, and non-edited options then add the new ones + if vars.actions.get_next_id() in vars.actions_metadata: + if (vars.actions_metadata[vars.actions.get_next_id()]['Selected Text'] == ""): + vars.actions_metadata[vars.actions.get_next_id()]['Alternative Text'] = [{"Text": item['Text'], "Pinned": item['Pinned'], + "Previous Selection": item["Previous Selection"], + "Edited": item["Edited"]} for item in vars.actions_metadata[vars.actions.get_next_id()]['Alternative Text'] + if item['Pinned'] or item["Previous Selection"] or item["Edited"]] + [{"Text": text["generated_text"], + "Pinned": False, "Previous Selection": False, "Edited": False} for text in genout] + else: + vars.actions_metadata[vars.actions.get_next_id()] = {'Selected Text': '', 'Alternative Text': [{"Text": text["generated_text"], "Pinned": False, "Previous Selection": False, "Edited": False} for text in genout]} + else: + vars.actions_metadata[vars.actions.get_next_id()] = {'Selected Text': '', 'Alternative Text': [{"Text": text["generated_text"], "Pinned": False, "Previous Selection": False, "Edited": False} for text in genout]} + + genout = [{"generated_text": item['Text']} for item in vars.actions_metadata[vars.actions.get_next_id()]['Alternative Text'] if (item["Previous Selection"]==False) and (item["Edited"]==False)] + # Store sequences in memory until selection is made vars.genseqs = genout + genout = [[item['Text'], "pinned" if item['Pinned'] else "normal"] for item in vars.actions_metadata[vars.actions.get_next_id()]['Alternative Text'] if (item["Previous Selection"]==False) and (item["Edited"]==False)] + # Send sequences to UI for selection emit('from_server', {'cmd': 'genseqs', 'data': genout}, broadcast=True) + send_debug() #==================================================================# # Send selected sequence to action log and refresh UI @@ -2988,6 +3696,9 @@ def selectsequence(n): vars.lua_koboldbridge.feedback = vars.genseqs[int(n)]["generated_text"] if(len(vars.lua_koboldbridge.feedback) != 0): vars.actions.append(vars.lua_koboldbridge.feedback) + #We'll want to remove the option from the alternative text and put it in selected text + vars.actions_metadata[vars.actions.get_last_key() ]['Alternative Text'] = [item for item in vars.actions_metadata[vars.actions.get_last_key()]['Alternative Text'] if item['Text'] != vars.lua_koboldbridge.feedback] + vars.actions_metadata[vars.actions.get_last_key() ]['Selected Text'] = vars.lua_koboldbridge.feedback update_story_chunk('last') emit('from_server', {'cmd': 'texteffect', 'data': vars.actions.get_last_key() + 1 if len(vars.actions) else 0}, broadcast=True) emit('from_server', {'cmd': 'hidegenseqs', 'data': ''}, broadcast=True) @@ -2995,13 +3706,31 @@ def selectsequence(n): 
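# Shape of a vars.actions_metadata entry as used by genselect() above and by
# selectsequence(), pinsequence(), actionback() and actionredo() elsewhere in
# this patch. Field names are taken from the surrounding code; the literal
# values below are only an example:
example_entry = {
    "Selected Text": "The knight draws her sword.",    # text currently shown in the story
    "Alternative Text": [
        {
            "Text": "The knight backs away slowly.",   # a candidate continuation
            "Pinned": False,              # kept across regenerations when True (seqpin)
            "Previous Selection": False,  # True once a chunk is undone, so redo can restore it
            "Edited": False,              # True for text displaced by an edit or delete
        },
    ],
}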
if(vars.lua_koboldbridge.restart_sequence is not None): actionsubmit("", actionmode=vars.actionmode, force_submit=True, disable_recentrng=True) + send_debug() + +#==================================================================# +# Pin/Unpin the selected sequence +#==================================================================# +def pinsequence(n): + if n.isnumeric(): + text = vars.genseqs[int(n)]['generated_text'] + if text in [item['Text'] for item in vars.actions_metadata[vars.actions.get_next_id()]['Alternative Text']]: + alternatives = vars.actions_metadata[vars.actions.get_next_id()]['Alternative Text'] + for i in range(len(alternatives)): + if alternatives[i]['Text'] == text: + alternatives[i]['Pinned'] = not alternatives[i]['Pinned'] + break + vars.actions_metadata[vars.actions.get_next_id()]['Alternative Text'] = alternatives + send_debug() + #==================================================================# # Send transformers-style request to ngrok/colab host #==================================================================# def sendtocolab(txt, min, max): # Log request to console - print("{0}Tokens:{1}, Txt:{2}{3}".format(colors.YELLOW, min-1, txt, colors.END)) + if not vars.quiet: + print("{0}Tokens:{1}, Txt:{2}{3}".format(colors.YELLOW, min-1, txt, colors.END)) # Store context in memory to use it for comparison with generated content vars.lastctx = txt @@ -3018,6 +3747,7 @@ def sendtocolab(txt, min, max): 'top_p': vars.top_p, 'top_k': vars.top_k, 'tfs': vars.tfs, + 'typical': vars.typical, 'numseqs': vars.numseqs, 'retfultxt': False } @@ -3086,7 +3816,8 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None): found_entries = set() found_entries = tuple(found_entries.copy() for _ in range(vars.numseqs)) - print("{0}Min:{1}, Max:{2}, Txt:{3}{4}".format(colors.YELLOW, minimum, maximum, tokenizer.decode(txt), colors.END)) + if not vars.quiet: + print("{0}Min:{1}, Max:{2}, Txt:{3}{4}".format(colors.YELLOW, minimum, maximum, utils.decodenewlines(tokenizer.decode(txt)), colors.END)) vars._actions = vars.actions vars._prompt = vars.prompt @@ -3129,7 +3860,7 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None): encoded = [] for i in range(vars.numseqs): - txt = tokenizer.decode(past[i]) + txt = utils.decodenewlines(tokenizer.decode(past[i])) winfo, mem, anotetxt, _found_entries = calcsubmitbudgetheader(txt, force_use_txt=True, actions=vars._actions) found_entries[i].update(_found_entries) txt, _, _ = calcsubmitbudget(len(vars._actions), winfo, mem, anotetxt, vars._actions, submission=txt) @@ -3153,6 +3884,7 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None): top_p=vars.top_p, top_k=vars.top_k, tfs=vars.tfs, + typical=vars.typical, numseqs=vars.numseqs, repetition_penalty=vars.rep_pen, rpslope=vars.rep_pen_slope, @@ -3181,7 +3913,7 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None): return for i in range(vars.numseqs): - vars.lua_koboldbridge.outputs[i+1] = tokenizer.decode(past[i]) + vars.lua_koboldbridge.outputs[i+1] = utils.decodenewlines(tokenizer.decode(past[i])) genout = past execute_outmod() @@ -3192,7 +3924,7 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None): genout.append({"generated_text": vars.lua_koboldbridge.outputs[i+1]}) assert type(genout[-1]["generated_text"]) is str else: - genout = [{"generated_text": tokenizer.decode(txt)} for txt in genout] + genout = [{"generated_text": utils.decodenewlines(tokenizer.decode(txt))} for txt in genout] if(len(genout) == 1): 
genresult(genout[0]["generated_text"]) @@ -3220,14 +3952,14 @@ def getnewcontent(txt): return txt # Tokenize the last context and the generated content - ctxtokens = tokenizer.encode(vars.lastctx, max_length=int(2e9), truncation=True) - txttokens = tokenizer.encode(txt, max_length=int(2e9), truncation=True) + ctxtokens = tokenizer.encode(utils.encodenewlines(vars.lastctx), max_length=int(2e9), truncation=True) + txttokens = tokenizer.encode(utils.encodenewlines(txt), max_length=int(2e9), truncation=True) dif = (len(txttokens) - len(ctxtokens)) * -1 # Remove the context from the returned text newtokens = txttokens[dif:] - return tokenizer.decode(newtokens) + return utils.decodenewlines(tokenizer.decode(newtokens)) #==================================================================# # Applies chosen formatting options to text submitted to AI @@ -3243,9 +3975,6 @@ def applyinputformatting(txt): # Applies chosen formatting options to text returned from AI #==================================================================# def applyoutputformatting(txt): - # Revert S mode on output to maintain compatibility - txt = txt.replace('', "\n") - # Use standard quotes and apostrophes txt = utils.fixquotes(txt) @@ -3341,6 +4070,7 @@ def refresh_settings(): emit('from_server', {'cmd': 'updatetopp', 'data': vars.top_p}, broadcast=True) emit('from_server', {'cmd': 'updatetopk', 'data': vars.top_k}, broadcast=True) emit('from_server', {'cmd': 'updatetfs', 'data': vars.tfs}, broadcast=True) + emit('from_server', {'cmd': 'updatetypical', 'data': vars.typical}, broadcast=True) emit('from_server', {'cmd': 'updatereppen', 'data': vars.rep_pen}, broadcast=True) emit('from_server', {'cmd': 'updatereppenslope', 'data': vars.rep_pen_slope}, broadcast=True) emit('from_server', {'cmd': 'updatereppenrange', 'data': vars.rep_pen_range}, broadcast=True) @@ -3404,12 +4134,17 @@ def editsubmit(data): if(vars.editln == 0): vars.prompt = data else: + vars.actions_metadata[vars.editln-1]['Alternative Text'] = vars.actions_metadata[vars.editln-1]['Alternative Text'] + [{"Text": vars.actions[vars.editln-1], "Pinned": False, + "Previous Selection": False, + "Edited": True}] + vars.actions_metadata[vars.editln-1]['Selected Text'] = data vars.actions[vars.editln-1] = data vars.mode = "play" update_story_chunk(vars.editln) emit('from_server', {'cmd': 'texteffect', 'data': vars.editln}, broadcast=True) emit('from_server', {'cmd': 'editmode', 'data': 'false'}) + send_debug() #==================================================================# # @@ -3421,10 +4156,14 @@ def deleterequest(): # Send error message pass else: - del vars.actions[vars.editln-1] + vars.actions_metadata[vars.editln-1]['Alternative Text'] = [{"Text": vars.actions[vars.editln-1], "Pinned": False, + "Previous Selection": True, "Edited": False}] + vars.actions_metadata[vars.editln-1]['Alternative Text'] + vars.actions_metadata[vars.editln-1]['Selected Text'] = '' + vars.actions[vars.editln-1] = '' vars.mode = "play" remove_story_chunk(vars.editln) emit('from_server', {'cmd': 'editmode', 'data': 'false'}) + send_debug() #==================================================================# # @@ -3438,6 +4177,10 @@ def inlineedit(chunk, data): vars.prompt = data else: if(chunk-1 in vars.actions): + vars.actions_metadata[chunk-1]['Alternative Text'] = vars.actions_metadata[chunk-1]['Alternative Text'] + [{"Text": vars.actions[chunk-1], "Pinned": False, + "Previous Selection": False, + "Edited": True}] + vars.actions_metadata[chunk-1]['Selected Text'] = data 
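            # (The assignments around this point keep vars.actions_metadata and
            #  vars.actions in lockstep: the chunk's previous text has just been
            #  appended to "Alternative Text" flagged as Edited, and the next
            #  line overwrites the visible chunk itself -- the same bookkeeping
            #  editsubmit() performs above and inlinedelete() performs below.)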
vars.actions[chunk-1] = data else: print(f"WARNING: Attempted to edit non-existent chunk {chunk}") @@ -3446,6 +4189,7 @@ def inlineedit(chunk, data): update_story_chunk(chunk) emit('from_server', {'cmd': 'texteffect', 'data': chunk}, broadcast=True) emit('from_server', {'cmd': 'editmode', 'data': 'false'}, broadcast=True) + send_debug() #==================================================================# # @@ -3461,12 +4205,17 @@ def inlinedelete(chunk): emit('from_server', {'cmd': 'editmode', 'data': 'false'}, broadcast=True) else: if(chunk-1 in vars.actions): - del vars.actions[chunk-1] + vars.actions_metadata[chunk-1]['Alternative Text'] = [{"Text": vars.actions[chunk-1], "Pinned": False, + "Previous Selection": True, + "Edited": False}] + vars.actions_metadata[chunk-1]['Alternative Text'] + vars.actions_metadata[chunk-1]['Selected Text'] = '' + vars.actions[chunk-1] = '' else: print(f"WARNING: Attempted to delete non-existent chunk {chunk}") setgamesaved(False) remove_story_chunk(chunk) emit('from_server', {'cmd': 'editmode', 'data': 'false'}, broadcast=True) + send_debug() #==================================================================# # Toggles the game mode for memory editing and sends UI commands @@ -3815,7 +4564,8 @@ def anotesubmit(data, template=""): #==================================================================# def ikrequest(txt): # Log request to console - print("{0}Len:{1}, Txt:{2}{3}".format(colors.YELLOW, len(txt), txt, colors.END)) + if not vars.quiet: + print("{0}Len:{1}, Txt:{2}{3}".format(colors.YELLOW, len(txt), txt, colors.END)) # Build request JSON data reqdata = { @@ -3852,11 +4602,21 @@ def ikrequest(txt): genout = vars.lua_koboldbridge.outputs[1] assert genout is str - print("{0}{1}{2}".format(colors.CYAN, genout, colors.END)) + if not vars.quiet: + print("{0}{1}{2}".format(colors.CYAN, genout, colors.END)) vars.actions.append(genout) + if vars.actions.get_last_key() in vars.actions_metadata: + vars.actions_metadata[vars.actions.get_last_key()] = {"Selected Text": genout, "Alternative Text": []} + else: + # 2. 
We've selected a chunk of text that is was presented previously + alternatives = [item['Text'] for item in vars.actions_metadata[vars.actions.get_last_key()]["Alternative Text"]] + if genout in alternatives: + alternatives = [item for item in vars.actions_metadata[vars.actions.get_last_key()]["Alternative Text"] if item['Text'] != genout] + vars.actions_metadata[vars.actions.get_last_key()]["Alternative Text"] = alternatives + vars.actions_metadata[vars.actions.get_last_key()]["Selected Text"] = genout update_story_chunk('last') emit('from_server', {'cmd': 'texteffect', 'data': vars.actions.get_last_key() + 1 if len(vars.actions) else 0}, broadcast=True) - + send_debug() set_aibusy(0) else: # Send error message to web client @@ -3875,20 +4635,37 @@ def ikrequest(txt): #==================================================================# def oairequest(txt, min, max): # Log request to console - print("{0}Len:{1}, Txt:{2}{3}".format(colors.YELLOW, len(txt), txt, colors.END)) + if not vars.quiet: + print("{0}Len:{1}, Txt:{2}{3}".format(colors.YELLOW, len(txt), txt, colors.END)) # Store context in memory to use it for comparison with generated content vars.lastctx = txt # Build request JSON data - reqdata = { - 'prompt': txt, - 'max_tokens': max, - 'temperature': vars.temp, - 'top_p': vars.top_p, - 'n': 1, - 'stream': False - } + if 'GooseAI' in args.configname: + reqdata = { + 'prompt': txt, + 'max_tokens': vars.genamt, + 'temperature': vars.temp, + 'top_p': vars.top_p, + 'top_k': vars.top_k, + 'tfs': vars.tfs, + 'typical_p': vars.typical, + 'repetition_penalty': vars.rep_pen, + 'repetition_penalty_slope': vars.rep_pen_slope, + 'repetition_penalty_range': vars.rep_pen_range, + 'n': vars.numseqs, + 'stream': False + } + else: + reqdata = { + 'prompt': txt, + 'max_tokens': vars.genamt, + 'temperature': vars.temp, + 'top_p': vars.top_p, + 'n': vars.numseqs, + 'stream': False + } req = requests.post( vars.oaiurl, @@ -3901,21 +4678,53 @@ def oairequest(txt, min, max): # Deal with the response if(req.status_code == 200): - genout = req.json()["choices"][0]["text"] + outputs = [out["text"] for out in req.json()["choices"]] - vars.lua_koboldbridge.outputs[1] = genout + for idx in range(len(outputs)): + vars.lua_koboldbridge.outputs[idx+1] = outputs[idx] execute_outmod() - if(vars.lua_koboldbridge.regeneration_required): + if (vars.lua_koboldbridge.regeneration_required): vars.lua_koboldbridge.regeneration_required = False - genout = vars.lua_koboldbridge.outputs[1] - assert genout is str + genout = [] + for i in range(len(outputs)): + genout.append( + {"generated_text": vars.lua_koboldbridge.outputs[i + 1]}) + assert type(genout[-1]["generated_text"]) is str + else: + genout = [ + {"generated_text": utils.decodenewlines(txt)} + for txt in outputs] + + if vars.actions.get_last_key() not in vars.actions_metadata: + vars.actions_metadata[vars.actions.get_last_key()] = { + "Selected Text": genout[0], "Alternative Text": []} + else: + # 2. 
We've selected a chunk of text that is was presented previously + try: + alternatives = [item['Text'] for item in vars.actions_metadata[len(vars.actions)-1]["Alternative Text"]] + except: + print(len(vars.actions)) + print(vars.actions_metadata) + raise + if genout in alternatives: + alternatives = [item for item in vars.actions_metadata[vars.actions.get_last_key() ]["Alternative Text"] if item['Text'] != genout] + vars.actions_metadata[vars.actions.get_last_key()]["Alternative Text"] = alternatives + vars.actions_metadata[vars.actions.get_last_key()]["Selected Text"] = genout + + if (len(genout) == 1): + genresult(genout[0]["generated_text"]) + else: + if (vars.lua_koboldbridge.restart_sequence is not None and + vars.lua_koboldbridge.restart_sequence > 0): + genresult(genout[vars.lua_koboldbridge.restart_sequence - 1][ + "generated_text"]) + else: + genselect(genout) + + if not vars.quiet: + print("{0}{1}{2}".format(colors.CYAN, genout, colors.END)) - print("{0}{1}{2}".format(colors.CYAN, genout, colors.END)) - vars.actions.append(genout) - update_story_chunk('last') - emit('from_server', {'cmd': 'texteffect', 'data': vars.actions.get_last_key() + 1 if len(vars.actions) else 0}, broadcast=True) - set_aibusy(0) else: # Send error message to web client @@ -3943,12 +4752,15 @@ def exitModes(): #==================================================================# # Launch in-browser save prompt #==================================================================# -def saveas(name): +def saveas(data): + + name = data['name'] + savepins = data['pins'] # Check if filename exists already name = utils.cleanfilename(name) if(not fileops.saveexists(name) or (vars.saveow and vars.svowname == name)): # All clear to save - e = saveRequest(fileops.storypath(name)) + e = saveRequest(fileops.storypath(name), savepins=savepins) vars.saveow = False vars.svowname = "" if(e is None): @@ -4024,7 +4836,7 @@ def savetofile(): #==================================================================# # Save the story to specified path #==================================================================# -def saveRequest(savpath): +def saveRequest(savpath, savepins=True): if(savpath): # Leave Edit/Memory mode before continuing exitModes() @@ -4040,6 +4852,8 @@ def saveRequest(savpath): js["authorsnote"] = vars.authornote js["anotetemplate"] = vars.authornotetemplate js["actions"] = tuple(vars.actions.values()) + if savepins: + js["actions_metadata"] = vars.actions_metadata js["worldinfo"] = [] js["wifolders_d"] = vars.wifolders_d js["wifolders_l"] = vars.wifolders_l @@ -4164,6 +4978,38 @@ def loadRequest(loadpath, filename=None): del vars.actions vars.actions = structures.KoboldStoryRegister() actions = collections.deque(js["actions"]) + + + if "actions_metadata" in js: + + if type(js["actions_metadata"]) == dict: + temp = js["actions_metadata"] + vars.actions_metadata = {} + #we need to redo the numbering of the actions_metadata since the actions list doesn't preserve it's number on saving + if len(temp) > 0: + counter = 0 + temp = {int(k):v for k,v in temp.items()} + for i in range(max(temp)+1): + if i in temp: + vars.actions_metadata[counter] = temp[i] + counter += 1 + del temp + else: + #fix if we're using the old metadata format + vars.actions_metadata = {} + i = 0 + + for text in js['actions']: + vars.actions_metadata[i] = {'Selected Text': text, 'Alternative Text': []} + i+=1 + else: + vars.actions_metadata = {} + i = 0 + + for text in js['actions']: + vars.actions_metadata[i] = {'Selected Text': text, 'Alternative 
Text': []} + i+=1 + if(len(vars.prompt.strip()) == 0): while(len(actions)): @@ -4246,60 +5092,8 @@ def loadRequest(loadpath, filename=None): emit('from_server', {'cmd': 'setgamestate', 'data': 'ready'}, broadcast=True) emit('from_server', {'cmd': 'hidegenseqs', 'data': ''}, broadcast=True) print("{0}Story loaded from {1}!{2}".format(colors.GREEN, filename, colors.END)) - -#==================================================================# -# Load a soft prompt from a file -#==================================================================# -def spRequest(filename): - vars.spfilename = "" - settingschanged() - - if(len(filename) == 0): - vars.sp = None - vars.sp_length = 0 - return - - global np - if 'np' not in globals(): - import numpy as np - - z, version, shape, fortran_order, dtype = fileops.checksp(filename, vars.modeldim) - assert isinstance(z, zipfile.ZipFile) - with z.open('meta.json') as f: - vars.spmeta = json.load(f) - z.close() - - with np.load(fileops.sppath(filename), allow_pickle=False) as f: - tensor = f['tensor.npy'] - - # If the tensor is in bfloat16 format, convert it to float32 - if(tensor.dtype == 'V2'): - tensor.dtype = np.uint16 - tensor = np.uint32(tensor) << 16 - tensor.dtype = np.float32 - - if(tensor.dtype != np.float16): - tensor = np.float32(tensor) - assert not np.isinf(tensor).any() and not np.isnan(tensor).any() - - vars.sp_length = tensor.shape[-2] - vars.spmeta["n_tokens"] = vars.sp_length - - if(vars.model in ("TPUMeshTransformerGPTJ",)): - rows = tensor.shape[0] - padding_amount = tpu_mtj_backend.params["seq"] - (tpu_mtj_backend.params["seq"] % -tpu_mtj_backend.params["cores_per_replica"]) - rows - tensor = np.pad(tensor, ((0, padding_amount), (0, 0))) - tensor = tensor.reshape( - tpu_mtj_backend.params["cores_per_replica"], - -1, - tpu_mtj_backend.params["d_model"], - ) - vars.sp = tpu_mtj_backend.shard_xmap(np.float32(tensor)) - else: - vars.sp = torch.from_numpy(tensor) - - vars.spfilename = filename - settingschanged() + + send_debug() #==================================================================# # Import an AIDungon game exported with Mimi's tool @@ -4376,6 +5170,7 @@ def importgame(): vars.authornote = ref["authorsNote"] if type(ref["authorsNote"]) is str else "" vars.authornotetemplate = "[Author's note: <|>]" vars.actions = structures.KoboldStoryRegister() + vars.actions_metadata = {} vars.worldinfo = [] vars.worldinfo_i = [] vars.worldinfo_u = {} @@ -4473,6 +5268,7 @@ def importAidgRequest(id): vars.authornote = js["authorsNote"] vars.authornotetemplate = "[Author's note: <|>]" vars.actions = structures.KoboldStoryRegister() + vars.actions_metadata = {} vars.worldinfo = [] vars.worldinfo_i = [] vars.worldinfo_u = {} @@ -4581,7 +5377,8 @@ def wiimportrequest(): if(vars.worldinfo[-1]["folder"] is not None): vars.wifolders_u[vars.worldinfo[-1]["folder"]].append(vars.worldinfo[-1]) - print("{0}".format(vars.worldinfo[0])) + if not vars.quiet: + print("{0}".format(vars.worldinfo[0])) # Refresh game screen setgamesaved(False) @@ -4599,6 +5396,7 @@ def newGameRequest(): vars.prompt = "" vars.memory = "" vars.actions = structures.KoboldStoryRegister() + vars.actions_metadata = {} vars.authornote = "" vars.authornotetemplate = vars.setauthornotetemplate @@ -4643,19 +5441,26 @@ def randomGameRequest(topic, memory=""): vars.memory = memory emit('from_server', {'cmd': 'setmemory', 'data': vars.memory}, broadcast=True) -# Load desired settings from both the model and the users config file -loadsettings() - # Prevent tokenizer from taking extra time 
the first time it's used def __preempt_tokenizer(): if("tokenizer" not in globals()): return - tokenizer.decode([25678, 559]) - tokenizer.encode("eunoia") + utils.decodenewlines(tokenizer.decode([25678, 559])) + tokenizer.encode(utils.encodenewlines("eunoia")) threading.Thread(target=__preempt_tokenizer).start() +# Load soft prompt specified by the settings file, if applicable +if(path.exists("settings/" + getmodelname().replace('/', '_') + ".settings")): + file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "r") + js = json.load(file) + if(vars.allowsp and "softprompt" in js and type(js["softprompt"]) is str and all(q not in js["softprompt"] for q in ("..", ":")) and (len(js["softprompt"]) == 0 or all(js["softprompt"][0] not in q for q in ("/", "\\")))): + spRequest(js["softprompt"]) + else: + vars.spfilename = "" + file.close() + # Precompile TPU backend if required -if(vars.model in ("TPUMeshTransformerGPTJ",)): +if(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")): soft_tokens = tpumtjgetsofttokens() if(vars.dynamicscan or (not vars.nogenmod and vars.has_genmod)): threading.Thread( @@ -4682,34 +5487,94 @@ if(vars.model in ("TPUMeshTransformerGPTJ",)): }, ).start() +def send_debug(): + if vars.debug: + debug_info = "" + try: + debug_info = "{}Newline Mode: {}\n".format(debug_info, vars.newlinemode) + except: + pass + try: + debug_info = "{}Action Length: {}\n".format(debug_info, vars.actions.get_last_key()) + except: + pass + try: + debug_info = "{}Actions Metadata Length: {}\n".format(debug_info, max(vars.actions_metadata) if len(vars.actions_metadata) > 0 else 0) + except: + pass + try: + debug_info = "{}Actions: {}\n".format(debug_info, [k for k in vars.actions]) + except: + pass + try: + debug_info = "{}Actions Metadata: {}\n".format(debug_info, [k for k in vars.actions_metadata]) + except: + pass + try: + debug_info = "{}Last Action: {}\n".format(debug_info, vars.actions[vars.actions.get_last_key()]) + except: + pass + try: + debug_info = "{}Last Metadata: {}\n".format(debug_info, vars.actions_metadata[max(vars.actions_metadata)]) + except: + pass + + emit('from_server', {'cmd': 'debug_info', 'data': debug_info}, broadcast=True) + #==================================================================# # Final startup commands to launch Flask app #==================================================================# print("", end="", flush=True) if __name__ == "__main__": + port = args.port if "port" in args and args.port is not None else 5000 print("{0}\nStarting webserver...{1}".format(colors.GREEN, colors.END), flush=True) # Start Flask/SocketIO (Blocking, so this must be last method!) 
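The per-action metadata that the edit, delete, load and send_debug() code above keeps in vars.actions_metadata is never written out in one place. Below is a minimal sketch of the shape implied by the keys used in this patch ("Selected Text", "Alternative Text", "Pinned", "Previous Selection", "Edited"); the TypedDict names and the example strings are illustrative only and do not exist in the codebase.

from typing import Dict, List, TypedDict

# One alternative continuation attached to a story action (chunk).
Alternative = TypedDict(
    "Alternative",
    {"Text": str, "Pinned": bool, "Previous Selection": bool, "Edited": bool},
)
# One entry of vars.actions_metadata, keyed by the action's index.
ActionMetadata = TypedDict(
    "ActionMetadata",
    {"Selected Text": str, "Alternative Text": List[Alternative]},
)

example: Dict[int, ActionMetadata] = {
    0: {
        "Selected Text": "You open the door.",
        "Alternative Text": [
            # An unpicked generation, kept around so pinsequence() can pin it.
            {"Text": "You knock twice.", "Pinned": True,
             "Previous Selection": False, "Edited": False},
            # The text that was selected before an inline edit replaced it.
            {"Text": "You open teh door.", "Pinned": False,
             "Previous Selection": False, "Edited": True},
        ],
    },
}

Editing a chunk appends the old selection with "Edited": True, while deleting a chunk prepends it with "Previous Selection": True and blanks "Selected Text", which is why loadRequest() above can rebuild the story history from this dictionary alone.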
- - #socketio.run(app, host='0.0.0.0', port=5000) - if(vars.remote): - if(args.ngrok): + + #socketio.run(app, host='0.0.0.0', port=port) + if(vars.host): + if(args.localtunnel): + import subprocess, shutil + localtunnel = subprocess.Popen([shutil.which('lt'), '-p', str(port), 'http'], stdout=subprocess.PIPE) + attempts = 0 + while attempts < 10: + try: + cloudflare = str(localtunnel.stdout.readline()) + cloudflare = (re.search("(?Phttps?:\/\/[^\s]+loca.lt)", cloudflare).group("url")) + break + except: + attempts += 1 + time.sleep(3) + continue + if attempts == 10: + print("LocalTunnel could not be created, falling back to cloudflare...") + from flask_cloudflared import _run_cloudflared + cloudflare = _run_cloudflared(port) + elif(args.ngrok): from flask_ngrok import _run_ngrok cloudflare = _run_ngrok() - else: + elif(args.remote): from flask_cloudflared import _run_cloudflared - cloudflare = _run_cloudflared(5000) - with open('cloudflare.log', 'w') as cloudflarelog: - cloudflarelog.write("KoboldAI has finished loading and is available at the following link : " + cloudflare) - print(format(colors.GREEN) + "KoboldAI has finished loading and is available at the following link : " + cloudflare + format(colors.END)) + cloudflare = _run_cloudflared(port) + if(args.localtunnel or args.ngrok or args.remote): + with open('cloudflare.log', 'w') as cloudflarelog: + cloudflarelog.write("KoboldAI has finished loading and is available at the following link : " + cloudflare) + print(format(colors.GREEN) + "KoboldAI has finished loading and is available at the following link : " + cloudflare + format(colors.END)) + else: + print("{0}Webserver has started, you can now connect to this machine at port {1}{2}" + .format(colors.GREEN, port, colors.END)) vars.serverstarted = True - socketio.run(app, host='0.0.0.0', port=5000) + socketio.run(app, host='0.0.0.0', port=port) else: import webbrowser - webbrowser.open_new('http://localhost:5000') - print("{0}Server started!\nYou may now connect with a browser at http://127.0.0.1:5000/{1}".format(colors.GREEN, colors.END)) + webbrowser.open_new('http://localhost:{0}'.format(port)) + print("{0}Server started!\nYou may now connect with a browser at http://127.0.0.1:{1}/{2}" + .format(colors.GREEN, port, colors.END)) vars.serverstarted = True - socketio.run(app, port=5000) + if args.unblock: + socketio.run(app, port=port, host='0.0.0.0') + else: + socketio.run(app, port=port) else: print("{0}\nServer started in WSGI mode!{1}".format(colors.GREEN, colors.END), flush=True) diff --git a/breakmodel.py b/breakmodel.py index 087a112a..eb49e669 100644 --- a/breakmodel.py +++ b/breakmodel.py @@ -212,14 +212,17 @@ Copyright 2018 The Hugging Face team import torch +from torch import nn import torch.cuda.comm import copy import gc import sys import itertools import bisect +import random +from typing import Optional -from transformers.modeling_outputs import BaseModelOutputWithPast +from transformers.modeling_outputs import BaseModelOutputWithPast, BaseModelOutputWithPastAndCrossAttentions from transformers.utils import logging logger = logging.get_logger(__name__) @@ -230,22 +233,40 @@ gpu_blocks = [] primary_device = 0 -def move_hidden_layers(transformer): +# Copied from transformers.models.bart.modeling_bart._expand_mask +def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int] = None): + """ + Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`. 
+ """ + bsz, src_len = mask.size() + tgt_len = tgt_len if tgt_len is not None else src_len + + expanded_mask = mask[:, None, None, :].expand(bsz, 1, tgt_len, src_len).to(dtype) + + inverted_mask = 1.0 - expanded_mask + + return inverted_mask.masked_fill(inverted_mask.bool(), torch.finfo(dtype).min) + + +def move_hidden_layers(transformer, h=None): + if h is None: + h = transformer.h + assert len(gpu_blocks) <= torch.cuda.device_count() - assert sum(gpu_blocks) <= len(transformer.h) - ram_blocks = len(transformer.h) - sum(gpu_blocks) + assert sum(gpu_blocks) <= len(h) + ram_blocks = len(h) - sum(gpu_blocks) transformer.extrastorage = {} torch.cuda.empty_cache() able_to_pin_layers = True for i in range(ram_blocks): - transformer.h[i].to("cpu") - transformer.extrastorage[i] = copy.deepcopy(transformer.h[i]) + h[i].to("cpu") + transformer.extrastorage[i] = copy.deepcopy(h[i]) smalltensor = torch.tensor(0).to(primary_device) - for param1 in transformer.h[i].parameters(): + for param1 in h[i].parameters(): param1.data = smalltensor - transformer.h[i].to(primary_device) + h[i].to(primary_device) for param in transformer.extrastorage[i].parameters(): param.requires_grad = False param.data = param.data.detach() @@ -259,34 +280,34 @@ def move_hidden_layers(transformer): torch.cuda.empty_cache() if ram_blocks: - for param1,param2 in zip(transformer.h[0].parameters(),transformer.extrastorage[0].parameters()): + for param1,param2 in zip(h[0].parameters(),transformer.extrastorage[0].parameters()): param1.data = param2.data.to(primary_device, non_blocking=False).detach() - for param1,param2 in zip(transformer.h[ram_blocks-1].parameters(),transformer.extrastorage[ram_blocks-1].parameters()): + for param1,param2 in zip(h[ram_blocks-1].parameters(),transformer.extrastorage[ram_blocks-1].parameters()): param1.data = param2.data.to(primary_device, non_blocking=False).detach() i = ram_blocks for j in range(len(gpu_blocks)): for _ in range(gpu_blocks[j]): - transformer.h[i].to(j) + h[i].to(j) i += 1 -def new_forward( - self, - input_ids=None, - past_key_values=None, - attention_mask=None, - token_type_ids=None, - position_ids=None, - head_mask=None, - inputs_embeds=None, - use_cache=None, - output_attentions=None, - output_hidden_states=None, - return_dict=None, - embs=None, - ): +def new_forward_neo( + self, + input_ids=None, + past_key_values=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + use_cache=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + embs=None, +): assert len(gpu_blocks) <= torch.cuda.device_count() assert sum(gpu_blocks) <= len(self.h) ram_blocks = len(self.h) - sum(gpu_blocks) @@ -477,3 +498,365 @@ def new_forward( hidden_states=all_hidden_states, attentions=all_self_attentions, ) + + +def new_forward_xglm( + self, + input_ids=None, + attention_mask=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + head_mask=None, + cross_attn_head_mask=None, + past_key_values=None, + inputs_embeds=None, + use_cache=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, +): + assert len(gpu_blocks) <= torch.cuda.device_count() + assert sum(gpu_blocks) <= len(self.layers) + ram_blocks = len(self.layers) - sum(gpu_blocks) + cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks)) + + + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states 
is not None else self.config.output_hidden_states + ) + use_cache = use_cache if use_cache is not None else self.config.use_cache + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + # retrieve input_ids and inputs_embeds + if input_ids is not None and inputs_embeds is not None: + raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") + elif input_ids is not None: + input_shape = input_ids.size() + input_ids = input_ids.view(-1, input_shape[-1]) + elif inputs_embeds is not None: + input_shape = inputs_embeds.size()[:-1] + else: + raise ValueError("You have to specify either input_ids or inputs_embeds") + + # past_key_values_length + past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0 + + if inputs_embeds is None: + if breakmodel: + input_ids = input_ids.to(primary_device) + inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale + + attention_mask = self._prepare_decoder_attention_mask( + attention_mask, input_shape, inputs_embeds, past_key_values_length + ) + + # expand encoder attention mask + if encoder_hidden_states is not None and encoder_attention_mask is not None: + # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] + encoder_attention_mask = _expand_mask(encoder_attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]) + + # embed positions + if breakmodel: + inputs_embeds = inputs_embeds.to(primary_device) + positions = self.embed_positions(input_ids, inputs_embeds, past_key_values_length) + if breakmodel: + positions = positions.to(primary_device) + + hidden_states = inputs_embeds + positions + + hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) + + # decoder layers + all_hidden_states = () if output_hidden_states else None + all_self_attns = () if output_attentions else None + all_cross_attentions = () if (output_attentions and encoder_hidden_states is not None) else None + next_decoder_cache = () if use_cache else None + + if breakmodel and ram_blocks: + copystream = torch.cuda.Stream(device=primary_device, priority=-1) + + # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired + for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]): + if attn_mask is not None: + assert attn_mask.size()[0] == ( + len(self.layers) + ), f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}." + for idx, decoder_layer in enumerate(self.layers): + i = idx + if breakmodel: + if i in range(ram_blocks): + index1 = (i+1)%ram_blocks + for param1,param2 in zip(self.layers[index1].parameters(),self.layers[(i-1)%ram_blocks].parameters()): + param1.data = param2.data + for param1,param2 in zip(self.layers[index1].parameters(),self.extrastorage[index1].parameters()): + with torch.cuda.stream(copystream): + torch.cuda.comm.broadcast(param2.data,out = [param1.data]) + + # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) + if output_hidden_states: + all_hidden_states += (hidden_states,) + dropout_probability = random.uniform(0, 1) + if self.training and (dropout_probability < self.layerdrop): + continue + + past_key_value = past_key_values[idx] if past_key_values is not None else None + + if self.gradient_checkpointing and self.training: + + if use_cache: + logger.warning( + "`use_cache = True` is incompatible with gradient checkpointing`. Setting `use_cache = False`..." 
+ ) + use_cache = False + + def create_custom_forward(module): + def custom_forward(*inputs): + # None for past_key_value + return module(*inputs, output_attentions, use_cache) + + return custom_forward + + layer_outputs = torch.utils.checkpoint.checkpoint( + create_custom_forward(decoder_layer), + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + head_mask[idx] if head_mask is not None else None, + cross_attn_head_mask[idx] if cross_attn_head_mask is not None else None, + None, + ) + else: + if breakmodel: + device = primary_device if i < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, i - ram_blocks) + layer_outputs = decoder_layer( + hidden_states.to(device) if breakmodel and hidden_states is not None else hidden_states, + attention_mask=attention_mask.to(device) if breakmodel and attention_mask is not None else attention_mask, + encoder_hidden_states=encoder_hidden_states.to(device) if breakmodel and encoder_hidden_states is not None else encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask.to(device) if breakmodel and encoder_attention_mask is not None else encoder_attention_mask, + layer_head_mask=((head_mask[idx].to(device) if breakmodel and head_mask[idx] is not None else head_mask[idx]) if head_mask is not None else None), + cross_attn_layer_head_mask=( + (cross_attn_head_mask[idx].to(device) if breakmodel and cross_attn_head_mask[idx] is not None else cross_attn_head_mask[idx]) if cross_attn_head_mask is not None else None + ), + past_key_value=tuple(v.to(device) for v in past_key_value if v is not None) if breakmodel and past_key_value is not None and i >= ram_blocks and len(past_key_value) and past_key_value[0].device.index != device else past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + ) + hidden_states = layer_outputs[0] + + if use_cache: + next_decoder_cache += (layer_outputs[3 if output_attentions else 1],) + + if output_attentions: + all_self_attns += (layer_outputs[1],) + + if encoder_hidden_states is not None: + all_cross_attentions += (layer_outputs[2],) + + if breakmodel: + if i in range(ram_blocks): + torch.cuda.synchronize() + torch.cuda.empty_cache() + + if breakmodel: + if ram_blocks: + del copystream + torch.cuda.empty_cache() + hidden_states = hidden_states.to(primary_device) + hidden_states = self.layer_norm(hidden_states) + if breakmodel: + hidden_states = hidden_states.to(primary_device) + + # add hidden states from the last decoder layer + if output_hidden_states: + all_hidden_states += (hidden_states,) + + next_cache = next_decoder_cache if use_cache else None + if not return_dict: + return tuple( + v + for v in [hidden_states, next_cache, all_hidden_states, all_self_attns, all_cross_attentions] + if v is not None + ) + return BaseModelOutputWithPastAndCrossAttentions( + last_hidden_state=hidden_states, + past_key_values=next_cache, + hidden_states=all_hidden_states, + attentions=all_self_attns, + cross_attentions=all_cross_attentions, + ) + + +def new_forward_opt( + self, + input_ids=None, + attention_mask=None, + head_mask=None, + past_key_values=None, + inputs_embeds=None, + use_cache=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, +): + assert len(gpu_blocks) <= torch.cuda.device_count() + assert sum(gpu_blocks) <= len(self.layers) + ram_blocks = len(self.layers) - sum(gpu_blocks) + cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks)) + + + output_attentions = output_attentions if output_attentions is not None else 
self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + use_cache = use_cache if use_cache is not None else self.config.use_cache + + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + # retrieve input_ids and inputs_embeds + if input_ids is not None and inputs_embeds is not None: + raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time") + elif input_ids is not None: + input_shape = input_ids.size() + input_ids = input_ids.view(-1, input_shape[-1]) + elif inputs_embeds is not None: + input_shape = inputs_embeds.size()[:-1] + else: + raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds") + + past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0 + + if inputs_embeds is None: + if breakmodel: + input_ids = input_ids.to(primary_device) + inputs_embeds = self.embed_tokens(input_ids) + + # embed positions + if breakmodel: + inputs_embeds = inputs_embeds.to(primary_device) + if attention_mask is None: + attention_mask = torch.ones(inputs_embeds.shape[:2], dtype=torch.bool, device=inputs_embeds.device) + + positions = self.embed_positions(attention_mask)[:, past_key_values_length:, :] + if breakmodel: + positions = positions.to(primary_device) + + attention_mask = self._prepare_decoder_attention_mask( + attention_mask, input_shape, inputs_embeds, past_key_values_length + ) + + if self.project_in is not None: + inputs_embeds = self.project_in(inputs_embeds) + + hidden_states = inputs_embeds + positions + + hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) + + # decoder layers + all_hidden_states = () if output_hidden_states else None + all_self_attns = () if output_attentions else None + next_decoder_cache = () if use_cache else None + + if breakmodel and ram_blocks: + copystream = torch.cuda.Stream(device=primary_device, priority=-1) + + # check if head_mask has a correct number of layers specified if desired + for attn_mask, mask_name in zip([head_mask], ["head_mask"]): + if attn_mask is not None: + if attn_mask.size()[0] != (len(self.layers)): + raise ValueError( + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." + ) + + for idx, decoder_layer in enumerate(self.layers): + i = idx + if breakmodel: + if i in range(ram_blocks): + index1 = (i+1)%ram_blocks + for param1,param2 in zip(self.layers[index1].parameters(),self.layers[(i-1)%ram_blocks].parameters()): + param1.data = param2.data + for param1,param2 in zip(self.layers[index1].parameters(),self.extrastorage[index1].parameters()): + with torch.cuda.stream(copystream): + torch.cuda.comm.broadcast(param2.data,out = [param1.data]) + + # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) + if output_hidden_states: + all_hidden_states += (hidden_states,) + dropout_probability = random.uniform(0, 1) + if self.training and (dropout_probability < self.layerdrop): + continue + + past_key_value = past_key_values[idx] if past_key_values is not None else None + + if self.gradient_checkpointing and self.training: + + if use_cache: + logger.warning( + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." 
+ ) + use_cache = False + + def create_custom_forward(module): + def custom_forward(*inputs): + # None for past_key_value + return module(*inputs, output_attentions, None) + + return custom_forward + + layer_outputs = torch.utils.checkpoint.checkpoint( + create_custom_forward(decoder_layer), + hidden_states, + attention_mask, + head_mask[idx] if head_mask is not None else None, + None, + ) + else: + if breakmodel: + device = primary_device if i < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, i - ram_blocks) + layer_outputs = decoder_layer( + hidden_states.to(device) if breakmodel and hidden_states is not None else hidden_states, + attention_mask=attention_mask.to(device) if breakmodel and attention_mask is not None else attention_mask, + layer_head_mask=((head_mask[idx].to(device) if breakmodel and head_mask[idx] is not None else head_mask[idx]) if head_mask is not None else None), + past_key_value=tuple(v.to(device) for v in past_key_value if v is not None) if breakmodel and past_key_value is not None and i >= ram_blocks and len(past_key_value) and past_key_value[0].device.index != device else past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + ) + + hidden_states = layer_outputs[0] + + if use_cache: + next_decoder_cache += (layer_outputs[2 if output_attentions else 1],) + + if output_attentions: + all_self_attns += (layer_outputs[1],) + + if breakmodel: + if i in range(ram_blocks): + torch.cuda.synchronize() + torch.cuda.empty_cache() + + if breakmodel: + if ram_blocks: + del copystream + torch.cuda.empty_cache() + hidden_states = hidden_states.to(primary_device) + if self.project_out is not None: + hidden_states = self.project_out(hidden_states) + if breakmodel: + hidden_states = hidden_states.to(primary_device) + + # add hidden states from the last decoder layer + if output_hidden_states: + all_hidden_states += (hidden_states,) + + next_cache = next_decoder_cache if use_cache else None + if not return_dict: + return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None) + return BaseModelOutputWithPast( + last_hidden_state=hidden_states, + past_key_values=next_cache, + hidden_states=all_hidden_states, + attentions=all_self_attns, + ) diff --git a/bridge.lua b/bridge.lua index b46977c5..ed0941c6 100644 --- a/bridge.lua +++ b/bridge.lua @@ -165,7 +165,7 @@ return function(_python, _bridged) ---@field num_outputs integer ---@field feedback string ---@field is_config_file_open boolean - local kobold = setmetatable({API_VERSION = 1.0}, metawrapper) + local kobold = setmetatable({API_VERSION = 1.1}, metawrapper) local KoboldLib_mt = setmetatable({}, metawrapper) local KoboldLib_getters = setmetatable({}, metawrapper) local KoboldLib_setters = setmetatable({}, metawrapper) @@ -866,6 +866,7 @@ return function(_python, _bridged) ---@field settopp number ---@field settopk integer ---@field settfs number + ---@field settypical number ---@field setreppen number ---@field setreppenslope number ---@field setreppenrange number @@ -882,6 +883,7 @@ return function(_python, _bridged) ---@field top_p number ---@field top_k integer ---@field tfs number + ---@field typical number ---@field reppen number ---@field reppenslope number ---@field reppenrange number @@ -1048,11 +1050,34 @@ return function(_python, _bridged) return elseif not bridged.vars.gamestarted and v == "" then error("`KoboldLib.submission` must not be set to the empty string when the story is empty") + return end bridged.vars.submission = v end + 
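The `typical` knob added throughout this patch (sendtocolab, tpumtjgenerate, the GooseAI request body, refresh_settings and the new KoboldLib `typical`/`settypical` fields) refers to locally typical sampling. The repository's actual logits warper is not part of these hunks; the snippet below is only a sketch of the standard filter, assuming a tensor of raw logits with the vocabulary on the last dimension, and `typical_filter` is a hypothetical name.

import torch

def typical_filter(logits: torch.Tensor, typical_p: float = 0.9) -> torch.Tensor:
    """Keep the tokens whose surprisal is closest to the distribution's entropy,
    up to `typical_p` probability mass; mask everything else to -inf."""
    log_probs = torch.log_softmax(logits, dim=-1)
    probs = log_probs.exp()
    entropy = -(probs * log_probs).sum(dim=-1, keepdim=True)
    # Distance between each token's surprisal (-log p) and the entropy.
    deviation = ((-log_probs) - entropy).abs()
    order = deviation.argsort(dim=-1)              # most "typical" tokens first
    sorted_probs = probs.gather(-1, order)
    mass_before = sorted_probs.cumsum(dim=-1) - sorted_probs
    keep_sorted = mass_before < typical_p          # include the token that crosses the threshold
    keep = torch.zeros_like(keep_sorted).scatter(-1, order, keep_sorted)
    return logits.masked_fill(~keep, float("-inf"))

Sampling then proceeds from the softmax of the filtered logits, exactly as with top-p filtering, except that tokens are ranked by typicality rather than by raw probability.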
--========================================================================== + -- Userscript API: Soft prompt + --========================================================================== + + ---@param t KoboldLib + ---@return string? + function KoboldLib_getters.spfilename(t) + return bridged.get_spfilename() + end + + ---@param t KoboldLib + ---@param v string? + function KoboldLib_setters.spfilename(t, v) + if v:find("/") or v:find("\\") then + error("Cannot set `KoboldLib.spfilename` to a string that contains slashes") + end + if bridged.set_spfilename(v) then + maybe_require_regeneration() + end + end + + --========================================================================== -- Userscript API: Model information --========================================================================== diff --git a/colab/GPU.ipynb b/colab/GPU.ipynb index 47c31f27..d70e1fea 100644 --- a/colab/GPU.ipynb +++ b/colab/GPU.ipynb @@ -7,7 +7,7 @@ "private_outputs": true, "provenance": [], "collapsed_sections": [], - "authorship_tag": "ABX9TyOKIa/NDLlYI5j63GXPtkXv", + "authorship_tag": "ABX9TyN3BT4aQmdwrT2ibYyYjjMw", "include_colab_link": true }, "kernelspec": { @@ -27,7 +27,7 @@ "colab_type": "text" }, "source": [ - "\"Open" + "\"Open" ] }, { @@ -68,14 +68,20 @@ "#@title <-- Click this to start KoboldAI\n", "#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n", "\n", - "Model = \"KoboldAI/GPT-Neo-2.7B-Janeway\" #@param [\"KoboldAI/GPT-Neo-2.7B-Janeway\", \"KoboldAI/GPT-Neo-2.7B-AID\", \"KoboldAI/GPT-Neo-2.7B-Picard\", \"KoboldAI/GPT-Neo-2.7B-Horni-LN\", \"KoboldAI/GPT-Neo-2.7B-Horni\", \"KoboldAI/GPT-Neo-2.7B-Shinen\", \"EleutherAI/gpt-neo-2.7B\"] {allow-input: true}\n", - "Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n", + "Model = \"KoboldAI/fairseq-dense-2.7B-Nerys\" #@param [\"KoboldAI/fairseq-dense-2.7B-Nerys\", \"KoboldAI/GPT-Neo-2.7B-Janeway\", \"KoboldAI/GPT-Neo-2.7B-AID\", \"KoboldAI/GPT-Neo-2.7B-Picard\", \"KoboldAI/GPT-Neo-2.7B-Horni-LN\", \"KoboldAI/GPT-Neo-2.7B-Horni\", \"KoboldAI/GPT-Neo-2.7B-Shinen\", \"EleutherAI/gpt-neo-2.7B\"] {allow-input: true}\n", + "Version = \"United\" #@param [\"Official\", \"United\"] {allow-input: true}\n", + "Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n", "\n", "!nvidia-smi\n", "from google.colab import drive\n", "drive.mount('/content/drive/')\n", "\n", - "!wget https://henk.tech/ckds -O - | bash /dev/stdin -m $Model -g $Version" + "if Provider == \"Localtunnel\":\n", + " tunnel = \"--localtunnel yes\"\n", + "else:\n", + " tunnel = \"\"\n", + "\n", + "!wget https://henk.tech/ckds -O - | bash /dev/stdin -m $Model -g $Version $tunnel" ], "execution_count": null, "outputs": [] @@ -84,27 +90,32 @@ "cell_type": "markdown", "source": [ "# GPU Edition Model Descriptions\n", - "| Model | Size | Style | Description |\n", - "| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ |\n", - "| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B GPU | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. 
|\n", - "| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n", - "| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n", - "| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n", - "| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n", - "| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", - "| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n", + "| Model | Size | Style | Description |\n", + "| --- | --- | --- | --- |\n", + "| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", + "| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. 
|\n", + "| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n", + "| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n", + "| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n", + "| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n", + "| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", + "| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n", "\n", "# [TPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)\n", "\n", - "| Model | Size | Style | Drive Space | Description |\n", - "| ------------------------------ | ------ | --------- | ----------- | ------------------------------------------------------------ |\n", - "| Skein 6B by VE_FORBDRYDERNE | 6B TPU | Hybrid | 0 GB | Skein is our flagship 6B model, it is a hybrid between a Adventure model and a Novel model. Best used with either Adventure mode or the You Bias userscript enabled. Skein has been trained on high quality Novels along with CYOA adventure stories and is not as wackey as the Adventure model. It also has tagging support. |\n", - "| Janeway 6B by Mr Seeker | 6B TPU | Novel | 0 GB | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", - "| Adventure 6B by VE_FORBRYDERNE | 6B TPU | Adventure | 0 GB | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. 
It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n", - "| Lit 6B by Haru | 6B TPU | NSFW | 8 GB / 12 GB | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n", - "| Shinen 6B by Mr Seeker | 6B TPU | NSFW | 0 GB | Shinen is an alternative to the Lit model designed to be more explicit. If Lit is to tame for you Shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", - "| Generic 6B by EleutherAI | 6B TPU | Generic | 10 GB / 12 GB | GPT-J-6B is what all other models are based on, if you need something that has no specific bias towards any particular subject this is the model for you. Best used when the other models are not suitable for what you wish to do. Such as homework assistance, blog writing, coding and more. It needs more hand holding than other models and is more prone to undesirable formatting changes. |\n", - "| C1 6B by Haru | 6B TPU | Chatbot | 8 GB / 12 GB | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n", + "| Model | Size | Style | Description |\n", + "| --- | --- | --- | --- |\n", + "| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", + "| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", + "| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n", + "| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. 
|\n", + "| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n", + "| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n", + "| [Convo](https://huggingface.co/hitomi-team/convo-6B) by Hitomi Team | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). |\n", + "| [C1](https://huggingface.co/hakurei/c1-6B) by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n", + "| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n", + "| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n", + "| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n", "\n", "\n", "| Style | Description |\n", @@ -113,7 +124,6 @@ "| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n", "| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. 
Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |\n", "| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n", - "| Hybrid | Hybrid models are a blend between different styles, for example they are trained on both Novel stories and Adventure stories. These models are great variety models that you can use for multiple different playstyles and modes, but depending on your usage you may need to enable Adventure Mode or the You bias (in userscripts). |\n", "| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n", "\n", "# How to start KoboldAI in 7 simple steps\n", diff --git a/colab/TPU.ipynb b/colab/TPU.ipynb index 9efaaa56..ee7f9fd3 100644 --- a/colab/TPU.ipynb +++ b/colab/TPU.ipynb @@ -7,7 +7,7 @@ "colab_type": "text" }, "source": [ - "\"Open" + "\"Open" ] }, { @@ -18,7 +18,21 @@ "\n", "For more information about KoboldAI check our our Github readme : https://github.com/KoboldAI/KoboldAI-Client/blob/main/readme.md\n", "\n", - "More (smaller) models are available in the **[GPU edition](https://colab.research.google.com/github/koboldai/KoboldAI-Client/blob/main/colab/GPU.ipynb)**!" + "More (smaller) models are available in the **[GPU edition](https://colab.research.google.com/github/koboldai/KoboldAI-Client/blob/main/colab/GPU.ipynb)**!\n", + "\n", + "---\n", + "## How to load KoboldAI: Everything you need to know\n", + "1. On a phone? First put your browser in desktop mode because of a Google Colab bug. Otherwise nothing will happen when you click the play button. Then tap the play button next to \"<-- Tap This if you play on Mobile\", you will see an audio player. Keep the audio player playing so Colab does not get shut down in the background.\n", + "2. Select the desired model, you will find a description of all the available models further down the page.\n", + "3. Click the play button next to \"<-- Select your model below and then click this to start KoboldAI\".\n", + "4. Got a message saying no accelerator is available? Click cancel, and try again in a few minutes. If you do not manage to get a session when you frequently try again try at a different time of day, colab can be busy or your priority may have been lowered by frequent usage.\n", + "5. After everything is done loading you will get a link that you can use to open KoboldAI. In case of Localtunnel you will also be warned that some people are abusing Localtunnel for phishing, once you acknowledge this warning you will be taken to KoboldAI's interface. If you picked Cloudflare and get a 1033 error refresh the error page after waiting one minute.\n", + "\n", + "---\n", + "\n", + "Further down the page you can find descriptions of the models, and tips to get the most out of your Google Colab experience.\n", + "\n", + "Make sure to keep this page open while you are using KoboldAI, and check back regularly to see if you got a Captcha. 
Failure to complete the captcha's in time can result in termination of your session or a lower priority towards the TPUs." ], "metadata": { "id": "zrLGxVCEaqZx" @@ -47,11 +61,13 @@ }, "outputs": [], "source": [ + "#@title <-- Select your model below and then click this to start KoboldAI\n", + "#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n", + "\n", "#@title <-- Click this to start KoboldAI\n", - "Model = \"Skein 6B\" #@param [\"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Lit 6B\", \"Shinen 6B\", \"Generic 6B\", \"C1 6B\"]\n", - "Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n", - "Drive = \"Unextracted (Less Space)\" #@param [\"Unextracted (Less Space)\", \"Extracted (Faster Loading)\"]\n", - "#@markdown Extracted models take up more space but load faster the next time you use them, not all models use your Google Drive. See the Model list below for descriptions and space requirements. If your extracted model does not load the next time you try to launch KoboldAI delete the folder from your Google Drive and ensure enough space is available.\n", + "Model = \"Nerys 13B\" #@param [\"Nerys 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit 6B\", \"Convo 6B\", \"C1 6B\", \"NeoX 20B\", \"facebook/opt-13b\", \"KoboldAI/fairseq-dense-13B\", \"EleutherAI/gpt-j-6B\"] {allow-input: true}\n", + "Version = \"United\" #@param [\"Official\", \"United\"] {allow-input: true}\n", + "Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n", "\n", "import os\n", "try:\n", @@ -64,113 +80,115 @@ "from google.colab import drive\n", "drive.mount('/content/drive/')\n", "\n", - "!wget https://henk.tech/ckds -O - | bash /dev/stdin -i drive\n", - "\n", - "if Model == \"Skein 6B\":\n", - " path = \"gpt-j-6b-skein-jax\"\n", + "if Model == \"Janeway 13B\":\n", + " Model = \"KoboldAI/fairseq-dense-13B-Janeway\"\n", + " path = \"\"\n", + " download = \"\"\n", + "elif Model == \"Nerys 13B\":\n", + " Model = \"KoboldAI/fairseq-dense-13B-Nerys\"\n", + " path = \"\"\n", + " download = \"\"\n", + "elif Model == \"Shinen 13B\":\n", + " Model = \"KoboldAI/fairseq-dense-13B-Shinen\"\n", + " path = \"\"\n", + " download = \"\"\n", + "elif Model == \"NeoX 20B\":\n", + " Model = \"TPUMeshTransformerGPTNeoX\"\n", + " path = \" -p gpt-neox-20b-jax\"\n", " location = \"colab\"\n", - " download = \"-a https://storage.henk.tech/KoboldAI/skein-jax.txt\"\n", + " download = \" -a https://storage.henk.tech/KoboldAI/neox-20b.txt\"\n", " extract = \"\"\n", " Drive = \"Unextracted (Less Space)\"\n", - " ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-skein-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-skein-jax.settings\n", - "if Model == \"Janeway 6B\":\n", - " path = \"gpt-j-6b-janeway-jax\"\n", - " location = \"colab\"\n", - " download = \"-a https://storage.henk.tech/KoboldAI/janeway-jax.txt\"\n", - " extract = \"\"\n", - " Drive = 
\"Unextracted (Less Space)\"\n", - " ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-janeway-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"rep_pen_slope\\\": 0.7,\\n \\\"rep_pen_range\\\": 1024.0,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false,\\n \\\"singleline\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false,\\n \\\"chatmode\\\": false,\\n \\\"chatname\\\": \\\"You\\\",\\n \\\"dynamicscan\\\": false,\\n \\\"nopromptgen\\\": false,\\n \\\"rngpersist\\\": false,\\n \\\"nogenmod\\\": false,\\n \\\"autosave\\\": false,\\n \\\"welcome\\\": false,\\n \\\"newlinemode\\\": \\\"n\\\",\\n \\\"antemplate\\\": \\\"[Genre: <|>]\\\",\\n \\\"userscripts\\\": [],\\n \\\"corescript\\\": \\\"default.lua\\\",\\n \\\"softprompt\\\": \\\"\\\"\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-janeway-jax.settings\n", - "if Model == \"Adventure 6B\":\n", - " path = \"gpt-j-6b-adventure-jax\"\n", - " location = \"colab\"\n", - " download = \"-a https://api.wandb.ai/files/ve-forbryderne/adventure/carol-data/models/gpt-j-6b-adventure-jax/aria2.txt\"\n", - " extract = \"\"\n", - " Drive = \"Unextracted (Less Space)\"\n", - " ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-adventure-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": true\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-adventure-jax.settings\n", - "if Model == \"Lit 6B\":\n", - " path = \"gpt-j-6b-lit-jax\"\n", - " location = \"drive\"\n", - " download = \"-a https://storage.henk.tech/KoboldAI/aria2.php?file=gpt-j-6b-lit-jax.7z\"\n", - " extract = \"-z gpt-j-6b-lit-jax.7z\"\n", - " ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-lit-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-lit-jax.settings\n", - "if Model == \"Shinen 6B\":\n", - " path = \"gpt-j-6b-shinen-jax\"\n", - " location = \"colab\"\n", - " download = \"-a https://storage.henk.tech/KoboldAI/shinen-jax.txt\"\n", - " extract = \"\"\n", - " Drive = \"Unextracted (Less Space)\"\n", - " ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-shinen-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 
0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"rep_pen_slope\\\": 0.7,\\n \\\"rep_pen_range\\\": 1024.0,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false,\\n \\\"singleline\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false,\\n \\\"chatmode\\\": false,\\n \\\"chatname\\\": \\\"You\\\",\\n \\\"dynamicscan\\\": false,\\n \\\"nopromptgen\\\": false,\\n \\\"rngpersist\\\": false,\\n \\\"nogenmod\\\": false,\\n \\\"autosave\\\": false,\\n \\\"welcome\\\": false,\\n \\\"newlinemode\\\": \\\"n\\\",\\n \\\"antemplate\\\": \\\"[Genre: <|>]\\\",\\n \\\"userscripts\\\": [],\\n \\\"corescript\\\": \\\"default.lua\\\",\\n \\\"softprompt\\\": \\\"\\\"\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-shinen-jax.settings\n", - "if Model == \"Generic 6B\":\n", - " path = \"step_383500\"\n", - " location = \"drive\"\n", - " download = \"-a https://storage.henk.tech/KoboldAI/aria2.php?file=step_383500_slim.tar.zstd\"\n", - " extract = \"-t step_383500_slim.tar.zstd\"\n", - " ![[ -f /content/drive/MyDrive/KoboldAI/settings/step_383500.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false\\n}\" > /content/drive/MyDrive/KoboldAI/settings/step_383500.settings\n", - "if Model == \"C1 6B\":\n", - " path = \"gpt-j-6b-c1-jax\"\n", - " location = \"drive\"\n", - " download = \"-a https://storage.henk.tech/KoboldAI/aria2.php?file=gpt-j-6b-c1-jax.7z\"\n", - " extract = \"-z gpt-j-6b-c1-jax.7z\"\n", - " ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-c1-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"chatmode\\\": true\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-c1-jax.settings\n", + " ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-neox-20b-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.03,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-neox-20b-jax.settings\n", + "elif Model == \"Skein 6B\":\n", + " Model = \"KoboldAI/GPT-J-6B-Skein\"\n", + " 
path = \"\"\n", + " download = \"\"\n", + "elif Model == \"Janeway 6B\":\n", + " Model = \"KoboldAI/GPT-J-6B-Janeway\"\n", + " path = \"\"\n", + " download = \"\"\n", + "elif Model == \"Adventure 6B\":\n", + " Model = \"KoboldAI/GPT-J-6B-Adventure\"\n", + " path = \"\"\n", + " download = \"\"\n", + "elif Model == \"Lit 6B\":\n", + " Model = \"hakurei/lit-6B\"\n", + " path = \"\"\n", + " download = \"\"\n", + "elif Model == \"Shinen 6B\":\n", + " Model = \"KoboldAI/GPT-J-6B-Shinen\"\n", + " path = \"\"\n", + " download = \"\"\n", + "elif Model == \"Convo 6B\":\n", + " Model = \"hitomi-team/convo-6B\"\n", + " path = \"\"\n", + " download = \"\"\n", + "elif Model == \"C1 6B\":\n", + " Model = \"hakurei/c1-6B\"\n", + " path = \"\"\n", + " download = \"\"\n", + "else:\n", + " path = \"\"\n", + " download = \"\"\n", "\n", - "if Drive == \"Unextracted (Less Space)\":\n", - " xloc = \"colab\"\n", - "if Drive == \"Extracted (Faster Loading)\":\n", - " xloc = \"drive\"\n", + "if Provider == \"Localtunnel\":\n", + " tunnel = \"--localtunnel yes\"\n", + "else:\n", + " tunnel = \"\"\n", "\n", - "\n", - "!wget https://henk.tech/ckds -O - | bash /dev/stdin $download -l $location $extract -p $path -m TPUMeshTransformerGPTJ -g $Version -x $xloc" + "!wget https://henk.tech/ckds -O - | bash /dev/stdin $path$download -m $Model -g $Version $tunnel" ] }, { "cell_type": "markdown", "source": [ - "# TPU Edition Model Descriptions\n", + "## TPU Edition Model Descriptions\n", "\n", - "| Model | Size | Style | Drive Space | Description |\n", - "| ------------------------------ | ------ | --------- | ----------- | ------------------------------------------------------------ |\n", - "| Skein 6B by VE_FORBRYDERNE | 6B TPU | Hybrid | 0 GB | Skein is our flagship 6B model, it is a hybrid between a Adventure model and a Novel model. Best used with either Adventure mode or the You Bias userscript enabled. Skein has been trained on high quality Novels along with CYOA adventure stories and is not as wackey as the Adventure model. It also has tagging support. |\n", - "| Janeway 6B by Mr Seeker | 6B TPU | Novel | 0 GB | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", - "| Adventure 6B by VE_FORBRYDERNE | 6B TPU | Adventure | 0 GB | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n", - "| Lit 6B by Haru | 6B TPU | NSFW | 8 GB / 12 GB | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n", - "| Shinen 6B by Mr Seeker | 6B TPU | NSFW | 0 GB | Shinen is an alternative to the Lit model designed to be more explicit. If Lit is to tame for you Shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. 
|\n", - "| Generic 6B by EleutherAI | 6B TPU | Generic | 10 GB / 12 GB | GPT-J-6B is what all other models are based on, if you need something that has no specific bias towards any particular subject this is the model for you. Best used when the other models are not suitable for what you wish to do. Such as homework assistance, blog writing, coding and more. It needs more hand holding than other models and is more prone to undesirable formatting changes. |\n", - "| C1 6B by Haru | 6B TPU | Chatbot | 8 GB / 12 GB | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n", + "| Model | Size | Style | Description |\n", + "| --- | --- | --- | --- |\n", + "| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", + "| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", + "| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n", + "| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n", + "| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n", + "| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. 
|\n", + "| [Convo](https://huggingface.co/hitomi-team/convo-6B) by Hitomi Team | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). |\n", + "| [C1](https://huggingface.co/hakurei/c1-6B) by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n", + "| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n", + "| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n", + "| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n", "\n", "\n", "# [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)\n", "\n", - "| Model | Size | Style | Description |\n", - "| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ |\n", - "| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B GPU | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", - "| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. 
|\n", - "| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n", - "| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n", - "| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n", - "| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", - "| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n", + "| Model | Size | Style | Description |\n", + "| --- | --- | --- | --- |\n", + "| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", + "| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", + "| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. 
|\n", + "| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n", + "| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n", + "| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n", + "| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", + "| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n", "\n", - "| Style | Description |\n", - "| --------- | ------------------------------------------------------------ |\n", - "| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |\n", - "| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n", + "| Style | Description |\n", + "| --- | --- |\n", + "| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |\n", + "| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n", "| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |\n", - "| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n", - "| Hybrid | Hybrid models are a blend between different styles, for example they are trained on both Novel stories and Adventure stories. 
These models are great variety models that you can use for multiple different playstyles and modes, but depending on your usage you may need to enable Adventure Mode or the You bias (in userscripts). |\n", - "| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n", + "| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n", + "| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n", "\n", - "## How to start KoboldAI in 7 simple steps\n", - "Using KoboldAI on Google Colab is easy! Simply follow these steps to get started:\n", - "1. Mobile phone? Tap the play button below next to \"<--- Tap this if you play on mobile\" to reveal an audio player, play the silent audio to keep the tab alive so Google will not shut you down when your using KoboldAI. If no audio player is revealed your phone browser does not support Google Colab in the mobile view, go to your browser menu and enable Desktop mode before you continue.\n", - "2. Select the model that most describes what you would like to do, by default we have the most recommended model for people willing to try out KoboldAI selected.\n", - "3. Click the play button next to \"<--- Click this to start KoboldAI\".\n", - "4. Allow Google Drive access, this typically happens trough a popup but sometimes Google Drive access may be requested trough the older method by asking you to click on a link and copy a code. This is normal behavior for Colab and only you will get access to your files, nothing is shared with us.\n", - "5. Now the automatic installation and Download process starts, for most models in the TPU edition expect the loading to take between 15 and 30 minutes on average depending on the current Colab download speeds and the model you selected. These downloads happen trough Google's internet connection, you will not be billed by your internet provider and it will not count towards any download limits.\n", - "6. After waiting a Trycloudflare link appears, click the link to enjoy KoboldAI. If you get a 1033 error Cloudflare is not done loading, in that case keep refreshing until it goes away. (If it keeps happening after 2 minutes Cloudflare has an issue, in that case you can use Runtime -> Restart and Run All to get a new link).\n", - "7. As you play KoboldAI, keep this Colab tab open in the background and check occationally for Captcha's so they do not shut your instance down. If you do get shut down you can always download a copy of your gamesave in the Save menu inside KoboldAI. Stories are never lost as long as you keep KoboldAI open in your browser.\n", - "\n", - "Get a error message saying you do not have access to a GPU/TPU instance? 
Do not continue and try again later, KoboldAI will not run correctly without them.\n", "\n" + "---\n", + "## Tips to get the most out of Google Colab\n", + "- Google will occasionally show a Captcha, typically after it has been open for 30 minutes but it can be more frequent if you often use Colab. Make sure to do these properly, or you risk getting your instance shut down and getting a lower priority towards the TPUs.\n", + "- KoboldAI uses Google Drive to store your files and settings, if you wish to upload a softprompt or userscript this can be done directly on the Google Drive website. You can also use this to download backups of your KoboldAI related files or upload models of your own.\n", + "- Don't want to save your stories on Google Drive for privacy reasons? Do not use KoboldAI's save function and instead click Download as .json; this will automatically download the story to your own computer without ever touching Google's hard drives. You can load this back through the Load from file option.\n", + "- Google shut your instance down unexpectedly? You can still make use of the Download as .json button to recover your story as long as you did not close the KoboldAI window. You can then load this back up in your next session.\n", + "- Done with KoboldAI? Go to the Runtime menu, click on Manage Sessions and terminate your open sessions that you no longer need. This trick can help you maintain higher priority towards getting a TPU.\n", + "- Models stored on Google Drive typically load faster than models we need to download from the internet." ], "metadata": { "id": "i0-9ARA3c4Fx" diff --git a/colab/vscode.ipynb b/colab/vscode.ipynb index 07c1e42b..6a8fe84c 100644 --- a/colab/vscode.ipynb +++ b/colab/vscode.ipynb @@ -67,6 +67,7 @@ "!wget https://henk.tech/ckds -O - | bash /dev/stdin -g $Version -i only $Args\n", "\n", "!pip install colabcode\n", + "!pip install 'flask>=2.1.0'\n", "from colabcode import ColabCode\n", "ColabCode(authtoken=Authtoken)" ] diff --git a/colabkobold.sh b/colabkobold.sh index 9590bdd8..4c41675a 100644 --- a/colabkobold.sh +++ b/colabkobold.sh @@ -2,7 +2,7 @@ # KoboldAI Easy Colab Deployment Script by Henk717 # read the options -TEMP=`getopt -o m:i:p:c:d:x:a:l:z:g:t:n:b: --long model:,init:,path:,configname:,download:,aria2:,dloc:xloc:7z:git:tar:ngrok:branch: -- "$@"` +TEMP=`getopt -o m:i:p:c:d:x:a:l:z:g:t:n:b:s: --long model:,init:,path:,configname:,download:,aria2:,dloc:,xloc:,7z:,git:,tar:,ngrok:,branch:,savemodel:,localtunnel:,lt: -- "$@"` eval set -- "$TEMP" # extract options and their arguments into variables. @@ -17,7 +17,9 @@ while true ; do -c|--configname) configname=" --configname $2" ; shift 2 ;; -n|--ngrok) - configname=" --ngrok" ; shift 2 ;; + ngrok=" --ngrok" ; shift 2 ;; + --lt|--localtunnel) + localtunnel=" --localtunnel" ; shift 2 ;; -d|--download) download="$2" ; shift 2 ;; -a|--aria2) @@ -34,6 +36,8 @@ while true ; do git="$2" ; shift 2 ;; -b|--branch) branch="$2" ; shift 2 ;; + -s|--savemodel) + savemodel=" --savemodel" ; shift 2 ;; --) shift ; break ;; *) echo "Internal error!" 
; exit 1 ;; esac @@ -48,8 +52,8 @@ function launch exit 0 else cd /content/KoboldAI-Client - echo "Launching KoboldAI with the following options : python3 aiserver.py$model$kmpath$configname$ngrok --remote --override_delete --override_rename" - python3 aiserver.py$model$kmpath$configname$ngrok --colab + echo "Launching KoboldAI with the following options : python3 aiserver.py$model$kmpath$configname$ngrok$localtunnel$savemodel --colab" + python3 aiserver.py$model$kmpath$configname$ngrok$localtunnel$savemodel --colab exit fi } @@ -134,28 +138,32 @@ if [ "$init" != "skip" ]; then cd /content/KoboldAI-Client - cp -rn stories/*.* /content/drive/MyDrive/KoboldAI/stories/ - cp -rn userscripts/*.* /content/drive/MyDrive/KoboldAI/userscripts/ - cp -rn softprompts/*.* /content/drive/MyDrive/KoboldAI/softprompts/ + cp -rn stories/* /content/drive/MyDrive/KoboldAI/stories/ + cp -rn userscripts/* /content/drive/MyDrive/KoboldAI/userscripts/ + cp -rn softprompts/* /content/drive/MyDrive/KoboldAI/softprompts/ rm stories rm -rf stories/ rm userscripts rm -rf userscripts/ rm softprompts rm -rf softprompts/ + rm models + rm -rf models/ ln -s /content/drive/MyDrive/KoboldAI/stories/ stories ln -s /content/drive/MyDrive/KoboldAI/settings/ settings ln -s /content/drive/MyDrive/KoboldAI/softprompts/ softprompts ln -s /content/drive/MyDrive/KoboldAI/userscripts/ userscripts + ln -s /content/drive/MyDrive/KoboldAI/models/ models - if [ "$model" == " --model TPUMeshTransformerGPTJ" ]; then + if [ -n "${COLAB_TPU_ADDR+set}" ]; then pip install -r requirements_mtj.txt else pip install -r requirements.txt fi - # Make sure Colab has netbase - sudo apt install netbase -y + # Make sure Colab has the system dependencies + sudo apt install netbase aria2 -y + npm install -g localtunnel fi cd /content @@ -178,8 +186,7 @@ fi #Download routine for Aria2c scripts if [ ! 
-z ${aria2+x} ]; then - apt install aria2 -y - curl -L $aria2 | aria2c -c -i- -d$dloc --user-agent=KoboldAI --file-allocation=none + curl -L $aria2 | aria2c -x 10 -s 10 -j 10 -c -i- -d$dloc --user-agent=KoboldAI --file-allocation=none fi #Extract the model with 7z diff --git a/commandline-rocm.sh b/commandline-rocm.sh new file mode 100755 index 00000000..5c9a54aa --- /dev/null +++ b/commandline-rocm.sh @@ -0,0 +1 @@ +bin/micromamba run -r runtime -n koboldai-rocm bash diff --git a/commandline.bat b/commandline.bat index 00f0a031..001c0bae 100644 --- a/commandline.bat +++ b/commandline.bat @@ -10,18 +10,18 @@ IF %M%==3 GOTO drivemap_B SET TEMP=%~DP0MINICONDA3 SET TMP=%~DP0MINICONDA3 call miniconda3\condabin\activate -cmd /k +cmd /k "%*" :drivemap subst K: miniconda3 >nul SET TEMP=K:\ SET TMP=K:\ call K:\python\condabin\activate -cmd /k +cmd /k "%*" :drivemap_B subst B: miniconda3 >nul SET TEMP=B:\ SET TMP=B:\ call B:\python\condabin\activate -cmd /k \ No newline at end of file +cmd /k "%*" \ No newline at end of file diff --git a/commandline.sh b/commandline.sh new file mode 100755 index 00000000..72338169 --- /dev/null +++ b/commandline.sh @@ -0,0 +1 @@ +bin/micromamba run -r runtime -n koboldai bash diff --git a/disconnect-kobold-drive.bat b/disconnect-kobold-drive.bat new file mode 100644 index 00000000..69a0f43f --- /dev/null +++ b/disconnect-kobold-drive.bat @@ -0,0 +1,7 @@ +@echo off +SET /P M==4.17 \ No newline at end of file diff --git a/environments/rocm-finetune.yml b/environments/rocm-finetune.yml index 295a041f..5672ed21 100644 --- a/environments/rocm-finetune.yml +++ b/environments/rocm-finetune.yml @@ -10,7 +10,7 @@ dependencies: - markdown - bleach=4.1.0 - pip - - git + - git=2.35.1 - pip: - --find-links https://download.pytorch.org/whl/rocm4.2/torch_stable.html - torch diff --git a/environments/rocm.yml b/environments/rocm.yml index d23c5a47..2ca9c670 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -3,7 +3,6 @@ channels: - conda-forge - defaults dependencies: - - transformers - colorama - flask-socketio - python=3.8.* @@ -11,11 +10,14 @@ dependencies: - markdown - bleach=4.1.0 - pip - - git + - git=2.35.1 + - sentencepiece + - protobuf - pip: - --find-links https://download.pytorch.org/whl/rocm4.2/torch_stable.html - - torch - - torchvision==0.11.1 + - torch==1.10.* + - torchvision - flask-cloudflared - flask-ngrok - lupa==1.10 + - transformers>=4.17 diff --git a/fileops.py b/fileops.py index 50f1e94f..c303764e 100644 --- a/fileops.py +++ b/fileops.py @@ -1,5 +1,3 @@ -import tkinter as tk -from tkinter import filedialog from os import getcwd, listdir, path from typing import Tuple, Union, Optional import os @@ -10,6 +8,8 @@ import zipfile # Generic Method for prompting for file path #==================================================================# def getsavepath(dir, title, types): + import tkinter as tk + from tkinter import filedialog root = tk.Tk() root.attributes("-topmost", True) path = tk.filedialog.asksaveasfile( @@ -28,6 +28,8 @@ def getsavepath(dir, title, types): # Generic Method for prompting for file path #==================================================================# def getloadpath(dir, title, types): + import tkinter as tk + from tkinter import filedialog root = tk.Tk() root.attributes("-topmost", True) path = tk.filedialog.askopenfilename( @@ -45,6 +47,8 @@ def getloadpath(dir, title, types): # Generic Method for prompting for directory path #==================================================================# def getdirpath(dir, 
title): + import tkinter as tk + from tkinter import filedialog root = tk.Tk() root.attributes("-topmost", True) path = filedialog.askdirectory( @@ -61,30 +65,30 @@ def getdirpath(dir, title): # Returns the path (as a string) to the given story by its name #==================================================================# def storypath(name): - return path.join(path.dirname(path.realpath(__file__)), "stories", name + ".json") + return path.join("stories", name + ".json") #==================================================================# # Returns the path (as a string) to the given soft prompt by its filename #==================================================================# def sppath(filename): - return path.join(path.dirname(path.realpath(__file__)), "softprompts", filename) + return path.join("softprompts", filename) #==================================================================# # Returns the path (as a string) to the given username by its filename #==================================================================# def uspath(filename): - return path.join(path.dirname(path.realpath(__file__)), "userscripts", filename) + return path.join("userscripts", filename) #==================================================================# # Returns an array of dicts containing story files in /stories #==================================================================# def getstoryfiles(): list = [] - for file in listdir(path.dirname(path.realpath(__file__))+"/stories"): + for file in listdir("stories"): if file.endswith(".json"): ob = {} ob["name"] = file.replace(".json", "") - f = open(path.dirname(path.realpath(__file__))+"/stories/"+file, "r") + f = open("stories/"+file, "r") try: js = json.load(f) except: @@ -108,7 +112,7 @@ def checksp(filename: str, model_dimension: int) -> Tuple[Union[zipfile.ZipFile, if 'np' not in globals(): import numpy as np try: - z = zipfile.ZipFile(path.dirname(path.realpath(__file__))+"/softprompts/"+filename) + z = zipfile.ZipFile("softprompts/"+filename) with z.open('tensor.npy') as f: # Read only the header of the npy file, for efficiency reasons version: Tuple[int, int] = np.lib.format.read_magic(f) @@ -118,7 +122,10 @@ def checksp(filename: str, model_dimension: int) -> Tuple[Union[zipfile.ZipFile, shape, fortran_order, dtype = np.lib.format._read_array_header(f, version) assert len(shape) == 2 except: - z.close() + try: + z.close() + except UnboundLocalError: + pass return 1, None, None, None, None if dtype not in ('V2', np.float16, np.float32): z.close() @@ -136,8 +143,8 @@ def checksp(filename: str, model_dimension: int) -> Tuple[Union[zipfile.ZipFile, #==================================================================# def getspfiles(model_dimension: int): lst = [] - os.makedirs(path.dirname(path.realpath(__file__))+"/softprompts", exist_ok=True) - for file in listdir(path.dirname(path.realpath(__file__))+"/softprompts"): + os.makedirs("softprompts", exist_ok=True) + for file in listdir("softprompts"): if not file.endswith(".zip"): continue z, version, shape, fortran_order, dtype = checksp(file, model_dimension) @@ -170,8 +177,8 @@ def getspfiles(model_dimension: int): #==================================================================# def getusfiles(long_desc=False): lst = [] - os.makedirs(path.dirname(path.realpath(__file__))+"/userscripts", exist_ok=True) - for file in listdir(path.dirname(path.realpath(__file__))+"/userscripts"): + os.makedirs("userscripts", exist_ok=True) + for file in listdir("userscripts"): if file.endswith(".lua"): 
ob = {} ob["filename"] = file diff --git a/gensettings.py b/gensettings.py index f4bfe7ed..e8d4e566 100644 --- a/gensettings.py +++ b/gensettings.py @@ -51,8 +51,19 @@ gensettingstf = [ "min": 0.0, "max": 1.0, "step": 0.05, - "default": 0.0, + "default": 1.0, "tooltip": "Alternative sampling method; it is recommended to disable top_p and top_k (set top_p to 1 and top_k to 0) if using this. 0.95 is thought to be a good value. (Put this value on 1 to disable its effect)" + }, + { + "uitype": "slider", + "unit": "float", + "label": "Typical Sampling", + "id": "settypical", + "min": 0.0, + "max": 1.0, + "step": 0.05, + "default": 1.0, + "tooltip": "Alternative sampling method described in the paper \"Typical Decoding for Natural Language Generation\" (10.48550/ARXIV.2202.00666). The paper suggests 0.2 as a good value for this setting. Set this setting to 1 to disable its effect." }, { "uitype": "slider", @@ -207,6 +218,17 @@ gensettingstf = [ "step": 1, "default": 0, "tooltip": "Disables userscript generation modifiers." + }, + { + "uitype": "toggle", + "unit": "bool", + "label": "Debug", + "id": "debug", + "min": 0, + "max": 1, + "step": 1, + "default": 0, + "tooltip": "Show debug info" } ] @@ -341,6 +363,17 @@ gensettingsik =[{ "step": 1, "default": 0, "tooltip": "When enabled, the Memory text box in the Random Story dialog will be prefilled by default with your current story's memory instead of being empty." + }, + { + "uitype": "toggle", + "unit": "bool", + "label": "Debug", + "id": "debug", + "min": 0, + "max": 1, + "step": 1, + "default": 0, + "tooltip": "Show debug info" } ] diff --git a/install_requirements.bat b/install_requirements.bat index a6ae3b1e..cd7b0aef 100644 --- a/install_requirements.bat +++ b/install_requirements.bat @@ -45,6 +45,8 @@ subst B: miniconda3 SET TEMP=B:\ SET TMP=B:\ copy umamba.exe B:\umamba.exe +copy loader.settings B:\loader.settings +copy disconnect-kobold-drive.bat B:\disconnect-kobold-drive.bat B: umamba.exe create -r B:\python\ -n base IF %B%==1 umamba.exe install --no-shortcuts -r B:\python\ -n base -f "%~dp0\environments\huggingface.yml" -y --always-copy diff --git a/install_requirements.sh b/install_requirements.sh new file mode 100755 index 00000000..7d2c0c2f --- /dev/null +++ b/install_requirements.sh @@ -0,0 +1,16 @@ +#!/bin/bash +if [[ $1 = "cuda" ]]; then +wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba +bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y +# Weird micromamba bug causes it to fail the first time, running it twice just to be safe, the second time is much faster +bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y +exit +fi +if [[ $1 = "rocm" ]]; then +wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba +bin/micromamba create -f environments/rocm.yml -r runtime -n koboldai-rocm -y +# Weird micromamba bug causes it to fail the first time, running it twice just to be safe, the second time is much faster +bin/micromamba create -f environments/rocm.yml -r runtime -n koboldai-rocm -y +exit +fi +echo Please specify either CUDA or ROCM diff --git a/koboldai.ico b/koboldai.ico new file mode 100644 index 00000000..4e39d0de Binary files /dev/null and b/koboldai.ico differ diff --git a/koboldaiblue.ico b/koboldaiblue.ico new file mode 100644 index 00000000..944d7539 Binary files /dev/null and b/koboldaiblue.ico differ diff --git a/koboldaigreen.ico b/koboldaigreen.ico new file mode 100644 
index 00000000..63622231 Binary files /dev/null and b/koboldaigreen.ico differ diff --git a/maps/gpt_neo.json b/maps/gpt_neo.json new file mode 100644 index 00000000..fa2d4084 --- /dev/null +++ b/maps/gpt_neo.json @@ -0,0 +1,32 @@ +{ + "mtj_compat": "neo", + "mtj_pe": "fixed", + "mtj_config_map": { + "d_model": "hidden_size", + "n_heads": "num_heads", + "layers": "num_layers" + }, + "static_weights": { + "transformer.wte.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}}, + "transformer.wpe.weight": {"mtj": {"module": "embedding_shard", "param": "pos_embs", "transforms": ["no_transpose"]}}, + "transformer.ln_f.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}}, + "transformer.ln_f.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}} + }, + "layer_weights": { + "transformer.h.{layer}.attn.attention.bias": {}, + "transformer.h.{layer}.attn.attention.masked_bias": {}, + "transformer.h.{layer}.attn.attention.q_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear", "param": "w"}}, + "transformer.h.{layer}.attn.attention.v_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "w"}}, + "transformer.h.{layer}.attn.attention.k_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "w"}}, + "transformer.h.{layer}.attn.attention.out_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "w"}}, + "transformer.h.{layer}.attn.attention.out_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "b", "transforms": ["divide_by_shards"]}}, + "transformer.h.{layer}.mlp.c_fc.weight": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "w"}}, + "transformer.h.{layer}.mlp.c_fc.bias": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "b"}}, + "transformer.h.{layer}.mlp.c_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "w"}}, + "transformer.h.{layer}.mlp.c_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "b", "transforms": ["divide_by_shards"]}}, + "transformer.h.{layer}.ln_1.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "scale"}}, + "transformer.h.{layer}.ln_1.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "offset"}}, + "transformer.h.{layer}.ln_2.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "scale"}}, + "transformer.h.{layer}.ln_2.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "offset"}} + } +} diff --git a/maps/gptj.json b/maps/gptj.json new file mode 100644 index 00000000..8e0bc9da --- /dev/null +++ b/maps/gptj.json @@ -0,0 +1,32 @@ +{ + "mtj_compat": "j", + "mtj_pe": "rotary", + "mtj_config_map": { + "pe_rotary_dims": ["rotary_dim", 64], + "d_model": "n_embd", + "n_heads": "n_head", + "layers": "n_layer" + }, + "static_weights": { + "transformer.wte.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}}, + "transformer.wte.bias": {"mtj": {"module": "embedding_shard/~/linear", "param": "b"}}, + "transformer.ln_f.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}}, + "transformer.ln_f.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}}, + "lm_head.weight": {"mtj": {"module": "projection_shard/~/linear", "param": "w", "transforms": ["vocab_pad"]}}, + "lm_head.bias": {"mtj": {"module": 
"projection_shard/~/linear", "param": "b"}} + }, + "layer_weights": { + "transformer.h.{layer}.attn.bias": {}, + "transformer.h.{layer}.attn.masked_bias": {}, + "transformer.h.{layer}.attn.q_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear", "param": "w"}}, + "transformer.h.{layer}.attn.v_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "w"}}, + "transformer.h.{layer}.attn.k_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "w"}}, + "transformer.h.{layer}.attn.out_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "w"}}, + "transformer.h.{layer}.mlp.fc_in.weight": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "w"}}, + "transformer.h.{layer}.mlp.fc_in.bias": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "b"}}, + "transformer.h.{layer}.mlp.fc_out.weight": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "w"}}, + "transformer.h.{layer}.mlp.fc_out.bias": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "b", "transforms": ["divide_by_shards"]}}, + "transformer.h.{layer}.ln_1.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "scale"}}, + "transformer.h.{layer}.ln_1.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "offset"}} + } +} diff --git a/maps/opt.json b/maps/opt.json new file mode 100644 index 00000000..c99ae19f --- /dev/null +++ b/maps/opt.json @@ -0,0 +1,35 @@ +{ + "mtj_compat": "opt", + "mtj_pe": "fixed", + "mtj_config_map": { + "do_layer_norm_before": ["do_layer_norm_before", true], + "d_embed": "word_embed_proj_dim", + "d_model": "hidden_size", + "n_heads": "num_attention_heads", + "layers": "num_hidden_layers" + }, + "static_weights": { + "decoder.embed_tokens.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}}, + "decoder.project_in.weight": {"mtj": {"module": "embedding_shard", "param": "project_in"}}, + "decoder.embed_positions.weight": {"mtj": {"module": "embedding_shard", "param": "pos_embs", "transforms": ["no_transpose", "remove_first_two_rows"]}}, + "decoder.project_out.weight": {"mtj": {"module": "projection_shard", "param": "project_out"}} + }, + "layer_weights": { + "decoder.layers.{layer}.self_attn.q_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear", "param": "w"}}, + "decoder.layers.{layer}.self_attn.q_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear", "param": "b"}}, + "decoder.layers.{layer}.self_attn.v_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "w"}}, + "decoder.layers.{layer}.self_attn.v_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "b"}}, + "decoder.layers.{layer}.self_attn.k_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "w"}}, + "decoder.layers.{layer}.self_attn.k_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "b"}}, + "decoder.layers.{layer}.self_attn.out_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "w"}}, + "decoder.layers.{layer}.self_attn.out_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "b", "transforms": ["divide_by_shards"]}}, + "decoder.layers.{layer}.fc1.weight": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "w"}}, + "decoder.layers.{layer}.fc1.bias": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "b"}}, + "decoder.layers.{layer}.fc2.weight": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "w"}}, + "decoder.layers.{layer}.fc2.bias": {"mtj": {"module": 
"layer_{layer}/~/linear_5", "param": "b", "transforms": ["divide_by_shards"]}}, + "decoder.layers.{layer}.self_attn_layer_norm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "scale"}}, + "decoder.layers.{layer}.self_attn_layer_norm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "offset"}}, + "decoder.layers.{layer}.final_layer_norm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "scale"}}, + "decoder.layers.{layer}.final_layer_norm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "offset"}} + } +} diff --git a/maps/xglm.json b/maps/xglm.json new file mode 100644 index 00000000..581f825c --- /dev/null +++ b/maps/xglm.json @@ -0,0 +1,32 @@ +{ + "mtj_compat": "fairseq_lm", + "mtj_pe": "fairseq_sinusoidal", + "mtj_config_map": { + "d_model": "d_model", + "n_heads": "attention_heads", + "layers": "num_layers" + }, + "static_weights": { + "model.embed_tokens.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}}, + "model.layer_norm.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}}, + "model.layer_norm.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}} + }, + "layer_weights": { + "model.layers.{layer}.self_attn.q_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear", "param": "w"}}, + "model.layers.{layer}.self_attn.q_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear", "param": "b"}}, + "model.layers.{layer}.self_attn.v_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "w"}}, + "model.layers.{layer}.self_attn.v_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "b"}}, + "model.layers.{layer}.self_attn.k_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "w"}}, + "model.layers.{layer}.self_attn.k_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "b"}}, + "model.layers.{layer}.self_attn.out_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "w"}}, + "model.layers.{layer}.self_attn.out_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "b", "transforms": ["divide_by_shards"]}}, + "model.layers.{layer}.fc1.weight": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "w"}}, + "model.layers.{layer}.fc1.bias": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "b"}}, + "model.layers.{layer}.fc2.weight": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "w"}}, + "model.layers.{layer}.fc2.bias": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "b", "transforms": ["divide_by_shards"]}}, + "model.layers.{layer}.self_attn_layer_norm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "scale"}}, + "model.layers.{layer}.self_attn_layer_norm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "offset"}}, + "model.layers.{layer}.final_layer_norm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "scale"}}, + "model.layers.{layer}.final_layer_norm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "offset"}} + } +} diff --git a/models/models go here.txt b/models/models go here.txt new file mode 100644 index 00000000..6b4597d4 --- /dev/null +++ b/models/models go here.txt @@ -0,0 +1,2 @@ +Place the models extracted in their own subfolder. +Downloaded models from the menu will automatically appear here. 
\ No newline at end of file diff --git a/play-rocm.sh b/play-rocm.sh old mode 100644 new mode 100755 index d32c404c..6cf2794a --- a/play-rocm.sh +++ b/play-rocm.sh @@ -1,4 +1,5 @@ -cd docker-rocm -xhost +local:docker -cp ../environments/rocm.yml env.yml -docker-compose run --service-ports koboldai bash -c "cd /content && python3 aiserver.py $*" +#!/bin/bash +if [ ! -f "runtime/envs/koboldai-rocm/bin/python" ]; then +./install_requirements.sh rocm +fi +bin/micromamba run -r runtime -n koboldai-rocm python aiserver.py $* diff --git a/play.bat b/play.bat index 39052ebf..4e54fbba 100644 --- a/play.bat +++ b/play.bat @@ -16,20 +16,20 @@ cmd /k :drivemap ECHO Runtime launching in K: drive mode +subst /D K: >nul subst K: miniconda3 >nul SET TEMP=K:\ SET TMP=K:\ call K:\python\condabin\activate python aiserver.py %* -subst K: /D cmd /k :drivemap_B ECHO Runtime launching in B: drive mode +subst /D B: >nul subst B: miniconda3 >nul SET TEMP=B:\ SET TMP=B:\ call B:\python\condabin\activate python aiserver.py %* -subst B: /D cmd /k \ No newline at end of file diff --git a/play.ipynb b/play.ipynb deleted file mode 100644 index af94064b..00000000 --- a/play.ipynb +++ /dev/null @@ -1,45 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "KoboldAI Jupyter", - "provenance": [], - "authorship_tag": "ABX9TyMDTbAhtDnKJa+aIEaQjpsL" - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "TPU" - }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# KoboldAI Launcher for generic Jupyter Notebooks\n", - "This notebook is meant as a way to easily launch KoboldAI on existing Jupyter instances that already have KoboldAI installed (For example a custom Saturn Cloud or Paperspace instance).\n", - "\n", - "For Google Colab please check out our Google Colab edition available at : https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb" - ], - "metadata": { - "id": "hMRnGz42Xsy3" - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "40B1QvI3Xv02" - }, - "outputs": [], - "source": [ - "!pip install -r requirements.txt\n", - "!python3 aiserver.py --remote" - ] - } - ] -} \ No newline at end of file diff --git a/play.sh b/play.sh new file mode 100755 index 00000000..10f3c8aa --- /dev/null +++ b/play.sh @@ -0,0 +1,5 @@ +#!/bin/bash +if [ ! -f "runtime/envs/koboldai/bin/python" ]; then +./install_requirements.sh cuda +fi +bin/micromamba run -r runtime -n koboldai python aiserver.py $* diff --git a/readme.md b/readme.md index ac0b7a77..a136c856 100644 --- a/readme.md +++ b/readme.md @@ -1,4 +1,4 @@ -# KoboldAI - Your gateway to GPT writing +## KoboldAI - Your gateway to GPT writing This is a browser-based front-end for AI-assisted writing with multiple local & remote AI models. It offers the standard array of tools, including Memory, Author's Note, World Info, Save & Load, adjustable AI settings, formatting options, and the ability to import existing AI Dungeon adventures. You can also turn on Adventure mode and play the game like AI Dungeon Unleashed. @@ -12,9 +12,9 @@ By default KoboldAI will run in a generic mode optimized for writing, but with t The gameplay will be slightly different than the gameplay in AI Dungeon because we adopted the Type of the Unleashed fork, giving you full control over all the characters because we do not automatically adapt your sentences behind the scenes. 
This means you can more reliably control characters that are not you. -As a result of this what you need to type is slightly different, in AI Dungeon you would type ***take the sword*** while in KoboldAI you would type it like a sentence such as ***You take the sword*** and this is best done with the word You instead of I. +As a result of this what you need to type is slightly different, in AI Dungeon you would type _**take the sword**_ while in KoboldAI you would type it like a sentence such as _**You take the sword**_ and this is best done with the word You instead of I. -To speak simply type : *You say "We should probably gather some supplies first"* +To speak simply type : _You say "We should probably gather some supplies first"_ Just typing the quote might work, but the AI is at its best when you specify who does what in your commands. If you want to do this with your friends we advise using the main character as You and using the other characters by their name if you are playing on a model trained for Adventures. These models assume there is a You in the story. This mode does usually not perform well on Novel models because they do not know how to handle the input those are best used with regular story writing where you take turns with the AI. @@ -27,7 +27,7 @@ If you want to use KoboldAI as a writing assistant this is best done in the regu In chatbot mode you can use a suitable model as a chatbot, this mode automatically adds your name to the beginning of the sentences and prevents the AI from talking as you. To use it properly you must write your story opening as both characters in the following format (You can use your own text) : -``` ChatBot Opening Example +```plaintext Bot : Hey! You : Hey Boyname, how have you been? Bot : Been good! How about you? @@ -42,8 +42,6 @@ This mode works the best on either a Generic model or a chatbot model specifical Novel or Adventure models are not recommended for this feature but might still work but can derail away from the conversation format quickly. - - ## Play KoboldAI online for free on Google Colab (The easiest way to play) If you would like to play KoboldAI online for free on a powerful computer you can use Google Colaboraty. We provide two editions, a TPU and a GPU edition with a variety of models available. These run entirely on Google's Servers and will automatically upload saves to your Google Drive if you choose to save a story (Alternatively, you can choose to download your save instead so that it never gets stored on Google Drive). Detailed instructions on how to use them are at the bottom of the Colab's. @@ -52,35 +50,71 @@ Each edition features different models and requires different hardware to run, t ### [Click here for the TPU Edition Colab](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb) -| Model | Size | Type | Drive Space | Description | -| ------------------------------ | ------ | --------- | ----------- | ------------------------------------------------------------ | -| Skein 6B by VE_FORBDRYDERNE | 6B TPU | Hybrid | 0 GB | Skein is our flagship 6B model, it is a hybrid between a Adventure model and a Novel model. Best used with either Adventure mode or the You Bias userscript enabled. Skein has been trained on high quality Novels along with CYOA adventure stories and is not as wackey as the Adventure model. It also has tagging support. | -| Adventure 6B by VE_FORBRYDERNE | 6B TPU | Adventure | 0 GB | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. 
It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). | -| Lit 6B by Haru | 6B TPU | NSFW | 8 GB / 12 GB | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. | -| Generic 6B by EleutherAI | 6B TPU | Generic | 10 GB / 12 GB | GPT-J-6B is what all other models are based on, if you need something that has no specific bias towards any particular subject this is the model for you. Best used when the other models are not suitable for what you wish to do. Such as homework assistance, blog writing, coding and more. It needs more hand holding than other models and is more prone to undesirable formatting changes. | -| C1 6B by Haru | 6B TPU | Chatbot | 8 GB / 12 GB | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. | +| Model | Size | Style | Description | +| --- | --- | --- | --- | +| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model too. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. | +| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focused on SFW, romantic scenes might involve a degree of nudity. | +| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. | +| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. | +| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimic the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wacky adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). 
| +| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. | +| [Convo](https://huggingface.co/hitomi-team/convo-6B) by Hitomi Team | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). | +| [C1](https://huggingface.co/hakurei/c1-6B) by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. | +| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. | +| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. | +| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. | + +## [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb) + +| Model | Size | Style | Description | +| --- | --- | --- | --- | +| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. 
| +| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. | +| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. | +| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. | +| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. | +| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. | +| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. | +| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. | + +| Style | Description | +| --- | --- | +| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. | +| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. | +| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. | +| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. 
| +| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. | + +## Tips to get the most out of Google Colab + +* Google will occasionally show a Captcha, typically after it has been open for 30 minutes but it can be more frequent if you often use Colab. Make sure to do these properly, or you risk getting your instance shut down and getting a lower priority towards the TPU's. +* KoboldAI uses Google Drive to store your files and settings, if you wish to upload a softprompt or userscript this can be done directly on the Google Drive website. You can also use this to download backups of your KoboldAI related files or upload models of your own. +* Don't want to save your stories on Google Drive for privacy reasons? Do not use KoboldAI's save function and instead click Download as .json, this will automatically download the story to your own computer without ever touching Google's hard drives. You can load this back through the Load from file option. +* Google shut your instance down unexpectedly? You can still make use of the Download as .json button to recover your story as long as you did not close the KoboldAI window. You can then load this back up in your next session. +* Done with KoboldAI? Go to the Runtime menu, click on Manage Sessions and terminate your open sessions that you no longer need. This trick can help you maintain higher priority towards getting a TPU. +* Models stored on Google Drive typically load faster than models we need to download from the internet. ### [Click here for the GPU Edition Colab](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb) -| Model | Size | Type | Description | -| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ | -| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel Type writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. | +| Model | Size | Type | Description | +| --- | --- | --- | --- | +| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel Type writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genres. It is meant to be used in KoboldAI's regular mode. | | [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. | | [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. 
If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. | -| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel Type model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. | -| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. | -| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. | +| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel Type model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. | +| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. | +| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. | ### Model Types -| Type | Description | -| --------- | ------------------------------------------------------------ | -| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. | -| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. | -| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel Type model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. | -| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. 
| -| Hybrid | Hybrid models are a blend between different Types, for example they are trained on both Novel stories and Adventure stories. These models are great variety models that you can use for multiple different playTypes and modes, but depending on your usage you may need to enable Adventure Mode or the You bias (in userscripts). | -| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. | +| Type | Description | +| --- | --- | +| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. | +| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. | +| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel Type model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. | +| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. | +| Hybrid | Hybrid models are a blend between different Types, for example they are trained on both Novel stories and Adventure stories. These models are great variety models that you can use for multiple different playTypes and modes, but depending on your usage you may need to enable Adventure Mode or the You bias (in userscripts). | +| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. | ## Install KoboldAI on your own computer @@ -94,33 +128,42 @@ The easiest way for Windows users is to use the [offline installer](https://sour ### Installing KoboldAI offline bundle on Windows 7 or higher using the KoboldAI Offline Installer (Easiest) -1. [Download the latest offline installer from here](https://sourceforge.net/projects/koboldai/files/latest/download) -2. Run the installer to place KoboldAI on a location of choice, KoboldAI is portable software and is not bound to a specific harddrive. (Because of long paths inside our dependencies you may not be able to extract it many folders deep). -3. Update KoboldAI to the latest version with update-koboldai.bat if desired. -4. Use KoboldAI offline using play.bat or remotely with remote-play.bat +1. [Download the latest offline installer from here](https://sourceforge.net/projects/koboldai/files/latest/download) +2. Run the installer to place KoboldAI on a location of choice, KoboldAI is portable software and is not bound to a specific harddrive. (Because of long paths inside our dependencies you may not be able to extract it many folders deep). +3. Update KoboldAI to the latest version with update-koboldai.bat if desired. +4. 
Use KoboldAI offline using play.bat or remotely with remote-play.bat ### Installing KoboldAI Github release on Windows 10 or higher using the KoboldAI Runtime Installer -1. Extract the .zip to a location you wish to install KoboldAI, you will need roughly 20GB of free space for the installation (this does not include the models). -2. Open install_requirements.bat as **administrator**. -3. Choose the regular version of Transformers (Option 1), finetuneanon is depreciated and no longer recommended. -4. You will now be asked to choose the installation mode, we **strongly** recommend the Temporary B: drive option. This option eliminates most installation issues and also makes KoboldAI portable. The B: drive will be gone after a reboot and will automatically be recreated each time you play KoboldAI. -5. The installation will now automatically install its requirements, some stages may appear to freeze do not close the installer until it asks you to press a key. Before pressing a key to exit the installer please check if errors occurred. Most problems with the game crashing are related to installation/download errors. Disabling your antivirus can help if you get errors. -6. Use play.bat to start KoboldAI. +1. Extract the .zip to a location you wish to install KoboldAI, you will need roughly 20GB of free space for the installation (this does not include the models). +2. Open install\_requirements.bat as **administrator**. +3. Choose the regular version of Transformers (Option 1), finetuneanon is deprecated and no longer recommended. +4. You will now be asked to choose the installation mode, we **strongly** recommend the Temporary B: drive option. This option eliminates most installation issues and also makes KoboldAI portable. The B: drive will be gone after a reboot and will automatically be recreated each time you play KoboldAI. +5. The installation will now automatically install its requirements, some stages may appear to freeze; do not close the installer until it asks you to press a key. Before pressing a key to exit the installer please check if errors occurred. Most problems with the game crashing are related to installation/download errors. Disabling your antivirus can help if you get errors. +6. Use play.bat to start KoboldAI. -### Manual installation / Linux / Mac +### Installing KoboldAI on Linux using the KoboldAI Runtime (Easiest) + +1. Clone the URL of this Github repository (For example git clone [https://github.com/koboldai/koboldai-client](https://github.com/koboldai/koboldai-client) ) +2. AMD user? Make sure ROCm is installed if you want GPU support. Is yours not compatible with ROCm? Follow the usual instructions. +3. Run play.sh or if your AMD GPU supports ROCm use play-rocm.sh + +KoboldAI will now automatically configure its dependencies and start up, everything is contained in its own conda runtime so we will not clutter your system. The files will be located in the runtime subfolder. If at any point you wish to force a reinstallation of the runtime you can do so with the install\_requirements.sh file. While you can run this manually it is not necessary. + +### Manual installation / Mac We can not provide a step by step guide for manual installation due to the vast differences between the existing software configuration and the systems of our users. If you would like to manually install KoboldAI you will need some python/conda package management knowledge to manually do one of the following steps : -1. 
Use our bundled environments files to install your own conda environment, this should also automatically install CUDA (Recommended, you can get Miniconda from https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links). The recommended configuration is huggingface.yml for CUDA users and rocm.yml for ROCm users. -2. If you have a working copy of Docker for either CUDA or ROCm try play-cuda.sh or play-rocm.sh to launch the docker versions. In this case the installation is mostly automatic. -3. If conda is proving difficult you could also look inside requirements.txt for the required dependencies and try to install them yourself. This will likely be a mixture of pip and your native package manager, just installing our requirements.txt is not recommended since to speed things up we do not force any version changes. For local installations definitely prioritize conda as that is a better way for us to enforce you have the latest compatible versions. +1. Use our bundled environments files to install your own conda environment, this should also automatically install CUDA (Recommended, you can get Miniconda from https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links). The recommended configuration is huggingface.yml for CUDA users and rocm.yml for ROCm users. +2. If conda is proving difficult you could also look inside requirements.txt for the required dependencies and try to install them yourself. This will likely be a mixture of pip and your native package manager, just installing our requirements.txt is not recommended since we assume local users will run conda to get all dependencies. For local installations definitely prioritize conda as that is a better way for us to enforce that you have the compatible versions. +3. Clone our Github or download the zip file. +4. Now start KoboldAI with aiserver.py and not with our play.bat or play.sh files. -### AMD GPU's +### AMD GPU's (Linux only) -AMD GPU's have terrible compute support, this will currently not work on Windows and will only work for a select few Linux GPU's. [You can find a list of the compatible GPU's here](https://github.com/RadeonOpenCompute/ROCm#Hardware-and-Software-Support). Any GPU that is not listed is guaranteed not to work with KoboldAI and we will not be able to provide proper support on GPU's that are not compatible with the versions of ROCm we require. +AMD GPU's have terrible compute support, this will currently not work on Windows and will only work for a select few Linux GPU's. [You can find a list of the compatible GPU's here](https://github.com/RadeonOpenCompute/ROCm#Hardware-and-Software-Support). Any GPU that is not listed is guaranteed not to work with KoboldAI and we will not be able to provide proper support on GPU's that are not compatible with the versions of ROCm we require. Make sure to first install ROCm on your Linux system using a guide for your distribution, after that you can follow the usual linux instructions above. ### Troubleshooting @@ -140,45 +183,17 @@ In general, the less versions of Python you have on your system the higher your GPU not found errors can be caused by one of two things, either you do not have a suitable Nvidia GPU (It needs Compute Capability 5.0 or higher to be able to play KoboldAI). Your Nvidia GPU is supported by KoboldAI but is not supported by the latest version of CUDA. Your Nvidia GPU is not yet supported by the latest version of CUDA or you have a dependency conflict like the ones mentioned above. 
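If you are not sure whether your GPU is being detected at all, a quick way to narrow this down is to query PyTorch from the same runtime KoboldAI uses (for example `bin/micromamba run -r runtime -n koboldai python` on Linux, or after `call miniconda3\condabin\activate` on Windows). The snippet below is only a rough diagnostic sketch, not part of KoboldAI itself:

```python
# Minimal GPU diagnostic, run inside the KoboldAI runtime environment.
import torch

print("CUDA available:", torch.cuda.is_available())     # False usually points to a driver or CUDA mismatch
print("Torch built against CUDA:", torch.version.cuda)  # Compare with the CUDA version your driver supports
if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability(0)
    print("GPU:", torch.cuda.get_device_name(0))
    print("Compute capability:", f"{major}.{minor}")    # KoboldAI needs 5.0 or higher
```

If this reports no CUDA device or a compute capability below 5.0, the problem lies with your hardware or driver setup rather than with KoboldAI.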
-Like with Python version conflicts we recommend uninstalling CUDA from your system if you have manually installed it and do not need it for anything else and trying again. If your GPU needs CUDA10 to function open environments\finetuneanon.yml and add a line that says - cudatoolkit=10.2 underneath dependencies: . After this you can run the installer again (Pick the option to delete the existing files) and it will download a CUDA10 compatible version. +Like with Python version conflicts we recommend uninstalling CUDA from your system if you have manually installed it and do not need it for anything else and trying again. If your GPU needs CUDA10 to function open environments\\finetuneanon.yml and add a line that says - cudatoolkit=10.2 underneath dependencies: . After this you can run the installer again (Pick the option to delete the existing files) and it will download a CUDA10 compatible version. If you do not have a suitable Nvidia GPU that can run on CUDA10 or Higher and that supports Compute Capabilities 5.0 or higher we can not help you get the game detected on the GPU. Unless you are following our ROCm guide with a compatible AMD GPU. #### vocab.json / config.json is not found error -If you get these errors you either did not select the correct folder for your custom model or the model you have downloaded is not (yet) compatible with KoboldAI. There exist a few models out there that are compatible and provide a pytorch_model.bin file but do not ship all the required files. In this case try downloading a compatible model of the same kind (For example another GPT-Neo if you downloaded a GPT-Neo model) and replace the pytorch_model.bin file with the one you are trying to run. Chances are this will work fine. - -## KoboldAI Compatible Models - -Most of the high quality models have been integrated in the menu, these models have their download link removed since the easiest way to obtain them is to run them directly from the menu. KoboldAI will automatically download and convert the models to a offline format for later use. - -If you have old 6B versions which end in -hf they will no longer be compatible with the newer versions of transformers and will no longer behave correctly. It is highly recommended that you install the official version of transformers (offline installers for KoboldAI contain this version by default) and redownload these models from the menu to get compatible versions. If you have very limited internet we will for a limited time also offer finetuneanon's fork in the install_requirements.bat file, when using that option you will not be able to use the 6B models in our main menu so definitely upgrade when your internet allows. - -The VRAM requirements amounts are the recommended amounts for fast smooth play, playing with lower VRAM is possible but then you may need to either lower the amount of tokens in the settings, or you may need to put less layers on your GPU causing a significant performance loss. - -**For CPU players and during the loading regular RAM usage is double of what we list here.** - - - -| **Model** | Type | **(V)RAM** | Repetition Penalty | Description | -| ------------------------------------------------------------ | --------------------------------- | ---------- | ------------------ | ------------------------------------------------------------ | -| Skein 6B by VE_FORBDRYERNE | Adventure Novel / 6B / Neo Custom | 16GB | 1.1 | Skein is our flagship 6B model, it is a hybrid between a Adventure model and a Novel model. 
Best used with either Adventure mode or the You Bias userscript enabled. Skein has been trained on high quality Novels along with CYOA adventure stories and is not as wackey as the Adventure model. It also has tagging support. | -| Adventure 6B by VE_FORBRYDERNE | Adventure / 6B / Neo Custom | 16GB | 1.2 | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). | -| Adventure 2.7B by melastashco | Adventure / 2.7B / Neo Custom | 8GB | 2.0 | This is one of the closest replications of the original AI Dungeon Classic model. Tuned on the same data that got uploaded alongside AI Dungeon. In KoboldAI we noticed this model performs better than the conversions of the original AI Dungeon model. It has all the traits you expect of AI Dungeon Classic while not having as many artifacts as this model was trained specifically for KoboldAI. Must be played with Adventure mode enabled to prevent it from doing actions on your behalf. | -| Horni 2.7B by finetuneanon | Novel / 2.7B / Neo Custom | 8GB | 2.0 | One of the best novel models available for 2.7B focused on NSFW content. This model trains the AI to write in a story like fashion using a very large collection of Literotica stories. It is one of the original finetuned models for 2.7B. | -| Horni-LN 2.7B by finetuneanon | Novel / 2.7B / Neo Custom | 8GB | 2.0 | This model is much like the one above, but has been additionally trained on regular light novels. More likely to go SFW and is more focused towards themes found in these light novels over general cultural references. This is a good model for Novel writing especially if you want to add erotica to the mix. | -| Picard 2.7B by Mr Seeker | Novel / 2.7B / Neo Custom | 8GB | 2.0 | Picard is another Novel model, this time exclusively focused on SFW content of various genres. Unlike the name suggests this goes far beyond Star Trek stories and is not exclusively sci-fi. | -| Janeway 2.7B by Mr Seeker | Novel / 2.7B / Neo Custom | 8GB | 2.0 | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity.| -| Shinen 2.7B by Mr Seeker | Novel / 2.7B / Neo Custom | 8GB | 2.0 | The most NSFW of them all, Shinen WILL make things sexual. This model will assume that whatever you are doing is meant to be a sex story and will sexualize constantly. It is designed for people who find Horni to tame. It was trained on SexStories instead of Literotica and was trained on tags making it easier to guide the AI to the right context. | -| [AID-16Bit](https://storage.henk.tech/KoboldAI/aid-16bit.zip) | Adventure / 1.5B / GPT-2 Custom | 4GB | 2.0 | The original AI Dungeon Classic model converted to Pytorch and then converted to a 16-bit Model making it half the size. | -| [model_v5_pytorch](https://storage.henk.tech/KoboldAI/model_v5_pytorch.zip) (AI Dungeon's Original Model) | Adventure / 1.5B / GPT-2 Custom | 8GB | 2.0 | This is the original AI Dungeon Classic model converted to the Pytorch format compatible with AI Dungeon Clover and KoboldAI. 
We consider this model inferior to the GPT-Neo version because it has more artifacting due to its conversion. This is however the most authentic you can get to AI Dungeon Classic. | -| [Novel 774M](https://storage.henk.tech/KoboldAI/Novel%20model%20774M.rar) | Novel / 774M / GPT-2 Custom | 4GB | 2.0 | Novel 774M is made by the AI Dungeon Clover community, because of its small size and novel bias it is more suitable for CPU players that want to play with speed over substance or players who want to test a GPU with a low amount of VRAM. These performance savings are at the cost of story quality and you should not expect the kind of in depth story capabilities that the larger models offer. It was trained for SFW stories. | -| [Smut 774M](https://storage.henk.tech/KoboldAI/Smut%20model%20774M%2030K.rar) | Novel / 774M / GPT-2 Custom | 4GB | 2.0 | The NSFW version of the above, its a smaller GPT-2 based model made by the AI Dungeon Clover community. Gives decent speed on a CPU at the cost of story quality like the other 774M models. | -| [Mia (GPT-Neo-125M-AID)](https://huggingface.co/KoboldAI/GPT-Neo-125M-AID) by Henk717 | Adventure / 125M / Neo Custom | 1GB | 2.0 | Mia is the smallest Adventure model, it runs at very fast speeds on the CPU which makes it a good testing model for developers who do not have GPU access. Because of its small size it will constantly attempt to do actions on behalf of the player and it will not produce high quality stories. If you just need a small model for a quick test, or if you want to take the challenge of trying to run KoboldAI entirely on your phone this would be an easy model to use due to its small RAM requirements and fast (loading) speeds. | +If you get these errors you either did not select the correct folder for your custom model or the model you have downloaded is not (yet) compatible with KoboldAI. There exist a few models out there that are compatible and provide a pytorch\_model.bin file but do not ship all the required files. In this case try downloading a compatible model of the same kind (For example another GPT-Neo if you downloaded a GPT-Neo model) and replace the pytorch\_model.bin file with the one you are trying to run. Chances are this will work fine. ## Softprompts -Softprompts (also known as Modules in other products) are addons that can change the output of existing models. For example you may load a softprompt that biases the AI towards a certain subject and style like transcripts from your favorite TV show. +Softprompts (also known as Modules in other products) are addons that can change the output of existing models. For example you may load a softprompt that biases the AI towards a certain subject and style like transcripts from your favorite TV show. Since these softprompts are often based on existing franchises we currently do not bundle any of them with KoboldAI due to copyright concerns (We do not want to put the entire project at risk). Instead look at community resources like #softprompts on the [KoboldAI Discord](https://discord.gg/XuQWadgU9k) or the [community hosted mirror](https://storage.henk.tech/KoboldAI/softprompts/) . @@ -188,10 +203,10 @@ Training softprompts can be done for free with the [mtj-softtuner colab](https:/ ## Userscripts -Userscripts are scripts that can automate tasks in KoboldAI, or modify the AI behavior / input / output. +Userscripts are scripts that can automate tasks in KoboldAI, or modify the AI behavior / input / output. 
Scripting is done in LUA5.4 (Lua does not need to be separately installed as long as you got all the python requirements) and has sandboxing to help protect you from malicious behavior. Even with these measures in place we strongly advise you only run userscripts from places you trust and/or understand, otherwise consult the community for advice on how safe the script might be. -Inside the userscripts folder you will find our kaipreset scripts, these are default scripts that we think will be useful for our users. These scripts are automatically overwritten when you update KoboldAI, if you wish to modify these scripts make sure to first rename them to something else that does not contain kaipreset so your changes are not lost. These scripts range from a You Bias filter that prevents the AI from addressing characters as you. Ways to be able to prevent the AI from using words, word replacements and more. +Inside the userscripts folder you will find our kaipreset scripts, these are default scripts that we think will be useful for our users. These scripts are automatically overwritten when you update KoboldAI, if you wish to modify these scripts make sure to first rename them to something else that does not contain kaipreset so your changes are not lost. These scripts range from a You Bias filter that prevents the AI from addressing characters as you. Ways to be able to prevent the AI from using words, word replacements and more. Along with our preset scripts we also ship examples in the examples folder that merely serve as a demonstration and do not enhance your usage of KoboldAI. To use these scripts make sure to move them out of the examples folder before either using or modifying the script. @@ -203,16 +218,17 @@ For our TPU versions keep in mind that scripts modifying AI behavior relies on a This project contains work from the following contributors : -- The Gantian - Creator of KoboldAI, has created most features such as the interface, the different AI model / API integrations and in general the largest part of the project. -- VE FORBRYDERNE - Contributed many features such as the Editing overhaul, Adventure Mode, expansions to the world info section, breakmodel integration, scripting support, softpromtps and much more. As well as vastly improving the TPU compatibility and integrating external code into KoboldAI so we could use official versions of Transformers with virtually no downsides. -- Henk717 - Contributed the installation scripts, this readme, random story generator, the docker scripts, the foundation for the commandline interface and other smaller changes as well as integrating multiple parts of the code of different forks to unite it all. He also optimized the model loading so that downloaded models get converted to efficient offline models and that in future models are more likely to work out of the box. Not all code Github attributes to Henk717 is by Henk717 as some of it has been integrations of other people's work. We try to clarify this in the contributors list as much as we can. -- Ebolam - Automatic Saving -- Frogging101 - top_k / tfs support (Part of this support was later redone by VE to integrate what was originally inside of finetuneanon's transformers) -- UWUplus (Ralf) - Contributed storage systems for community colabs, as well as cleaning up and integrating the website dependencies/code better. He is also the maintainer of flask-cloudflared which we use to generate the cloudflare links. 
-- Javalar - Initial Performance increases on the story_refresh -- LexSong - Initial environment file adaptation for conda that served as a basis for the install_requirements.bat overhaul. -- Arrmansa - Breakmodel support for other projects that served as a basis for VE FORBRYDERNE's integration. -- Jojorne - Small improvements to the response selection for gens per action. +* The Gantian - Creator of KoboldAI, has created most features such as the interface, the different AI model / API integrations and in general the largest part of the project. +* VE FORBRYDERNE - Contributed many features such as the Editing overhaul, Adventure Mode, expansions to the world info section, breakmodel integration, scripting support, softpromtps and much more. As well as vastly improving the TPU compatibility and integrating external code into KoboldAI so we could use official versions of Transformers with virtually no downsides. +* Henk717 - Contributed the installation scripts, this readme, random story generator, the docker scripts, the foundation for the commandline interface and other smaller changes as well as integrating multiple parts of the code of different forks to unite it all. He also optimized the model loading so that downloaded models get converted to efficient offline models and that in future models are more likely to work out of the box. Not all code Github attributes to Henk717 is by Henk717 as some of it has been integrations of other people's work. We try to clarify this in the contributors list as much as we can. +* Ebolam - Automatic Saving +* Frogging101 - top\_k / tfs support (Part of this support was later redone by VE to integrate what was originally inside of finetuneanon's transformers) +* UWUplus (Ralf) - Contributed storage systems for community colabs, as well as cleaning up and integrating the website dependencies/code better. He is also the maintainer of flask-cloudflared which we use to generate the cloudflare links. +* Javalar - Initial Performance increases on the story\_refresh +* LexSong - Initial environment file adaptation for conda that served as a basis for the install\_requirements.bat overhaul. +* Arrmansa - Breakmodel support for other projects that served as a basis for VE FORBRYDERNE's integration. +* Jojorne - Small improvements to the response selection for gens per action. +* OccultSage (GooseAI) - Improved support for GooseAI/OpenAI As well as various Model creators who will be listed near their models, and all the testers who helped make this possible! @@ -222,4 +238,4 @@ Did we miss your contribution? Feel free to issue a commit adding your name to t KoboldAI is licensed with a AGPL license, in short this means that it can be used by anyone for any purpose. However, if you decide to make a publicly available instance your users are entitled to a copy of the source code including all modifications that you have made (which needs to be available trough an interface such as a button on your website), you may also not distribute this project in a form that does not contain the source code (Such as compiling / encrypting the code and distributing this version without also distributing the source code that includes the changes that you made. You are allowed to distribute this in a closed form if you also provide a separate archive with the source code.). -umamba.exe is bundled for convenience because we observed that many of our users had trouble with command line download methods, it is not part of our project and does not fall under the AGPL license. 
It is licensed under the BSD-3-Clause license. +umamba.exe is bundled for convenience because we observed that many of our users had trouble with command line download methods, it is not part of our project and does not fall under the AGPL license. It is licensed under the BSD-3-Clause license. Other files with differing licenses will have a reference or embedded version of this license within the file. \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 6aef508d..7b5b967c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,13 @@ -transformers +transformers>=4.19 Flask Flask-SocketIO requests -torch +torch==1.11 flask-cloudflared flask-ngrok eventlet lupa==1.10 markdown -bleach==4.1.0 \ No newline at end of file +bleach==4.1.0 +sentencepiece +protobuf diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 39175a62..0f723a49 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -1,11 +1,11 @@ +torch >= 1.9, <= 1.11 numpy tqdm requests optax >= 0.0.5, <= 0.0.9 dm-haiku == 0.0.5 -ray[default] jax == 0.2.21 -transformers +transformers >= 4.19 progressbar2 git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck flask diff --git a/static/application.js b/static/application.js index 1a6b8fe8..cfb25883 100644 --- a/static/application.js +++ b/static/application.js @@ -25,6 +25,7 @@ var button_mode_label; var button_send; var button_actmem; var button_actback; +var button_actfwd; var button_actretry; var button_actwi; var game_text; @@ -38,6 +39,7 @@ var anote_menu; var anote_input; var anote_labelcur; var anote_slider; +var debug_area; var popup; var popup_title; var popup_content; @@ -49,6 +51,7 @@ var aidg_accept; var aidg_close; var saveaspopup; var saveasinput; +var savepins; var topic; var saveas_accept; var saveas_close; @@ -115,10 +118,33 @@ var adventure = false; // Chatmode var chatmode = false; +var sliders_throttle = getThrottle(200); + //=================================================================// // METHODS //=================================================================// +/** + * Returns a function that will automatically wait for X ms before executing the callback + * The timer is reset each time the returned function is called + * Useful for methods where something is overridden too fast + * @param ms milliseconds to wait before executing the callback + * @return {(function(*): void)|*} function that takes the ms to wait and a callback to execute after the timer + */ +function getThrottle(ms) { + var timer = {}; + + return function (id, callback) { + if (timer[id]) { + clearTimeout(timer[id]); + } + timer[id] = setTimeout(function () { + callback(); + delete timer[id]; + }, ms); + } +} + function addSetting(ob) { // Add setting block to Settings Menu if(ob.uitype == "slider"){ @@ -127,9 +153,7 @@ function addSetting(ob) {
\ "+ob.label+" ?"+ob.tooltip+"\
\ -
\ - "+ob.default+"\ -
\ + \ \
\ \ @@ -149,8 +173,37 @@ function addSetting(ob) { window["setting_"+ob.id] = refin; // Is this still needed? window["label_"+ob.id] = reflb; // Is this still needed? // Add event function to input - refin.on("input", function () { - socket.send({'cmd': $(this).attr('id'), 'data': $(this).val()}); + var updateLabelColor = function () { + var value = (ob.unit === "float" ? parseFloat : parseInt)(reflb.val()); + if(value > ob.max || value < ob.min) { + reflb.addClass("setting-value-warning"); + } else { + reflb.removeClass("setting-value-warning"); + } + } + var send = function () { + sliders_throttle(ob.id, function () { + socket.send({'cmd': $(refin).attr('id'), 'data': $(reflb).val()}); + }); + } + refin.on("input", function (event) { + reflb.val(refin.val()); + updateLabelColor(); + send(); + }).on("change", updateLabelColor); + reflb.on("change", function (event) { + var value = (ob.unit === "float" ? parseFloat : parseInt)(event.target.value); + if(Number.isNaN(value) || (ob.min >= 0 && value < 0)) { + event.target.value = refin.val(); + return; + } + if (ob.unit === "float") { + value = parseFloat(value.toFixed(3)); // Round to 3 decimal places to help avoid the number being too long to fit in the box + } + refin.val(value); + reflb.val(value); + updateLabelColor(); + send(); }); } else if(ob.uitype == "toggle"){ settings_menu.append("
\ @@ -748,7 +801,7 @@ function enterMemoryMode() { setchatnamevisibility(false); showMessage("Edit the memory to be sent with each request to the AI."); button_actmem.html("Cancel"); - hide([button_actback, button_actretry, button_actwi]); + hide([button_actback, button_actfwd, button_actretry, button_actwi]); // Display Author's Note field anote_menu.slideDown("fast"); } @@ -759,7 +812,7 @@ function exitMemoryMode() { setchatnamevisibility(chatmode); hideMessage(); button_actmem.html("Memory"); - show([button_actback, button_actretry, button_actwi]); + show([button_actback, button_actfwd, button_actretry, button_actwi]); input_text.val(""); // Hide Author's Note field anote_menu.slideUp("fast"); @@ -768,7 +821,7 @@ function exitMemoryMode() { function enterWiMode() { showMessage("World Info will be added to memory only when the key appears in submitted text or the last action."); button_actwi.html("Accept"); - hide([button_actback, button_actmem, button_actretry, game_text]); + hide([button_actback, button_actfwd, button_actmem, button_actretry, game_text]); setchatnamevisibility(false); show([wi_menu]); disableSendBtn(); @@ -780,7 +833,7 @@ function exitWiMode() { button_actwi.html("W Info"); hide([wi_menu]); setchatnamevisibility(chatmode); - show([button_actback, button_actmem, button_actretry, game_text]); + show([button_actback, button_actfwd, button_actmem, button_actretry, game_text]); enableSendBtn(); $("#gamescreen").removeClass("wigamescreen"); } @@ -884,7 +937,7 @@ function hideSaveAsPopup() { } function sendSaveAsRequest() { - socket.send({'cmd': 'saveasrequest', 'data': saveasinput.val()}); + socket.send({'cmd': 'saveasrequest', 'data': {"name": saveasinput.val(), "pins": savepins.val()}}); } function showLoadPopup() { @@ -1142,9 +1195,9 @@ function updateSPStatItems(items) { function setStartState() { enableSendBtn(); enableButtons([button_actmem, button_actwi]); - disableButtons([button_actback, button_actretry]); + disableButtons([button_actback, button_actfwd, button_actretry]); hide([wi_menu]); - show([game_text, button_actmem, button_actwi, button_actback, button_actretry]); + show([game_text, button_actmem, button_actwi, button_actback, button_actfwd, button_actretry]); hideMessage(); hideWaitAnimation(); button_actmem.html("Memory"); @@ -1160,10 +1213,41 @@ function parsegenseqs(seqs) { seqselcontents.html(""); var i; for(i=0; i"+seqs[i].generated_text+"
"); + //setup selection data + text_data = "
"+seqs[i][0]+"
" + + //Now do the icon (pin/redo) + + if (seqs[i][1] == "redo") { + text_data = text_data + "" + } else if (seqs[i][1] == "pinned") { + text_data = text_data + "" + } else { + text_data = text_data + "" + } + text_data = text_data + "
" + seqselcontents.append(text_data); + + //setup on-click actions $("#seqsel"+i).on("click", function () { socket.send({'cmd': 'seqsel', 'data': $(this).attr("n")}); }); + + //onclick for pin only + if (seqs[i][1] != "redo") { + $("#seqselpin"+i).on("click", function () { + socket.send({'cmd': 'seqpin', 'data': $(this).attr("n")}); + if ($(this).attr("style") == "color: grey") { + console.log($(this).attr("style")); + $(this).css({"color": "white"}); + console.log($(this).attr("style")); + } else { + console.log($(this).attr("style")); + $(this).css({"color": "grey"}); + console.log($(this).attr("style")); + } + }); + } } $('#seqselmenu').slideDown("slow"); } @@ -1756,6 +1840,7 @@ $(document).ready(function(){ button_send = $('#btnsend'); button_actmem = $('#btn_actmem'); button_actback = $('#btn_actundo'); + button_actfwd = $('#btn_actredo'); button_actretry = $('#btn_actretry'); button_actwi = $('#btn_actwi'); game_text = $('#gametext'); @@ -1765,6 +1850,7 @@ $(document).ready(function(){ settings_menu = $("#settingsmenu"); format_menu = $('#formatmenu'); anote_menu = $('#anoterowcontainer'); + debug_area = $('#debugcontainer'); wi_menu = $('#wimenu'); anote_input = $('#anoteinput'); anote_labelcur = $('#anotecur'); @@ -1780,6 +1866,7 @@ $(document).ready(function(){ aidg_close = $("#btn_aidgpopupclose"); saveaspopup = $("#saveascontainer"); saveasinput = $("#savename"); + savepins = $("#savepins"); topic = $("#topic"); saveas_accept = $("#btn_saveasaccept"); saveas_close = $("#btn_saveasclose"); @@ -1928,13 +2015,13 @@ $(document).ready(function(){ // Enable or Disable buttons if(msg.data == "ready") { enableSendBtn(); - enableButtons([button_actmem, button_actwi, button_actback, button_actretry]); + enableButtons([button_actmem, button_actwi, button_actback, button_actfwd, button_actretry]); hideWaitAnimation(); gamestate = "ready"; } else if(msg.data == "wait") { gamestate = "wait"; disableSendBtn(); - disableButtons([button_actmem, button_actwi, button_actback, button_actretry]); + disableButtons([button_actmem, button_actwi, button_actback, button_actfwd, button_actretry]); showWaitAnimation(); } else if(msg.data == "start") { setStartState(); @@ -1988,74 +2075,81 @@ $(document).ready(function(){ newTextHighlight($("#n"+msg.data)) } else if(msg.cmd == "updatetemp") { // Send current temp value to input - $("#settemp").val(parseFloat(msg.data)); - $("#settempcur").html(msg.data); + $("#settempcur").val(msg.data); + $("#settemp").val(parseFloat(msg.data)).trigger("change"); } else if(msg.cmd == "updatetopp") { // Send current top p value to input - $("#settopp").val(parseFloat(msg.data)); - $("#settoppcur").html(msg.data); + $("#settoppcur").val(msg.data); + $("#settopp").val(parseFloat(msg.data)).trigger("change"); } else if(msg.cmd == "updatetopk") { // Send current top k value to input - $("#settopk").val(parseFloat(msg.data)); - $("#settopkcur").html(msg.data); + $("#settopkcur").val(msg.data); + $("#settopk").val(parseFloat(msg.data)).trigger("change"); } else if(msg.cmd == "updatetfs") { // Send current tfs value to input - $("#settfs").val(parseFloat(msg.data)); - $("#settfscur").html(msg.data); + $("#settfscur").val(msg.data); + $("#settfs").val(parseFloat(msg.data)).trigger("change"); + } else if(msg.cmd == "updatetypical") { + // Send current typical value to input + $("#settypicalcur").val(msg.data); + $("#settypical").val(parseFloat(msg.data)).trigger("change"); } else if(msg.cmd == "updatereppen") { // Send current rep pen value to input - 
$("#setreppen").val(parseFloat(msg.data)); - $("#setreppencur").html(msg.data); + $("#setreppencur").val(msg.data); + $("#setreppen").val(parseFloat(msg.data)).trigger("change"); } else if(msg.cmd == "updatereppenslope") { // Send current rep pen value to input - $("#setreppenslope").val(parseFloat(msg.data)); - $("#setreppenslopecur").html(msg.data); + $("#setreppenslopecur").val(msg.data); + $("#setreppenslope").val(parseFloat(msg.data)).trigger("change"); } else if(msg.cmd == "updatereppenrange") { // Send current rep pen value to input - $("#setreppenrange").val(parseFloat(msg.data)); - $("#setreppenrangecur").html(msg.data); + $("#setreppenrangecur").val(msg.data); + $("#setreppenrange").val(parseFloat(msg.data)).trigger("change"); } else if(msg.cmd == "updateoutlen") { // Send current output amt value to input - $("#setoutput").val(parseInt(msg.data)); - $("#setoutputcur").html(msg.data); + $("#setoutputcur").val(msg.data); + $("#setoutput").val(parseInt(msg.data)).trigger("change"); } else if(msg.cmd == "updatetknmax") { // Send current max tokens value to input - $("#settknmax").val(parseInt(msg.data)); - $("#settknmaxcur").html(msg.data); + $("#settknmaxcur").val(msg.data); + $("#settknmax").val(parseInt(msg.data)).trigger("change"); } else if(msg.cmd == "updateikgen") { // Send current max tokens value to input - $("#setikgen").val(parseInt(msg.data)); - $("#setikgencur").html(msg.data); + $("#setikgencur").val(msg.data); + $("#setikgen").val(parseInt(msg.data)).trigger("change"); } else if(msg.cmd == "setlabeltemp") { // Update setting label with value from server - $("#settempcur").html(msg.data); + $("#settempcur").val(msg.data); } else if(msg.cmd == "setlabeltopp") { // Update setting label with value from server - $("#settoppcur").html(msg.data); + $("#settoppcur").val(msg.data); } else if(msg.cmd == "setlabeltopk") { // Update setting label with value from server - $("#settopkcur").html(msg.data); + $("#settopkcur").val(msg.data); } else if(msg.cmd == "setlabeltfs") { // Update setting label with value from server - $("#settfscur").html(msg.data); + $("#settfscur").val(msg.data); + } else if(msg.cmd == "setlabeltypical") { + // Update setting label with value from server + $("#settypicalcur").val(msg.data); } else if(msg.cmd == "setlabelreppen") { // Update setting label with value from server - $("#setreppencur").html(msg.data); + $("#setreppencur").val(msg.data); } else if(msg.cmd == "setlabelreppenslope") { // Update setting label with value from server - $("#setreppenslopecur").html(msg.data); + $("#setreppenslopecur").val(msg.data); } else if(msg.cmd == "setlabelreppenrange") { // Update setting label with value from server - $("#setreppenrangecur").html(msg.data); + $("#setreppenrangecur").val(msg.data); } else if(msg.cmd == "setlabeloutput") { // Update setting label with value from server - $("#setoutputcur").html(msg.data); + $("#setoutputcur").val(msg.data); } else if(msg.cmd == "setlabeltknmax") { // Update setting label with value from server - $("#settknmaxcur").html(msg.data); + $("#settknmaxcur").val(msg.data); } else if(msg.cmd == "setlabelikgen") { // Update setting label with value from server - $("#setikgencur").html(msg.data); + $("#setikgencur").val(msg.data); } else if(msg.cmd == "updateanotedepth") { // Send current Author's Note depth value to input anote_slider.val(parseInt(msg.data)); @@ -2226,15 +2320,15 @@ $(document).ready(function(){ $("#setnumseqcur").html(msg.data); } else if(msg.cmd == "updatenumseq") { // Send current max tokens value to 
input - $("#setnumseq").val(parseInt(msg.data)); $("#setnumseqcur").html(msg.data); + $("#setnumseq").val(parseInt(msg.data)).trigger("change"); } else if(msg.cmd == "setlabelwidepth") { // Update setting label with value from server $("#setwidepthcur").html(msg.data); } else if(msg.cmd == "updatewidepth") { // Send current max tokens value to input - $("#setwidepth").val(parseInt(msg.data)); $("#setwidepthcur").html(msg.data); + $("#setwidepth").val(parseInt(msg.data)).trigger("change"); } else if(msg.cmd == "updateuseprompt") { // Update toggle state $("#setuseprompt").prop('checked', msg.data).change(); @@ -2269,6 +2363,14 @@ $(document).ready(function(){ } else if(msg.cmd == "runs_remotely") { remote = true; hide([button_savetofile, button_import, button_importwi]); + } else if(msg.cmd == "debug_info") { + $("#debuginfo").val(msg.data); + } else if(msg.cmd == "set_debug") { + if(msg.data) { + debug_area.removeClass("hidden"); + } else { + debug_area.addClass("hidden"); + } } }); @@ -2349,6 +2451,12 @@ $(document).ready(function(){ hidegenseqs(); }); + button_actfwd.on("click", function(ev) { + hideMessage(); + //hidegenseqs(); + socket.send({'cmd': 'redo', 'data': ''}); + }); + button_actmem.on("click", function(ev) { socket.send({'cmd': 'memory', 'data': ''}); }); diff --git a/static/custom.css b/static/custom.css index fd6ebff7..d70fd34e 100644 --- a/static/custom.css +++ b/static/custom.css @@ -22,6 +22,25 @@ chunk.editing, chunk.editing * { font-style: normal !important; } +.setting-value-warning { + color: #ff7777; +} + +.setting-value-warning:focus { + color: #ffaaaa !important; +} + +.settinglabel input { + width: 5ch; + background-color: inherit; + border: none; + outline: none; +} + +.settinglabel input:focus { + color: #cdf; +} + #gametext, chunk, chunk * { outline: 0px solid transparent; } @@ -1273,8 +1292,8 @@ body.connected .popupfooter, .popupfooter.always-available { .settinglabel { color: #ffffff; - display: grid; - grid-template-columns: 80% 20%; + display: flex; + flex-flow: wrap; } .settingminmax { diff --git a/structures.py b/structures.py index 287f92c1..53bf1ba2 100644 --- a/structures.py +++ b/structures.py @@ -19,10 +19,16 @@ class KoboldStoryRegister(collections.OrderedDict): return self.popitem()[1] def get_first_key(self) -> int: - return next(iter(self)) + if len(self) == 0: + return -1 + else: + return next(iter(self)) def get_last_key(self) -> int: - return next(reversed(self)) + if len(self) == 0: + return -1 + else: + return next(reversed(self)) def __getitem__(self, k: int) -> str: return super().__getitem__(k) diff --git a/templates/index.html b/templates/index.html index 40812cee..07bcdcc5 100644 --- a/templates/index.html +++ b/templates/index.html @@ -9,7 +9,7 @@ - + @@ -17,7 +17,7 @@ - + @@ -123,6 +123,7 @@ +
@@ -185,6 +186,9 @@
+
-
+
+
+ Save Pin Information