Merge branch 'main' into neox

Gnome Ann 2022-06-21 19:30:51 -04:00
commit 5e3c7c07ae
24 changed files with 1183 additions and 307 deletions

.gitignore vendored (3 changed lines)

@ -31,3 +31,6 @@ Uninstall
# Ignore compiled Python files.
*.pyc
# Don't ignore defaults
!defaults/*
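
The un-ignored defaults/ directory holds per-model settings files shipped with the repository; the reworked loadsettings() in aiserver.py (later in this commit) reads defaults/<model>.settings before the user's settings/<model>.settings. A hypothetical example of such a file, written with only keys that processsettings() recognises (the file name and values are illustrative):

import json

with open("defaults/KoboldAI_fairseq-dense-13B-Nerys.settings", "w") as f:
    json.dump({"temp": 0.5, "top_p": 0.9, "rep_pen": 1.1, "sampler_order": [0, 1, 2, 3, 4, 5]}, f)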

Uninstall.bat (new normal file, 32 lines)

@ -0,0 +1,32 @@
@echo off
cd /D %~dp0
TITLE KoboldAI Uninstall Helper
SET /P M=<loader.settings
IF %M%==3 subst /D B: >nul
IF %M%==1 subst /D K: >nul
IF "%1" == "FORCE" GOTO UNINSTALL
IF EXIST "Uninstall\unins000.exe" (
start Uninstall\unins000.exe
exit
) ELSE (
echo This will remove all KoboldAI folders that do not contain user data
pause
GOTO UNINSTALL
)
:UNINSTALL
echo Uninstallation in progress, please wait...
set DM=Y
attrib -h .git >nul
for /d %%D in (*) do if not "%%~nxD"=="stories" if not "%%~nxD"=="userscripts" if not "%%~nxD"=="settings" if not "%%~nxD"=="softprompts" if not "%%~nxD"=="models" if not "%%~nxD"=="Uninstall" rmdir /S /Q %%~nxD
for %%i in (*) do if not "%%i"=="Uninstall.bat" del /q "%%i"
set /P DM=Would you like to delete the models folder? (Y/n) :
IF %DM%==Y rmdir models /s /q
IF %DM%==y rmdir models /s /q
set DM=N
set /P DM=Would you like to delete all other user folders? (y/N) :
IF %DM%==Y rmdir stories userscripts settings softprompts /s /q
IF %DM%==y rmdir stories userscripts settings softprompts /s /q
del Uninstall.bat

aiserver.py

@ -1,7 +1,7 @@
#!/usr/bin/python3
#==================================================================#
# KoboldAI
# Version: 1.17.0
# Version: 1.18.1
# By: KoboldAIDev and the KoboldAI Community
#==================================================================#
@ -16,6 +16,9 @@ os.environ['EVENTLET_THREADPOOL_SIZE'] = '1'
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
from eventlet import tpool
import logging
logging.getLogger("urllib3").setLevel(logging.ERROR)
from os import path, getcwd
import time
import re
@ -23,6 +26,7 @@ import json
import collections
import zipfile
import packaging
import packaging.version
import contextlib
import traceback
import threading
@ -54,6 +58,27 @@ if lupa.LUA_VERSION[:2] != (5, 4):
print(f"Please install lupa==1.10. You have lupa {lupa.__version__}.", file=sys.stderr)
# Make sure tqdm progress bars display properly in Colab
from tqdm.auto import tqdm
old_init = tqdm.__init__
def new_init(self, *args, **kwargs):
old_init(self, *args, **kwargs)
if(self.ncols == 0 and kwargs.get("ncols") != 0):
self.ncols = 99
tqdm.__init__ = new_init
# Fix some issues with the OPT tokenizer
from transformers import PreTrainedTokenizerBase
old_pretrainedtokenizerbase_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__
@classmethod
def new_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs):
tokenizer = old_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs)
tokenizer._koboldai_header = tokenizer.encode("")
tokenizer.add_bos_token = False
tokenizer.add_prefix_space = False
return tokenizer
PreTrainedTokenizerBase.from_pretrained = new_pretrainedtokenizerbase_from_pretrained
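# The override above captures whatever prefix tokens a tokenizer would normally
# prepend, before BOS insertion is switched off; tokenizer._koboldai_header is
# consumed by calcsubmitbudget() further down. A standalone sketch of the
# assumption (the model name and the [2] value are assumptions about OPT, whose
# BOS token </s> has id 2; GPT-2/GPT-Neo style tokenizers would return []):
#
#     from transformers import AutoTokenizer
#     tok = AutoTokenizer.from_pretrained("facebook/opt-125m", use_fast=False)
#     tok._koboldai_header  # assumed to be [2], i.e. just the OPT BOS token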
#==================================================================#
# Variables & Storage
#==================================================================#
@ -76,9 +101,9 @@ mainmenu = [
["Adventure Models", "adventurelist", ""],
["Novel Models", "novellist", ""],
["NSFW Models", "nsfwlist", ""],
["Chatbot Models", "chatlist", ""],
["Untuned GPT-Neo/J", "gptneolist", ""],
["Untuned Fairseq Dense", "fsdlist", ""],
["Untuned OPT", "optlist", ""],
["Untuned XGLM", "xglmlist", ""],
["Untuned GPT2", "gpt2list", ""],
["Online Services", "apilist", ""],
@ -86,8 +111,10 @@ mainmenu = [
]
adventurelist= [
["Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"],
["Skein 6B", "KoboldAI/GPT-J-6B-Skein", "16GB"],
["Adventure 6B", "KoboldAI/GPT-J-6B-Adventure", "16GB"],
["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB"],
["Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "8GB"],
["Adventure 1.3B", "KoboldAI/GPT-Neo-1.3B-Adventure", "6GB"],
["Adventure 125M (Mia)", "Merry/AID-Neo-125M", "2GB"],
@ -95,11 +122,13 @@ adventurelist= [
]
novellist= [
["Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"],
["Janeway FSD 13B", "KoboldAI/fairseq-dense-13B-Janeway", "32GB"],
["Janeway FSD 6.7B", "KoboldAI/fairseq-dense-6.7B-Janeway", "16GB"],
["Janeway Neo 6B", "KoboldAI/GPT-J-6B-Janeway", "16GB"],
["Janeway Neo 2.7B", "KoboldAI/GPT-Neo-2.7B-Janeway", "8GB"],
["Janeway FSD 2.7B", "KoboldAI/fairseq-dense-2.7B-Janeway", "8GB"],
["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB"],
["Horni-LN 2.7B", "KoboldAI/GPT-Neo-2.7B-Horni-LN", "8GB"],
["Picard 2.7B (Older Janeway)", "KoboldAI/GPT-Neo-2.7B-Picard", "8GB"],
["Return to Main Menu", "Return", ""],
@ -137,6 +166,17 @@ gpt2list = [
["Return to Main Menu", "Return", ""],
]
optlist = [
["OPT 30B", "facebook/opt-30b", "64GB"],
["OPT 13B", "facebook/opt-13b", "32GB"],
["OPT 6.7B", "facebook/opt-6.7b", "16GB"],
["OPT 2.7B", "facebook/opt-2.7b", "8GB"],
["OPT 1.3B", "facebook/opt-1.3b", "4GB"],
["OPT 350M", "facebook/opt-350m", "2GB"],
["OPT 125M", "facebook/opt-125m", "1GB"],
["Return to Main Menu", "Return", ""],
]
fsdlist = [
["Fairseq Dense 13B", "KoboldAI/fairseq-dense-13B", "32GB"],
["Fairseq Dense 6.7B", "KoboldAI/fairseq-dense-6.7B", "16GB"],
@ -172,7 +212,7 @@ class vars:
model_type = "" # Model Type (Automatically taken from the model config)
noai = False # Runs the script without starting up the transformers pipeline
aibusy = False # Stops submissions while the AI is working
max_length = 1024 # Maximum number of tokens to submit per action
max_length = 2048 # Maximum number of tokens to submit per action
ikmax = 3000 # Maximum number of characters to submit to InferKit
genamt = 80 # Amount of text for each action to generate
ikgen = 200 # Number of characters for InferKit to generate
@ -182,6 +222,7 @@ class vars:
temp = 0.5 # Default generator temperature
top_p = 0.9 # Default generator top_p
top_k = 0 # Default generator top_k
top_a = 0.0 # Default generator top-a
tfs = 1.0 # Default generator tfs (tail-free sampling)
typical = 1.0 # Default generator typical sampling threshold
numseqs = 1 # Number of sequences to ask the generator to create
@ -228,6 +269,8 @@ class vars:
# badwords = [] # Array of str/chr values that should be removed from output
badwordsids = [[13460], [6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting
badwordsids_neox = [[0], [1], [44162], [9502], [12520], [31841], [36320], [49824], [34417], [6038], [34494], [24815], [26635], [24345], [3455], [28905], [44270], [17278], [32666], [46880], [7086], [43189], [37322], [17778], [20879], [49821], [3138], [14490], [4681], [21391], [26786], [43134], [9336], [683], [48074], [41256], [19181], [29650], [28532], [36487], [45114], [46275], [16445], [15104], [11337], [1168], [5647], [29], [27482], [44965], [43782], [31011], [42944], [47389], [6334], [17548], [38329], [32044], [35487], [2239], [34761], [7444], [1084], [12399], [18990], [17636], [39083], [1184], [35830], [28365], [16731], [43467], [47744], [1138], [16079], [40116], [45564], [18297], [42368], [5456], [18022], [42696], [34476], [23505], [23741], [39334], [37944], [45382], [38709], [33440], [26077], [43600], [34418], [36033], [6660], [48167], [48471], [15775], [19884], [41533], [1008], [31053], [36692], [46576], [20095], [20629], [31759], [46410], [41000], [13488], [30952], [39258], [16160], [27655], [22367], [42767], [43736], [49694], [13811], [12004], [46768], [6257], [37471], [5264], [44153], [33805], [20977], [21083], [25416], [14277], [31096], [42041], [18331], [33376], [22372], [46294], [28379], [38475], [1656], [5204], [27075], [50001], [16616], [11396], [7748], [48744], [35402], [28120], [41512], [4207], [43144], [14767], [15640], [16595], [41305], [44479], [38958], [18474], [22734], [30522], [46267], [60], [13976], [31830], [48701], [39822], [9014], [21966], [31422], [28052], [34607], [2479], [3851], [32214], [44082], [45507], [3001], [34368], [34758], [13380], [38363], [4299], [46802], [30996], [12630], [49236], [7082], [8795], [5218], [44740], [9686], [9983], [45301], [27114], [40125], [1570], [26997], [544], [5290], [49193], [23781], [14193], [40000], [2947], [43781], [9102], [48064], [42274], [18772], [49384], [9884], [45635], [43521], [31258], [32056], [47686], [21760], [13143], [10148], [26119], [44308], [31379], [36399], [23983], [46694], [36134], [8562], [12977], [35117], [28591], [49021], [47093], [28653], [29013], [46468], [8605], [7254], [25896], [5032], [8168], [36893], [38270], [20499], [27501], [34419], [29547], [28571], [36586], [20871], [30537], [26842], [21375], [31148], [27618], [33094], [3291], [31789], [28391], [870], [9793], [41361], [47916], [27468], [43856], [8850], [35237], [15707], [47552], [2730], [41449], [45488], [3073], [49806], [21938], [24430], [22747], [20924], [46145], [20481], [20197], [8239], [28231], [17987], [42804], [47269], [29972], [49884], [21382], [46295], [36676], [34616], [3921], [26991], [27720], [46265], [654], [9855], [40354], [5291], [34904], [44342], [2470], [14598], [880], [19282], [2498], [24237], [21431], [16369], [8994], [44524], [45662], [13663], [37077], [1447], [37786], [30863], [42854], [1019], [20322], [4398], [12159], [44072], [48664], [31547], [18736], [9259], [31], [16354], [21810], [4357], [37982], [5064], [2033], [32871], [47446], [62], [22158], [37387], [8743], [47007], [17981], [11049], [4622], [37916], [36786], [35138], [29925], [14157], [18095], [27829], [1181], [22226], [5709], [4725], [30189], [37014], [1254], [11380], [42989], [696], [24576], [39487], [30119], [1092], [8088], [2194], [9899], [14412], [21828], [3725], [13544], [5180], [44679], [34398], [3891], [28739], [14219], [37594], [49550], [11326], [6904], [17266], [5749], [10174], [23405], [9955], [38271], [41018], [13011], [48392], [36784], [24254], [21687], [23734], [5413], [41447], [45472], [10122], [17555], [15830], [47384], [12084], [31350], [47940], 
[11661], [27988], [45443], [905], [49651], [16614], [34993], [6781], [30803], [35869], [8001], [41604], [28118], [46462], [46762], [16262], [17281], [5774], [10943], [5013], [18257], [6750], [4713], [3951], [11899], [38791], [16943], [37596], [9318], [18413], [40473], [13208], [16375]]
badwordsids_opt = [[44717], [46613], [48513], [49923], [50185], [48755], [8488], [43303], [49659], [48601], [49817], [45405], [48742], [49925], [47720], [11227], [48937], [48784], [50017], [42248], [49310], [48082], [49895], [50025], [49092], [49007], [8061], [44226], [0], [742], [28578], [15698], [49784], [46679], [39365], [49281], [49609], [48081], [48906], [46161], [48554], [49670], [48677], [49721], [49632], [48610], [48462], [47457], [10975], [46077], [28696], [48709], [43839], [49798], [49154], [48203], [49625], [48395], [50155], [47161], [49095], [48833], [49420], [49666], [48443], [22176], [49242], [48651], [49138], [49750], [40389], [48021], [21838], [49070], [45333], [40862], [1], [49915], [33525], [49858], [50254], [44403], [48992], [48872], [46117], [49853], [47567], [50206], [41552], [50068], [48999], [49703], [49940], [49329], [47620], [49868], [49962], [2], [44082], [50236], [31274], [50260], [47052], [42645], [49177], [17523], [48691], [49900], [49069], [49358], [48794], [47529], [46479], [48457], [646], [49910], [48077], [48935], [46386], [48902], [49151], [48759], [49803], [45587], [48392], [47789], [48654], [49836], [49230], [48188], [50264], [46844], [44690], [48505], [50161], [27779], [49995], [41833], [50154], [49097], [48520], [50018], [8174], [50084], [49366], [49526], [50193], [7479], [49982], [3]]
fp32_model = False # Whether or not the most recently loaded HF model was in fp32 format
deletewi = None # Temporary storage for UID to delete
wirmvwhtsp = False # Whether to remove leading whitespace from WI entries
widepth = 3 # How many historical actions to scan for WI hits
@ -262,7 +305,7 @@ class vars:
recentrngm = None # If a new random game was recently generated without Submitting after, this is the memory used (as a string), otherwise this is None
useprompt = False # Whether to send the full prompt with every submit action
breakmodel = False # For GPU users, whether to use both system RAM and VRAM to conserve VRAM while offering speedup compared to CPU-only
bmsupported = False # Whether the breakmodel option is supported (GPT-Neo/GPT-J/XGLM only, currently)
bmsupported = False # Whether the breakmodel option is supported (GPT-Neo/GPT-J/XGLM/OPT only, currently)
nobreakmodel = False # Something specifically requested Breakmodel to be disabled (For example a models config)
smandelete = False # Whether stories can be deleted from inside the browser
smanrename = False # Whether stories can be renamed from inside the browser
@ -274,6 +317,7 @@ class vars:
acregex_ui = re.compile(r'^ *(&gt;.*)$', re.MULTILINE) # Pattern for matching actions in the HTML-escaped story so we can apply colouring, etc (make sure to encase part to format in parentheses)
comregex_ai = re.compile(r'(?:\n<\|(?:.|\n)*?\|>(?=\n|$))|(?:<\|(?:.|\n)*?\|>\n?)') # Pattern for matching comments to remove them before sending them to the AI
comregex_ui = re.compile(r'(&lt;\|(?:.|\n)*?\|&gt;)') # Pattern for matching comments in the editor
sampler_order = utils.default_sampler_order.copy()
chatmode = False
chatname = "You"
adventure = False
@ -288,7 +332,7 @@ class vars:
quiet = False # If set will suppress any story text from being printed to the console (will only be seen on the client web page)
debug = False # If set to true, will send debug information to the client for display
lazy_load = True # Whether or not to use torch_lazy_loader.py for transformers models in order to reduce CPU memory usage
use_colab_tpu = os.environ.get("COLAB_TPU_ADDR", "") != "" # Whether or not we're in a Colab TPU instance and are going to use the TPU rather than the CPU
use_colab_tpu = os.environ.get("COLAB_TPU_ADDR", "") != "" or os.environ.get("TPU_NAME", "") != "" # Whether or not we're in a Colab TPU instance or Kaggle TPU instance and are going to use the TPU rather than the CPU
utils.vars = vars
@ -379,7 +423,7 @@ def device_list(n_layers, primary=None, selected=None):
def device_config(config):
global breakmodel, generator
import breakmodel
n_layers = config.num_layers if hasattr(config, "num_layers") else config.n_layer
n_layers = utils.num_layers(config)
if(args.breakmodel_gpulayers is not None):
try:
breakmodel.gpu_blocks = list(map(int, args.breakmodel_gpulayers.split(',')))
@ -452,7 +496,7 @@ def device_config(config):
# If all layers are on the same device, use the old GPU generation mode
while(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0):
breakmodel.gpu_blocks.pop()
if(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (-1, config.num_layers if hasattr(config, "num_layers") else config.n_layer)):
if(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (-1, utils.num_layers(config))):
vars.breakmodel = False
vars.usegpu = True
vars.gpu_device = len(breakmodel.gpu_blocks)-1
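utils.num_layers(config) replaces the inline attribute probing that device_config used previously; the helper itself lives in utils.py and is not shown in this diff. A plausible reconstruction (hypothetical), mirroring the expression it replaces plus the num_hidden_layers name that OPT configs are assumed to use:

def num_layers(config):
    for attr in ("num_layers", "n_layer", "num_hidden_layers"):
        if hasattr(config, attr):
            return getattr(config, attr)
    raise AttributeError("could not find a layer count in the model config")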
@ -484,22 +528,33 @@ def move_model_to_devices(model):
model.lm_head.to(breakmodel.primary_device)
if(hasattr(model.transformer, 'wpe')):
model.transformer.wpe.to(breakmodel.primary_device)
else:
elif(not hasattr(model.model, "decoder")):
model.model.embed_tokens.to(breakmodel.primary_device)
model.model.layer_norm.to(breakmodel.primary_device)
model.lm_head.to(breakmodel.primary_device)
model.model.embed_positions.to(breakmodel.primary_device)
else:
model.model.decoder.embed_tokens.to(breakmodel.primary_device)
if(model.model.decoder.project_in is not None):
model.model.decoder.project_in.to(breakmodel.primary_device)
if(model.model.decoder.project_out is not None):
model.model.decoder.project_out.to(breakmodel.primary_device)
model.model.decoder.embed_positions.to(breakmodel.primary_device)
gc.collect()
GPTNeoModel.forward = breakmodel.new_forward_neo
if("GPTJModel" in globals()):
GPTJModel.forward = breakmodel.new_forward_neo # type: ignore
if("XGLMModel" in globals()):
XGLMModel.forward = breakmodel.new_forward_xglm # type: ignore
if("OPTDecoder" in globals()):
OPTDecoder.forward = breakmodel.new_forward_opt # type: ignore
generator = model.generate
if(hasattr(model, "transformer")):
breakmodel.move_hidden_layers(model.transformer)
else:
elif(not hasattr(model.model, "decoder")):
breakmodel.move_hidden_layers(model.model, model.model.layers)
else:
breakmodel.move_hidden_layers(model.model.decoder, model.model.decoder.layers)
#==================================================================#
# Allow the models to override some settings
@ -515,13 +570,17 @@ def loadmodelsettings():
js = json.load(open(vars.custmodpth.replace('/', '_') + "/config.json", "r"))
except Exception as e:
js = {}
if vars.model_type == "xglm" or vars.model_type == "opt" or js.get("compat", "j") == "fairseq_lm":
if vars.model_type == "xglm" or js.get("compat", "j") == "fairseq_lm":
vars.newlinemode = "s" # Default to </s> newline mode if using XGLM
if vars.model_type == "opt":
vars.newlinemode = "ns" # Handle </s> but don't convert newlines if using Fairseq models that have newlines trained in them
vars.modelconfig = js
if("badwordsids" in js):
vars.badwordsids = js["badwordsids"]
if("nobreakmodel" in js):
vars.nobreakmodel = js["nobreakmodel"]
if("sampler_order" in js):
vars.sampler_order = js["sampler_order"]
if("temp" in js):
vars.temp = js["temp"]
if("top_p" in js):
@ -532,6 +591,8 @@ def loadmodelsettings():
vars.tfs = js["tfs"]
if("typical" in js):
vars.typical = js["typical"]
if("top_a" in js):
vars.top_a = js["top_a"]
if("rep_pen" in js):
vars.rep_pen = js["rep_pen"]
if("rep_pen_slope" in js):
@ -563,11 +624,13 @@ def savesettings():
js = {}
js["apikey"] = vars.apikey
js["andepth"] = vars.andepth
js["sampler_order"] = vars.sampler_order
js["temp"] = vars.temp
js["top_p"] = vars.top_p
js["top_k"] = vars.top_k
js["tfs"] = vars.tfs
js["typical"] = vars.typical
js["top_a"] = vars.top_a
js["rep_pen"] = vars.rep_pen
js["rep_pen_slope"] = vars.rep_pen_slope
js["rep_pen_range"] = vars.rep_pen_range
@ -615,88 +678,102 @@ def settingschanged():
#==================================================================#
# Read settings from client file JSON and send to vars
#==================================================================#
def loadsettings():
if(path.exists("defaults/" + getmodelname().replace('/', '_') + ".settings")):
# Read file contents into JSON object
file = open("defaults/" + getmodelname().replace('/', '_') + ".settings", "r")
js = json.load(file)
processsettings(js)
file.close()
if(path.exists("settings/" + getmodelname().replace('/', '_') + ".settings")):
# Read file contents into JSON object
file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "r")
js = json.load(file)
# Copy file contents to vars
if("apikey" in js):
vars.apikey = js["apikey"]
if("andepth" in js):
vars.andepth = js["andepth"]
if("temp" in js):
vars.temp = js["temp"]
if("top_p" in js):
vars.top_p = js["top_p"]
if("top_k" in js):
vars.top_k = js["top_k"]
if("tfs" in js):
vars.tfs = js["tfs"]
if("typical" in js):
vars.typical = js["typical"]
if("rep_pen" in js):
vars.rep_pen = js["rep_pen"]
if("rep_pen_slope" in js):
vars.rep_pen_slope = js["rep_pen_slope"]
if("rep_pen_range" in js):
vars.rep_pen_range = js["rep_pen_range"]
if("genamt" in js):
vars.genamt = js["genamt"]
if("max_length" in js):
vars.max_length = js["max_length"]
if("ikgen" in js):
vars.ikgen = js["ikgen"]
if("formatoptns" in js):
vars.formatoptns = js["formatoptns"]
if("numseqs" in js):
vars.numseqs = js["numseqs"]
if("widepth" in js):
vars.widepth = js["widepth"]
if("useprompt" in js):
vars.useprompt = js["useprompt"]
if("adventure" in js):
vars.adventure = js["adventure"]
if("chatmode" in js):
vars.chatmode = js["chatmode"]
if("chatname" in js):
vars.chatname = js["chatname"]
if("dynamicscan" in js):
vars.dynamicscan = js["dynamicscan"]
if("nopromptgen" in js):
vars.nopromptgen = js["nopromptgen"]
if("rngpersist" in js):
vars.rngpersist = js["rngpersist"]
if("nogenmod" in js):
vars.nogenmod = js["nogenmod"]
if("autosave" in js):
vars.autosave = js["autosave"]
if("newlinemode" in js):
vars.newlinemode = js["newlinemode"]
if("welcome" in js):
vars.welcome = js["welcome"]
if("antemplate" in js):
vars.setauthornotetemplate = js["antemplate"]
if(not vars.gamestarted):
vars.authornotetemplate = vars.setauthornotetemplate
if("userscripts" in js):
vars.userscripts = []
for userscript in js["userscripts"]:
if type(userscript) is not str:
continue
userscript = userscript.strip()
if len(userscript) != 0 and all(q not in userscript for q in ("..", ":")) and all(userscript[0] not in q for q in ("/", "\\")) and os.path.exists(fileops.uspath(userscript)):
vars.userscripts.append(userscript)
if("corescript" in js and type(js["corescript"]) is str and all(q not in js["corescript"] for q in ("..", ":")) and all(js["corescript"][0] not in q for q in ("/", "\\"))):
vars.corescript = js["corescript"]
else:
vars.corescript = "default.lua"
processsettings(js)
file.close()
def processsettings(js):
# Copy file contents to vars
if("apikey" in js):
vars.apikey = js["apikey"]
if("andepth" in js):
vars.andepth = js["andepth"]
if("sampler_order" in js):
vars.sampler_order = js["sampler_order"]
if("temp" in js):
vars.temp = js["temp"]
if("top_p" in js):
vars.top_p = js["top_p"]
if("top_k" in js):
vars.top_k = js["top_k"]
if("tfs" in js):
vars.tfs = js["tfs"]
if("typical" in js):
vars.typical = js["typical"]
if("top_a" in js):
vars.top_a = js["top_a"]
if("rep_pen" in js):
vars.rep_pen = js["rep_pen"]
if("rep_pen_slope" in js):
vars.rep_pen_slope = js["rep_pen_slope"]
if("rep_pen_range" in js):
vars.rep_pen_range = js["rep_pen_range"]
if("genamt" in js):
vars.genamt = js["genamt"]
if("max_length" in js):
vars.max_length = js["max_length"]
if("ikgen" in js):
vars.ikgen = js["ikgen"]
if("formatoptns" in js):
vars.formatoptns = js["formatoptns"]
if("numseqs" in js):
vars.numseqs = js["numseqs"]
if("widepth" in js):
vars.widepth = js["widepth"]
if("useprompt" in js):
vars.useprompt = js["useprompt"]
if("adventure" in js):
vars.adventure = js["adventure"]
if("chatmode" in js):
vars.chatmode = js["chatmode"]
if("chatname" in js):
vars.chatname = js["chatname"]
if("dynamicscan" in js):
vars.dynamicscan = js["dynamicscan"]
if("nopromptgen" in js):
vars.nopromptgen = js["nopromptgen"]
if("rngpersist" in js):
vars.rngpersist = js["rngpersist"]
if("nogenmod" in js):
vars.nogenmod = js["nogenmod"]
if("autosave" in js):
vars.autosave = js["autosave"]
if("newlinemode" in js):
vars.newlinemode = js["newlinemode"]
if("welcome" in js):
vars.welcome = js["welcome"]
if("antemplate" in js):
vars.setauthornotetemplate = js["antemplate"]
if(not vars.gamestarted):
vars.authornotetemplate = vars.setauthornotetemplate
if("userscripts" in js):
vars.userscripts = []
for userscript in js["userscripts"]:
if type(userscript) is not str:
continue
userscript = userscript.strip()
if len(userscript) != 0 and all(q not in userscript for q in ("..", ":")) and all(userscript[0] not in q for q in ("/", "\\")) and os.path.exists(fileops.uspath(userscript)):
vars.userscripts.append(userscript)
if("corescript" in js and type(js["corescript"]) is str and all(q not in js["corescript"] for q in ("..", ":")) and all(js["corescript"][0] not in q for q in ("/", "\\"))):
vars.corescript = js["corescript"]
else:
vars.corescript = "default.lua"
#==================================================================#
# Load a soft prompt from a file
@ -760,7 +837,7 @@ def spRequest(filename):
tensor = tensor.reshape(
tpu_mtj_backend.params["cores_per_replica"],
-1,
tpu_mtj_backend.params["d_model"],
tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]),
)
vars.sp = tpu_mtj_backend.shard_xmap(np.float32(tensor))
else:
@ -782,6 +859,7 @@ parser.add_argument("--ngrok", action='store_true', help="Optimizes KoboldAI for
parser.add_argument("--localtunnel", action='store_true', help="Optimizes KoboldAI for Remote Play using Localtunnel")
parser.add_argument("--host", action='store_true', help="Optimizes KoboldAI for Remote Play without using a proxy service")
parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable")
parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)")
parser.add_argument("--model", help="Specify the Model Type to skip the Menu")
parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)")
parser.add_argument("--revision", help="Specify the model revision for huggingface models (can be a git branch/tag name or a git commit hash)")
@ -841,6 +919,8 @@ if args.cpu:
vars.smandelete = vars.host == args.override_delete
vars.smanrename = vars.host == args.override_rename
vars.aria2_port = args.aria2_port or 6799
# Select a model to run
if args.model:
print("Welcome to KoboldAI!\nYou have selected the following Model:", vars.model)
@ -894,12 +974,15 @@ if(vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMe
print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)")
vars.model_type = "gpt_neo"
if(vars.model_type == "opt"):
vars.badwordsids = vars.badwordsids_opt
if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
loadmodelsettings()
loadsettings()
print("{0}Looking for GPU support...{1}".format(colors.PURPLE, colors.END), end="")
vars.hascuda = torch.cuda.is_available()
vars.bmsupported = vars.model_type in ("gpt_neo", "gptj", "xglm") and not vars.nobreakmodel
vars.bmsupported = vars.model_type in ("gpt_neo", "gptj", "xglm", "opt") and not vars.nobreakmodel
if(args.breakmodel is not None and args.breakmodel):
print("WARNING: --breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).", file=sys.stderr)
if(args.breakmodel_layers is not None):
@ -1111,17 +1194,36 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
globals()[m] = getattr(__import__("transformers"), m)
except:
pass
try:
from transformers.models.opt.modeling_opt import OPTDecoder
except:
pass
import transformers.generation_utils
from transformers import __version__ as transformers_version
from transformers import PreTrainedModel
from transformers import modeling_utils
old_from_pretrained = PreTrainedModel.from_pretrained.__func__
@classmethod
def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
vars.fp32_model = False
utils.num_shards = None
utils.current_shard = 0
utils.from_pretrained_model_name = pretrained_model_name_or_path
utils.from_pretrained_index_filename = None
utils.from_pretrained_kwargs = kwargs
utils.bar = None
if not args.no_aria2:
utils.aria2_hook(pretrained_model_name_or_path, **kwargs)
return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
PreTrainedModel.from_pretrained = new_from_pretrained
if(hasattr(modeling_utils, "get_checkpoint_shard_files")):
old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files
def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs):
utils.num_shards = utils.get_num_shards(index_filename)
utils.from_pretrained_index_filename = index_filename
return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs)
modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files
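# utils.get_num_shards(index_filename), used by the patch above, is not shown in
# this diff; a plausible reconstruction (hypothetical), based on how the
# weight_map index is read later in this file when sharded checkpoints are
# copied out of the cache:
#
#     def get_num_shards(index_filename):
#         with open(index_filename) as f:
#             return len(set(json.load(f)["weight_map"].values()))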
# Lazy loader
import torch_lazy_loader
@ -1139,6 +1241,10 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
ram_blocks = gpu_blocks = cumulative_gpu_blocks = None
def lazy_load_callback(model_dict, f, **_):
if lazy_load_callback.nested:
return
lazy_load_callback.nested = True
device_map = {}
for _key, spec in lazy_load_spec.get("layer_weights", {}).items():
@ -1153,12 +1259,22 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
if isinstance(value, torch_lazy_loader.LazyTensor) and key not in device_map:
device_map[key] = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu"
if utils.num_shards is None or utils.current_shard == 0:
if utils.num_shards is not None:
num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
else:
num_tensors = len(device_map)
print(flush=True)
utils.bar = tqdm(total=num_tensors, desc="Loading model tensors")
with zipfile.ZipFile(f, "r") as z:
try:
last_storage_key = None
f = None
current_offset = 0
for key in tqdm(sorted(device_map.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)), desc="Loading model tensors"):
if utils.num_shards is not None:
utils.current_shard += 1
for key in sorted(device_map.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
storage_key = model_dict[key].key
if storage_key != last_storage_key or model_dict[key].seek_offset < current_offset:
last_storage_key = storage_key
@ -1175,6 +1291,8 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
nbytes = size if dtype is torch.bool else size * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3)
#print(f"Transferring <{key}> to {'(CPU)' if device == 'cpu' else '[device ' + str(device) + ']'} ... ", end="", flush=True)
model_dict[key] = model_dict[key].materialize(f, map_location="cpu")
if model_dict[key].dtype is torch.float32:
vars.fp32_model = True
if convert_to_float16 and vars.hascuda and (vars.breakmodel or vars.usegpu) and model_dict[key].dtype is torch.float32:
model_dict[key] = model_dict[key].to(torch.float16)
if not vars.usegpu and not vars.breakmodel and model_dict[key].dtype is torch.float16:
@ -1182,10 +1300,16 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
model_dict[key] = model_dict[key].to(device)
#print("OK", flush=True)
current_offset += nbytes
utils.bar.update(1)
finally:
if utils.num_shards is None or utils.current_shard >= utils.num_shards:
utils.bar.close()
utils.bar = None
lazy_load_callback.nested = False
if isinstance(f, zipfile.ZipExtFile):
f.close()
lazy_load_callback.nested = False
return lazy_load_callback
lazy_load_config_path = os.path.join("maps", vars.model_type + ".json")
@ -1231,8 +1355,10 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
input_ids.clamp_(max=self.config.vocab_size-1)
if(hasattr(self, "transformer")):
inputs_embeds = self.transformer.wte(input_ids)
else:
elif(not hasattr(self.model, "decoder")):
inputs_embeds = self.model.embed_tokens(input_ids)
else:
inputs_embeds = self.model.decoder.embed_tokens(input_ids)
if(vars.sp is not None):
vars.sp = vars.sp.to(inputs_embeds.dtype).to(inputs_embeds.device)
inputs_embeds = torch.where(
@ -1240,23 +1366,42 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
vars.sp[shifted_input_ids.clamp(min=0)],
inputs_embeds,
)
if(not hasattr(self, "transformer")):
if(hasattr(self, "model") and hasattr(self.model, "embed_scale")):
inputs_embeds *= self.model.embed_scale
kwargs['inputs_embeds'] = inputs_embeds
return old_forward(self, *args, **kwargs)
cls.forward = new_causallm_forward
for cls in (GPT2LMHeadModel, GPTNeoForCausalLM):
patch_causallm(cls)
for c in ("GPTJForCausalLM", "XGLMForCausalLM"):
for c in ("GPTJForCausalLM", "XGLMForCausalLM", "OPTForCausalLM"):
try:
patch_causallm(getattr(__import__("transformers"), c))
except:
pass
# Fix a bug in OPTForCausalLM where self.lm_head is the wrong size
if(packaging.version.parse("4.19.0.dev0") <= packaging.version.parse(transformers_version) < packaging.version.parse("4.20.0")):
try:
from transformers import OPTForCausalLM, OPTModel
except ImportError:
pass
else:
# This is the same as the original __init__ but with
# config.hidden_size
# replaced with
# config.word_embed_proj_dim
def new_init(self, config):
super(OPTForCausalLM, self).__init__(config)
self.model = OPTModel(config)
self.lm_head = torch.nn.Linear(config.word_embed_proj_dim, config.vocab_size, bias=False)
self.post_init()
OPTForCausalLM.__init__ = new_init
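# A sketch of why the replacement __init__ sizes lm_head from word_embed_proj_dim
# rather than hidden_size (the numbers are assumptions about facebook/opt-350m,
# the OPT size that uses project_in/project_out between the two dimensions):
#
#     from transformers import OPTConfig
#     cfg = OPTConfig(hidden_size=1024, word_embed_proj_dim=512, vocab_size=50272)
#     torch.nn.Linear(cfg.word_embed_proj_dim, cfg.vocab_size, bias=False)
#     # matches decoder.project_out's output width; Linear(cfg.hidden_size, ...) would not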
# Patch transformers to use our custom logit warpers
from transformers import LogitsProcessorList, LogitsWarper, LogitsProcessor, TopKLogitsWarper, TopPLogitsWarper, TemperatureLogitsWarper, RepetitionPenaltyLogitsProcessor
from warpers import AdvancedRepetitionPenaltyLogitsProcessor, TailFreeLogitsWarper, TypicalLogitsWarper
from warpers import AdvancedRepetitionPenaltyLogitsProcessor, TailFreeLogitsWarper, TypicalLogitsWarper, TopALogitsWarper
def dynamic_processor_wrap(cls, field_name, var_name, cond=None):
old_call = cls.__call__
@ -1276,6 +1421,7 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
cls.__call__ = new_call
dynamic_processor_wrap(AdvancedRepetitionPenaltyLogitsProcessor, ("penalty", "penalty_slope", "penalty_range"), ("rep_pen", "rep_pen_slope", "rep_pen_range"), cond=lambda x: x[0] != 1.0)
dynamic_processor_wrap(TopKLogitsWarper, "top_k", "top_k", cond=lambda x: x > 0)
dynamic_processor_wrap(TopALogitsWarper, "top_a", "top_a", cond=lambda x: x > 0.0)
dynamic_processor_wrap(TopPLogitsWarper, "top_p", "top_p", cond=lambda x: x < 1.0)
dynamic_processor_wrap(TailFreeLogitsWarper, "tfs", "tfs", cond=lambda x: x < 1.0)
dynamic_processor_wrap(TypicalLogitsWarper, "typical", "typical", cond=lambda x: x < 1.0)
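TopALogitsWarper is defined in the project's warpers.py and is not shown in this diff; top-a filtering is assumed here to drop every token whose probability falls below top_a times the square of the highest token probability. A minimal standalone sketch of that rule:

import torch

def top_a_filter(scores: torch.Tensor, top_a: float, filter_value: float = -float("inf")) -> torch.Tensor:
    # scores: raw logits for one generation step, shape (batch, vocab)
    probs = scores.softmax(dim=-1)
    limit = probs.max(dim=-1, keepdim=True).values.pow(2) * top_a
    return scores.masked_fill(probs < limit, filter_value)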
@ -1319,21 +1465,30 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
new_get_logits_processor.old_get_logits_processor = transformers.generation_utils.GenerationMixin._get_logits_processor
transformers.generation_utils.GenerationMixin._get_logits_processor = new_get_logits_processor
class KoboldLogitsWarperList(LogitsProcessorList):
def __init__(self, beams: int = 1, **kwargs):
self.__warper_list: List[LogitsWarper] = []
self.__warper_list.append(TopKLogitsWarper(top_k=1, min_tokens_to_keep=1 + (beams > 1)))
self.__warper_list.append(TopALogitsWarper(top_a=0.5, min_tokens_to_keep=1 + (beams > 1)))
self.__warper_list.append(TopPLogitsWarper(top_p=0.5, min_tokens_to_keep=1 + (beams > 1)))
self.__warper_list.append(TailFreeLogitsWarper(tfs=0.5, min_tokens_to_keep=1 + (beams > 1)))
self.__warper_list.append(TypicalLogitsWarper(typical=0.5, min_tokens_to_keep=1 + (beams > 1)))
self.__warper_list.append(TemperatureLogitsWarper(temperature=0.5))
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, *args, **kwargs):
for k in vars.sampler_order:
scores = self.__warper_list[k](input_ids, scores, *args, **kwargs)
return scores
def new_get_logits_warper(beams: int = 1,) -> LogitsProcessorList:
warper_list = LogitsProcessorList()
warper_list.append(TopKLogitsWarper(top_k=1, min_tokens_to_keep=1 + (beams > 1)))
warper_list.append(TopPLogitsWarper(top_p=0.5, min_tokens_to_keep=1 + (beams > 1)))
warper_list.append(TailFreeLogitsWarper(tfs=0.5, min_tokens_to_keep=1 + (beams > 1)))
warper_list.append(TypicalLogitsWarper(typical=0.5, min_tokens_to_keep=1 + (beams > 1)))
warper_list.append(TemperatureLogitsWarper(temperature=0.5))
return warper_list
return KoboldLogitsWarperList(beams=beams)
def new_sample(self, *args, **kwargs):
assert kwargs.pop("logits_warper", None) is not None
kwargs["logits_warper"] = new_get_logits_warper(
beams=1,
)
if(vars.newlinemode == "s"):
if(vars.newlinemode == "s") or (vars.newlinemode == "ns"):
kwargs["eos_token_id"] = -1
kwargs.setdefault("pad_token_id", 2)
return new_sample.old_sample(self, *args, **kwargs)
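vars.sampler_order holds indices into the fixed warper list that KoboldLogitsWarperList builds above, so reordering it changes which sampler is applied first. The index assignment implied by the append order (utils.default_sampler_order is presumed to list all six, 0 through 5):

SAMPLER_INDEX = {
    0: "top_k",
    1: "top_a",
    2: "top_p",
    3: "tfs",
    4: "typical",
    5: "temperature",
}
# e.g. sampler_order = [5, 0, 1, 2, 3, 4] applies temperature before the truncation samplers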
@ -1408,12 +1563,18 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
def get_hidden_size_from_model(model):
try:
return int(model.transformer.hidden_size)
return int(model.model.decoder.project_in.in_features)
except:
try:
return int(model.transformer.embed_dim)
return int(model.model.decoder.embed_tokens.out_features)
except:
return int(model.lm_head.in_features)
try:
return int(model.transformer.hidden_size)
except:
try:
return int(model.transformer.embed_dim)
except:
return int(model.lm_head.in_features)
def maybe_low_cpu_mem_usage() -> Dict[str, Any]:
if(packaging.version.parse(transformers_version) < packaging.version.parse("4.11.0")):
@ -1468,12 +1629,16 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
import shutil
shutil.move(vars.model.replace('/', '_'), "models/{}".format(vars.model.replace('/', '_')))
print("\n", flush=True)
with maybe_use_float16(), torch_lazy_loader.use_lazy_torch_load(enable=vars.lazy_load, callback=get_lazy_load_callback(model_config.num_layers if hasattr(model_config, "num_layers") else model_config.n_layer) if vars.lazy_load else None, dematerialized_modules=True):
with maybe_use_float16(), torch_lazy_loader.use_lazy_torch_load(enable=vars.lazy_load, callback=get_lazy_load_callback(utils.num_layers(model_config)) if vars.lazy_load else None, dematerialized_modules=True):
if(vars.lazy_load): # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time
lowmem = {}
if(os.path.isdir(vars.custmodpth)):
try:
tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@ -1486,6 +1651,10 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
try:
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@ -1496,8 +1665,25 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
except Exception as e:
model = GPTNeoForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", **lowmem)
else:
old_rebuild_tensor = torch._utils._rebuild_tensor
def new_rebuild_tensor(storage: Union[torch_lazy_loader.LazyTensor, torch.Storage], storage_offset, shape, stride):
if(not isinstance(storage, torch_lazy_loader.LazyTensor)):
dtype = storage.dtype
else:
dtype = storage.storage_type.dtype
if(not isinstance(dtype, torch.dtype)):
dtype = storage.storage_type(0).dtype
if(dtype is torch.float32 and len(shape) >= 2):
vars.fp32_model = True
return old_rebuild_tensor(storage, storage_offset, shape, stride)
torch._utils._rebuild_tensor = new_rebuild_tensor
try:
tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
@ -1508,11 +1694,32 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
except Exception as e:
model = GPTNeoForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", **lowmem)
torch._utils._rebuild_tensor = old_rebuild_tensor
if not args.colab or args.savemodel:
import shutil
model = model.half()
model.save_pretrained("models/{}".format(vars.model.replace('/', '_')), max_shard_size="500MiB")
tokenizer.save_pretrained("models/{}".format(vars.model.replace('/', '_')))
if(vars.fp32_model): # Use save_pretrained to convert fp32 models to fp16
model = model.half()
model.save_pretrained("models/{}".format(vars.model.replace('/', '_')), max_shard_size="500MiB")
else: # For fp16 models, we can just copy the model files directly
import transformers.configuration_utils
import transformers.modeling_utils
import transformers.file_utils
# Save the config.json
shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, transformers.configuration_utils.CONFIG_NAME, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.configuration_utils.CONFIG_NAME))
if(utils.num_shards is None):
# Save the pytorch_model.bin of an unsharded model
shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, transformers.modeling_utils.WEIGHTS_NAME, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_NAME))
else:
with open(utils.from_pretrained_index_filename) as f:
map_data = json.load(f)
filenames = set(map_data["weight_map"].values())
# Save the pytorch_model.bin.index.json of a sharded model
shutil.move(utils.from_pretrained_index_filename, os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_INDEX_NAME))
# Then save the pytorch_model-#####-of-#####.bin files
for filename in filenames:
shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, filename, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), filename))
shutil.rmtree("cache/")
if(vars.hascuda):
@ -1548,13 +1755,28 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
else:
from transformers import PreTrainedModel
from transformers import modeling_utils
old_from_pretrained = PreTrainedModel.from_pretrained.__func__
@classmethod
def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
vars.fp32_model = False
utils.num_shards = None
utils.current_shard = 0
utils.from_pretrained_model_name = pretrained_model_name_or_path
utils.from_pretrained_index_filename = None
utils.from_pretrained_kwargs = kwargs
utils.bar = None
if not args.no_aria2:
utils.aria2_hook(pretrained_model_name_or_path, **kwargs)
return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
PreTrainedModel.from_pretrained = new_from_pretrained
if(hasattr(modeling_utils, "get_checkpoint_shard_files")):
old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files
def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs):
utils.num_shards = utils.get_num_shards(index_filename)
utils.from_pretrained_index_filename = index_filename
return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs)
modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files
def tpumtjgetsofttokens():
soft_tokens = None
@ -1562,14 +1784,14 @@ else:
global np
if 'np' not in globals():
import numpy as np
tensor = np.zeros((1, tpu_mtj_backend.params["d_model"]), dtype=np.float32)
tensor = np.zeros((1, tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"])), dtype=np.float32)
rows = tensor.shape[0]
padding_amount = tpu_mtj_backend.params["seq"] - (tpu_mtj_backend.params["seq"] % -tpu_mtj_backend.params["cores_per_replica"]) - rows
tensor = np.pad(tensor, ((0, padding_amount), (0, 0)))
tensor = tensor.reshape(
tpu_mtj_backend.params["cores_per_replica"],
-1,
tpu_mtj_backend.params["d_model"],
tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]),
)
vars.sp = tpu_mtj_backend.shard_xmap(tensor)
soft_tokens = np.arange(
@ -1631,11 +1853,13 @@ else:
def tpumtjgenerate_settings_callback() -> dict:
return {
"sampler_order": vars.sampler_order,
"top_p": float(vars.top_p),
"temp": float(vars.temp),
"top_k": int(vars.top_k),
"tfs": float(vars.tfs),
"typical": float(vars.typical),
"top_a": float(vars.top_a),
"repetition_penalty": float(vars.rep_pen),
"rpslope": float(vars.rep_pen_slope),
"rprange": int(vars.rep_pen_range),
@ -1658,7 +1882,7 @@ else:
if vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and (not vars.custmodpth or not os.path.isdir(vars.custmodpth)):
raise FileNotFoundError(f"The specified model path {repr(vars.custmodpth)} is not the path to a valid folder")
import tpu_mtj_backend
if(vars.model == "TPUMeshTransformerGPTNeoX"):
if(vars.model == "TPUMeshTransformerGPTNeoX" or vars.model_type == "opt"):
tpu_mtj_backend.pad_token_id = 1
tpu_mtj_backend.vars = vars
tpu_mtj_backend.warper_callback = tpumtjgenerate_warper_callback
@ -1670,7 +1894,7 @@ else:
loadmodelsettings()
loadsettings()
tpu_mtj_backend.load_model(vars.custmodpth, hf_checkpoint=vars.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and vars.use_colab_tpu, **vars.modelconfig)
vars.modeldim = int(tpu_mtj_backend.params["d_model"])
vars.modeldim = int(tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]))
tokenizer = tpu_mtj_backend.tokenizer
else:
loadsettings()
@ -1998,6 +2222,7 @@ def lua_has_setting(setting):
"settopk",
"settfs",
"settypical",
"settopa",
"setreppen",
"setreppenslope",
"setreppenrange",
@ -2017,6 +2242,7 @@ def lua_has_setting(setting):
"top_k",
"tfs",
"typical",
"topa",
"reppen",
"reppenslope",
"reppenrange",
@ -2051,6 +2277,7 @@ def lua_get_setting(setting):
if(setting in ("settopk", "topk", "top_k")): return vars.top_k
if(setting in ("settfs", "tfs")): return vars.tfs
if(setting in ("settypical", "typical")): return vars.typical
if(setting in ("settopa", "topa")): return vars.top_a
if(setting in ("setreppen", "reppen")): return vars.rep_pen
if(setting in ("setreppenslope", "reppenslope")): return vars.rep_pen_slope
if(setting in ("setreppenrange", "reppenrange")): return vars.rep_pen_range
@ -2086,6 +2313,7 @@ def lua_set_setting(setting, v):
if(setting in ("settopk", "topk")): vars.top_k = v
if(setting in ("settfs", "tfs")): vars.tfs = v
if(setting in ("settypical", "typical")): vars.typical = v
if(setting in ("settopa", "topa")): vars.top_a = v
if(setting in ("setreppen", "reppen")): vars.rep_pen = v
if(setting in ("setreppenslope", "reppenslope")): vars.rep_pen_slope = v
if(setting in ("setreppenrange", "reppenrange")): vars.rep_pen_range = v
@ -2510,6 +2738,11 @@ def get_message(msg):
emit('from_server', {'cmd': 'setlabeltypical', 'data': msg['data']}, broadcast=True)
settingschanged()
refresh_settings()
elif(msg['cmd'] == 'settopa'):
vars.top_a = float(msg['data'])
emit('from_server', {'cmd': 'setlabeltopa', 'data': msg['data']}, broadcast=True)
settingschanged()
refresh_settings()
elif(msg['cmd'] == 'setreppen'):
vars.rep_pen = float(msg['data'])
emit('from_server', {'cmd': 'setlabelreppen', 'data': msg['data']}, broadcast=True)
@ -2663,6 +2896,8 @@ def get_message(msg):
elif(msg['cmd'] == 'uslistrequest'):
unloaded, loaded = getuslist()
emit('from_server', {'cmd': 'buildus', 'data': {"unloaded": unloaded, "loaded": loaded}})
elif(msg['cmd'] == 'samplerlistrequest'):
emit('from_server', {'cmd': 'buildsamplers', 'data': vars.sampler_order})
elif(msg['cmd'] == 'usloaded'):
vars.userscripts = []
for userscript in msg['data']:
@ -2676,6 +2911,16 @@ def get_message(msg):
load_lua_scripts()
unloaded, loaded = getuslist()
sendUSStatItems()
elif(msg['cmd'] == 'samplers'):
sampler_order = msg["data"]
if(not isinstance(sampler_order, list)):
raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
if(len(sampler_order) != len(vars.sampler_order)):
raise ValueError(f"Sampler order must be a list of length {len(vars.sampler_order)}, but got a list of length {len(sampler_order)}")
if(not all(isinstance(e, int) for e in sampler_order)):
raise ValueError(f"Sampler order must be a list of ints, but got a list with at least one non-int element")
vars.sampler_order = sampler_order
settingschanged()
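# A sketch of the client message this 'samplers' branch accepts (values are
# illustrative; the list length must match len(vars.sampler_order) and every
# element must be an int):
#
#     {'cmd': 'samplers', 'data': [5, 0, 1, 2, 3, 4]}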
elif(msg['cmd'] == 'loadselect'):
vars.loadselect = msg["data"]
elif(msg['cmd'] == 'spselect'):
@ -3104,24 +3349,26 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
global tokenizer
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
lnheader = len(tokenizer._koboldai_header)
# Calculate token budget
prompttkns = tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', vars.prompt)), max_length=int(2e9), truncation=True)
lnprompt = len(prompttkns)
memtokens = tokenizer.encode(utils.encodenewlines(mem), max_length=int(2e9), truncation=True)
lnmem = len(memtokens)
if(lnmem > vars.max_length - lnsp - vars.genamt - budget_deduction):
if(lnmem > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
raise OverflowError("The memory in your story is too long. Please either write a shorter memory text or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
witokens = tokenizer.encode(utils.encodenewlines(winfo), max_length=int(2e9), truncation=True)
lnwi = len(witokens)
if(lnmem + lnwi > vars.max_length - lnsp - vars.genamt - budget_deduction):
if(lnmem + lnwi > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
raise OverflowError("The current active world info keys take up too many tokens. Please either write shorter world info, decrease World Info Depth or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
if(anotetxt != ""):
anotetkns = tokenizer.encode(utils.encodenewlines(anotetxt), max_length=int(2e9), truncation=True)
lnanote = len(anotetkns)
if(lnmem + lnwi + lnanote > vars.max_length - lnsp - vars.genamt - budget_deduction):
if(lnmem + lnwi + lnanote > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
raise OverflowError("The author's note in your story is too long. Please either write a shorter author's note or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
if(vars.useprompt):
@ -3132,14 +3379,14 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
lnsubmission = len(tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', submission)), max_length=int(2e9), truncation=True)) if submission is not None else 0
maybe_lnprompt = lnprompt if vars.useprompt and actionlen > 0 else 0
if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnsp - vars.genamt - budget_deduction):
if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
raise OverflowError("Your submission is too long. Please either write a shorter submission or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt. If you are using the Always Add Prompt setting, turning it off may help.")
assert budget >= 0
if(actionlen == 0):
# First/Prompt action
tokens = memtokens + witokens + anotetkns + prompttkns
tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns
assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
ln = len(tokens) + lnsp
return tokens, ln+1, ln+vars.genamt
@ -3187,12 +3434,12 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
# Did we get to add the A.N.? If not, do it here
if(anotetxt != ""):
if((not anoteadded) or forceanote):
tokens = memtokens + witokens + anotetkns + prompttkns + tokens
tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns + tokens
else:
tokens = memtokens + witokens + prompttkns + tokens
tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens
else:
# Prepend Memory, WI, and Prompt before action tokens
tokens = memtokens + witokens + prompttkns + tokens
tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens
# Send completed bundle to generator
assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
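With the _koboldai_header length included, the budget checks above enforce lnheader + lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission <= vars.max_length - lnsp - vars.genamt - budget_deduction. A worked sketch using this commit's defaults (assuming a 1-token OPT header, no soft prompt and no deduction):

max_length, genamt, lnheader, lnsp, budget_deduction = 2048, 80, 1, 0, 0
context_budget = max_length - lnsp - genamt - budget_deduction - lnheader  # 1967 tokens left for story context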
@ -3570,6 +3817,7 @@ def sendtocolab(txt, min, max):
'top_k': vars.top_k,
'tfs': vars.tfs,
'typical': vars.typical,
'topa': vars.top_a,
'numseqs': vars.numseqs,
'retfultxt': False
}
@ -3707,12 +3955,14 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None):
top_k=vars.top_k,
tfs=vars.tfs,
typical=vars.typical,
top_a=vars.top_a,
numseqs=vars.numseqs,
repetition_penalty=vars.rep_pen,
rpslope=vars.rep_pen_slope,
rprange=vars.rep_pen_range,
soft_embeddings=vars.sp,
soft_tokens=soft_tokens,
sampler_order=vars.sampler_order,
)
past = genout
for i in range(vars.numseqs):
@ -3893,6 +4143,7 @@ def refresh_settings():
emit('from_server', {'cmd': 'updatetopk', 'data': vars.top_k}, broadcast=True)
emit('from_server', {'cmd': 'updatetfs', 'data': vars.tfs}, broadcast=True)
emit('from_server', {'cmd': 'updatetypical', 'data': vars.typical}, broadcast=True)
emit('from_server', {'cmd': 'updatetopa', 'data': vars.top_a}, broadcast=True)
emit('from_server', {'cmd': 'updatereppen', 'data': vars.rep_pen}, broadcast=True)
emit('from_server', {'cmd': 'updatereppenslope', 'data': vars.rep_pen_slope}, broadcast=True)
emit('from_server', {'cmd': 'updatereppenrange', 'data': vars.rep_pen_range}, broadcast=True)
@ -4469,6 +4720,7 @@ def oairequest(txt, min, max):
'prompt': txt,
'max_tokens': vars.genamt,
'temperature': vars.temp,
'top_a': vars.top_a,
'top_p': vars.top_p,
'top_k': vars.top_k,
'tfs': vars.tfs,

breakmodel.py

@ -633,11 +633,11 @@ def new_forward_xglm(
layer_outputs = decoder_layer(
hidden_states.to(device) if breakmodel and hidden_states is not None else hidden_states,
attention_mask=attention_mask.to(device) if breakmodel and attention_mask is not None else attention_mask,
encoder_hidden_states=encoder_hidden_states.to(device) if encoder_hidden_states is not None else None,
encoder_attention_mask=encoder_attention_mask.to(device) if encoder_attention_mask is not None else None,
layer_head_mask=((head_mask[idx].to(device) if head_mask[idx] is not None else None) if head_mask is not None else None),
encoder_hidden_states=encoder_hidden_states.to(device) if breakmodel and encoder_hidden_states is not None else encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask.to(device) if breakmodel and encoder_attention_mask is not None else encoder_attention_mask,
layer_head_mask=((head_mask[idx].to(device) if breakmodel and head_mask[idx] is not None else head_mask[idx]) if head_mask is not None else None),
cross_attn_layer_head_mask=(
(cross_attn_head_mask[idx].to(device) if cross_attn_head_mask[idx] is not None else None) if cross_attn_head_mask is not None else None
(cross_attn_head_mask[idx].to(device) if breakmodel and cross_attn_head_mask[idx] is not None else cross_attn_head_mask[idx]) if cross_attn_head_mask is not None else None
),
past_key_value=tuple(v.to(device) for v in past_key_value if v is not None) if breakmodel and past_key_value is not None and i >= ram_blocks and len(past_key_value) and past_key_value[0].device.index != device else past_key_value,
output_attentions=output_attentions,
@ -686,3 +686,177 @@ def new_forward_xglm(
attentions=all_self_attns,
cross_attentions=all_cross_attentions,
)
def new_forward_opt(
self,
input_ids=None,
attention_mask=None,
head_mask=None,
past_key_values=None,
inputs_embeds=None,
use_cache=None,
output_attentions=None,
output_hidden_states=None,
return_dict=None,
):
assert len(gpu_blocks) <= torch.cuda.device_count()
assert sum(gpu_blocks) <= len(self.layers)
ram_blocks = len(self.layers) - sum(gpu_blocks)
cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
use_cache = use_cache if use_cache is not None else self.config.use_cache
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
# retrieve input_ids and inputs_embeds
if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
elif input_ids is not None:
input_shape = input_ids.size()
input_ids = input_ids.view(-1, input_shape[-1])
elif inputs_embeds is not None:
input_shape = inputs_embeds.size()[:-1]
else:
raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0
if inputs_embeds is None:
if breakmodel:
input_ids = input_ids.to(primary_device)
inputs_embeds = self.embed_tokens(input_ids)
# embed positions
if breakmodel:
inputs_embeds = inputs_embeds.to(primary_device)
if attention_mask is None:
attention_mask = torch.ones(inputs_embeds.shape[:2], dtype=torch.bool, device=inputs_embeds.device)
positions = self.embed_positions(attention_mask)[:, past_key_values_length:, :]
if breakmodel:
positions = positions.to(primary_device)
attention_mask = self._prepare_decoder_attention_mask(
attention_mask, input_shape, inputs_embeds, past_key_values_length
)
if self.project_in is not None:
inputs_embeds = self.project_in(inputs_embeds)
hidden_states = inputs_embeds + positions
hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
# decoder layers
all_hidden_states = () if output_hidden_states else None
all_self_attns = () if output_attentions else None
next_decoder_cache = () if use_cache else None
if breakmodel and ram_blocks:
copystream = torch.cuda.Stream(device=primary_device, priority=-1)
# check if head_mask has a correct number of layers specified if desired
for attn_mask, mask_name in zip([head_mask], ["head_mask"]):
if attn_mask is not None:
if attn_mask.size()[0] != (len(self.layers)):
raise ValueError(
f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for"
f" {head_mask.size()[0]}."
)
for idx, decoder_layer in enumerate(self.layers):
i = idx
if breakmodel:
if i in range(ram_blocks):
index1 = (i+1)%ram_blocks
for param1,param2 in zip(self.layers[index1].parameters(),self.layers[(i-1)%ram_blocks].parameters()):
param1.data = param2.data
for param1,param2 in zip(self.layers[index1].parameters(),self.extrastorage[index1].parameters()):
with torch.cuda.stream(copystream):
torch.cuda.comm.broadcast(param2.data,out = [param1.data])
# add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
if output_hidden_states:
all_hidden_states += (hidden_states,)
dropout_probability = random.uniform(0, 1)
if self.training and (dropout_probability < self.layerdrop):
continue
past_key_value = past_key_values[idx] if past_key_values is not None else None
if self.gradient_checkpointing and self.training:
if use_cache:
logger.warning(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
)
use_cache = False
def create_custom_forward(module):
def custom_forward(*inputs):
# None for past_key_value
return module(*inputs, output_attentions, None)
return custom_forward
layer_outputs = torch.utils.checkpoint.checkpoint(
create_custom_forward(decoder_layer),
hidden_states,
attention_mask,
head_mask[idx] if head_mask is not None else None,
None,
)
else:
if breakmodel:
device = primary_device if i < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, i - ram_blocks)
layer_outputs = decoder_layer(
hidden_states.to(device) if breakmodel and hidden_states is not None else hidden_states,
attention_mask=attention_mask.to(device) if breakmodel and attention_mask is not None else attention_mask,
layer_head_mask=((head_mask[idx].to(device) if breakmodel and head_mask[idx] is not None else head_mask[idx]) if head_mask is not None else None),
past_key_value=tuple(v.to(device) for v in past_key_value if v is not None) if breakmodel and past_key_value is not None and i >= ram_blocks and len(past_key_value) and past_key_value[0].device.index != device else past_key_value,
output_attentions=output_attentions,
use_cache=use_cache,
)
hidden_states = layer_outputs[0]
if use_cache:
next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)
if output_attentions:
all_self_attns += (layer_outputs[1],)
if breakmodel:
if i in range(ram_blocks):
torch.cuda.synchronize()
torch.cuda.empty_cache()
if breakmodel:
if ram_blocks:
del copystream
torch.cuda.empty_cache()
hidden_states = hidden_states.to(primary_device)
if self.project_out is not None:
hidden_states = self.project_out(hidden_states)
if breakmodel:
hidden_states = hidden_states.to(primary_device)
# add hidden states from the last decoder layer
if output_hidden_states:
all_hidden_states += (hidden_states,)
next_cache = next_decoder_cache if use_cache else None
if not return_dict:
return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
return BaseModelOutputWithPast(
last_hidden_state=hidden_states,
past_key_values=next_cache,
hidden_states=all_hidden_states,
attentions=all_self_attns,
)
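The heart of this breakmodel forward pass is the layer-to-device assignment: the first `ram_blocks` layers are streamed through the primary GPU while the rest are pinned to GPUs according to `gpu_blocks`, exactly as in the `bisect.bisect_right` expression above. A standalone sketch of that mapping (device indices only, no tensors; the layer count and block split are made-up example values):

```python
import bisect
import itertools

def layer_devices(n_layers, gpu_blocks, primary_device=0):
    # Mirrors the per-layer device choice in new_forward_opt / new_forward_xglm:
    # layers not covered by gpu_blocks stay in RAM and run on the primary GPU,
    # the rest are assigned by bisecting the cumulative block counts.
    ram_blocks = n_layers - sum(gpu_blocks)
    cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))
    devices = []
    for i in range(n_layers):
        if i < ram_blocks:
            devices.append(primary_device)
        else:
            devices.append(bisect.bisect_right(cumulative_gpu_blocks, i - ram_blocks))
    return devices

# A 32-layer model with 8 layers on GPU 0 and 20 on GPU 1 leaves 4 streamed from RAM.
print(layer_devices(32, (8, 20)))
```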

View File

@ -867,6 +867,7 @@ return function(_python, _bridged)
---@field settopk integer
---@field settfs number
---@field settypical number
---@field settopa number
---@field setreppen number
---@field setreppenslope number
---@field setreppenrange number
@ -884,6 +885,7 @@ return function(_python, _bridged)
---@field top_k integer
---@field tfs number
---@field typical number
---@field topa number
---@field reppen number
---@field reppenslope number
---@field reppenrange number

View File

@ -7,7 +7,7 @@
"private_outputs": true,
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyOKIa/NDLlYI5j63GXPtkXv",
"authorship_tag": "ABX9TyPbwW79K9/RkYH9i9rkYFyj",
"include_colab_link": true
},
"kernelspec": {
@ -68,14 +68,20 @@
"#@title <b><-- Click this to start KoboldAI</b>\n",
"#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
"\n",
"Model = \"KoboldAI/GPT-Neo-2.7B-Janeway\" #@param [\"KoboldAI/GPT-Neo-2.7B-Janeway\", \"KoboldAI/GPT-Neo-2.7B-AID\", \"KoboldAI/GPT-Neo-2.7B-Picard\", \"KoboldAI/GPT-Neo-2.7B-Horni-LN\", \"KoboldAI/GPT-Neo-2.7B-Horni\", \"KoboldAI/GPT-Neo-2.7B-Shinen\", \"EleutherAI/gpt-neo-2.7B\"] {allow-input: true}\n",
"Model = \"KoboldAI/fairseq-dense-2.7B-Nerys\" #@param [\"KoboldAI/fairseq-dense-2.7B-Nerys\", \"KoboldAI/GPT-Neo-2.7B-Janeway\", \"KoboldAI/GPT-Neo-2.7B-AID\", \"KoboldAI/GPT-Neo-2.7B-Picard\", \"KoboldAI/GPT-Neo-2.7B-Horni-LN\", \"KoboldAI/GPT-Neo-2.7B-Horni\", \"KoboldAI/GPT-Neo-2.7B-Shinen\", \"EleutherAI/gpt-neo-2.7B\"] {allow-input: true}\n",
"Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
"Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
"\n",
"!nvidia-smi\n",
"from google.colab import drive\n",
"drive.mount('/content/drive/')\n",
"\n",
"!wget https://henk.tech/ckds -O - | bash /dev/stdin -m $Model -g $Version"
"if Provider == \"Localtunnel\":\n",
" tunnel = \"--localtunnel yes\"\n",
"else:\n",
" tunnel = \"\"\n",
"\n",
"!wget https://henk.tech/ckds -O - | bash /dev/stdin -m $Model -g $Version $tunnel"
],
"execution_count": null,
"outputs": []
@ -84,27 +90,32 @@
"cell_type": "markdown",
"source": [
"# GPU Edition Model Descriptions\n",
"| Model | Size | Style | Description |\n",
"| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ |\n",
"| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B GPU | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n",
"| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n",
"| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n",
"| Model | Size | Style | Description |\n",
"| --- | --- | --- | --- |\n",
"| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
"| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n",
"| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n",
"| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n",
"\n",
"# [TPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)\n",
"\n",
"| Model | Size | Style | Drive Space | Description |\n",
"| ------------------------------ | ------ | --------- | ----------- | ------------------------------------------------------------ |\n",
"| Skein 6B by VE_FORBDRYDERNE | 6B TPU | Hybrid | 0 GB | Skein is our flagship 6B model, it is a hybrid between a Adventure model and a Novel model. Best used with either Adventure mode or the You Bias userscript enabled. Skein has been trained on high quality Novels along with CYOA adventure stories and is not as wackey as the Adventure model. It also has tagging support. |\n",
"| Janeway 6B by Mr Seeker | 6B TPU | Novel | 0 GB | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| Adventure 6B by VE_FORBRYDERNE | 6B TPU | Adventure | 0 GB | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n",
"| Lit 6B by Haru | 6B TPU | NSFW | 8 GB / 12 GB | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n",
"| Shinen 6B by Mr Seeker | 6B TPU | NSFW | 0 GB | Shinen is an alternative to the Lit model designed to be more explicit. If Lit is to tame for you Shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
"| Generic 6B by EleutherAI | 6B TPU | Generic | 10 GB / 12 GB | GPT-J-6B is what all other models are based on, if you need something that has no specific bias towards any particular subject this is the model for you. Best used when the other models are not suitable for what you wish to do. Such as homework assistance, blog writing, coding and more. It needs more hand holding than other models and is more prone to undesirable formatting changes. |\n",
"| C1 6B by Haru | 6B TPU | Chatbot | 8 GB / 12 GB | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n",
"| Model | Size | Style | Description |\n",
"| --- | --- | --- | --- |\n",
"| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
"| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n",
"| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n",
"| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n",
"| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n",
"| [Convo](https://huggingface.co/hitomi-team/convo-6B) by Hitomi Team | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). |\n",
"| [C1](https://huggingface.co/hakurei/c1-6B) by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n",
"| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n",
"| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n",
"| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n",
"\n",
"\n",
"| Style | Description |\n",
@ -113,7 +124,6 @@
"| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n",
"| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |\n",
"| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n",
"| Hybrid | Hybrid models are a blend between different styles, for example they are trained on both Novel stories and Adventure stories. These models are great variety models that you can use for multiple different playstyles and modes, but depending on your usage you may need to enable Adventure Mode or the You bias (in userscripts). |\n",
"| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n",
"\n",
"# How to start KoboldAI in 7 simple steps\n",

View File

@ -7,7 +7,7 @@
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/henk717/KoboldAI/blob/united/colab/TPU.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
"<a href=\"https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
@ -65,8 +65,8 @@
"#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
"\n",
"#@title <b><-- Click this to start KoboldAI</b>\n",
"Model = \"Janeway 13B\" #@param [\"Janeway 13B\", \"Shinen 13B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit 6B\", \"Convo 6B\", \"C1 6B\", \"NeoX 20B\", \"KoboldAI/fairseq-dense-13B\", \"EleutherAI/gpt-j-6B\"] {allow-input: true}\n",
"Version = \"United\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
"Model = \"Nerys 13B\" #@param [\"Nerys 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit 6B\", \"NeoX 20B\", \"facebook/opt-13b\", \"KoboldAI/fairseq-dense-13B\", \"EleutherAI/gpt-j-6B\"] {allow-input: true}\n",
"Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
"Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
"\n",
"import os\n",
@ -84,6 +84,10 @@
" Model = \"KoboldAI/fairseq-dense-13B-Janeway\"\n",
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"Nerys 13B\":\n",
" Model = \"KoboldAI/fairseq-dense-13B-Nerys\"\n",
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"Shinen 13B\":\n",
" Model = \"KoboldAI/fairseq-dense-13B-Shinen\"\n",
" path = \"\"\n",
@ -97,41 +101,25 @@
" Drive = \"Unextracted (Less Space)\"\n",
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-neox-20b-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.03,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-neox-20b-jax.settings\n",
"elif Model == \"Skein 6B\":\n",
" Model = \"TPUMeshTransformerGPTJ\"\n",
" path = \" -p gpt-j-6b-skein-jax\"\n",
" location = \"colab\"\n",
" download = \" -a https://storage.henk.tech/KoboldAI/skein-jax.txt\"\n",
" extract = \"\"\n",
" Drive = \"Unextracted (Less Space)\"\n",
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-skein-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-skein-jax.settings\n",
" Model = \"KoboldAI/GPT-J-6B-Skein\"\n",
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"Janeway 6B\":\n",
" Model = \"TPUMeshTransformerGPTJ\"\n",
" path = \" -p gpt-j-6b-janeway-jax\"\n",
" location = \"colab\"\n",
" download = \" -a https://storage.henk.tech/KoboldAI/janeway-jax.txt\"\n",
" extract = \"\"\n",
" Drive = \"Unextracted (Less Space)\"\n",
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-janeway-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"rep_pen_slope\\\": 0.7,\\n \\\"rep_pen_range\\\": 1024.0,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false,\\n \\\"singleline\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false,\\n \\\"chatmode\\\": false,\\n \\\"chatname\\\": \\\"You\\\",\\n \\\"dynamicscan\\\": false,\\n \\\"nopromptgen\\\": false,\\n \\\"rngpersist\\\": false,\\n \\\"nogenmod\\\": false,\\n \\\"autosave\\\": false,\\n \\\"welcome\\\": false,\\n \\\"newlinemode\\\": \\\"n\\\",\\n \\\"antemplate\\\": \\\"[Genre: <|>]\\\",\\n \\\"userscripts\\\": [],\\n \\\"corescript\\\": \\\"default.lua\\\",\\n \\\"softprompt\\\": \\\"\\\"\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-janeway-jax.settings\n",
" Model = \"KoboldAI/GPT-J-6B-Janeway\"\n",
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"Adventure 6B\":\n",
" Model = \"TPUMeshTransformerGPTJ\"\n",
" path = \" -p gpt-j-6b-adventure-jax\"\n",
" location = \"colab\"\n",
" download = \" -a https://api.wandb.ai/files/ve-forbryderne/adventure/carol-data/models/gpt-j-6b-adventure-jax/aria2.txt\"\n",
" extract = \"\"\n",
" Drive = \"Unextracted (Less Space)\"\n",
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-adventure-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": true\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-adventure-jax.settings\n",
" Model = \"KoboldAI/GPT-J-6B-Adventure\"\n",
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"Lit 6B\":\n",
" Model = \"hakurei/lit-6B\"\n",
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"Shinen 6B\":\n",
" Model = \"TPUMeshTransformerGPTJ\"\n",
" path = \" -p gpt-j-6b-shinen-jax\"\n",
" location = \"colab\"\n",
" download = \" -a https://storage.henk.tech/KoboldAI/shinen-jax.txt\"\n",
" extract = \"\"\n",
" Drive = \"Unextracted (Less Space)\"\n",
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-shinen-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"rep_pen_slope\\\": 0.7,\\n \\\"rep_pen_range\\\": 1024.0,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false,\\n \\\"singleline\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false,\\n \\\"chatmode\\\": false,\\n \\\"chatname\\\": \\\"You\\\",\\n \\\"dynamicscan\\\": false,\\n \\\"nopromptgen\\\": false,\\n \\\"rngpersist\\\": false,\\n \\\"nogenmod\\\": false,\\n \\\"autosave\\\": false,\\n \\\"welcome\\\": false,\\n \\\"newlinemode\\\": \\\"n\\\",\\n \\\"antemplate\\\": \\\"[Genre: <|>]\\\",\\n \\\"userscripts\\\": [],\\n \\\"corescript\\\": \\\"default.lua\\\",\\n \\\"softprompt\\\": \\\"\\\"\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-shinen-jax.settings\n",
" Model = \"KoboldAI/GPT-J-6B-Shinen\"\n",
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"Convo 6B\":\n",
" Model = \"hitomi-team/convo-6B\"\n",
" path = \"\"\n",
@ -159,37 +147,37 @@
"\n",
"| Model | Size | Style | Description |\n",
"| --- | --- | --- | --- |\n",
"| Janeway by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| Shinen by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n",
"| Skein by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n",
"| Adventure by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n",
"| Lit by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n",
"| Convo | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). |\n",
"| C1 by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n",
"| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
"| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n",
"| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n",
"| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n",
"| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n",
"| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n",
"| Fairseq Dense | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n",
"| GPT-J-6B by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n",
"| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n",
"| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n",
"\n",
"\n",
"# [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)\n",
"\n",
"| Model | Size | Style | Description |\n",
"| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ |\n",
"| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B GPU | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n",
"| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n",
"| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n",
"| Model | Size | Style | Description |\n",
"| --- | --- | --- | --- |\n",
"| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
"| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
"| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n",
"| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n",
"| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
"| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n",
"\n",
"| Style | Description |\n",
"| --------- | ------------------------------------------------------------ |\n",
"| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |\n",
"| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n",
"| Style | Description |\n",
"| --- | --- |\n",
"| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |\n",
"| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n",
"| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |\n",
"| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n",
"| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n",
"| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n",
"| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n",
"\n",
"---\n",
"## Tips to get the most out of Google Colab\n",

View File

@ -20,4 +20,5 @@ dependencies:
- flask-cloudflared
- flask-ngrok
- lupa==1.10
- transformers>=4.17
- transformers>=4.20.1
- accelerate

View File

@ -20,4 +20,5 @@ dependencies:
- flask-cloudflared
- flask-ngrok
- lupa==1.10
- transformers>=4.17
- transformers>=4.20.1
- accelerate

View File

@ -64,6 +64,17 @@ gensettingstf = [
"step": 0.05,
"default": 1.0,
"tooltip": "Alternative sampling method described in the paper \"Typical Decoding for Natural Language Generation\" (10.48550/ARXIV.2202.00666). The paper suggests 0.2 as a good value for this setting. Set this setting to 1 to disable its effect."
},
{
"uitype": "slider",
"unit": "float",
"label": "Top a Sampling",
"id": "settopa",
"min": 0.0,
"max": 1.0,
"step": 0.01,
"default": 0.0,
"tooltip": "Alternative sampling method that reduces the randomness of the AI whenever the probability of one token is much higher than all the others. Higher values have a stronger effect. Set this setting to 0 to disable its effect."
},
{
"uitype": "slider",

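The new "Top a Sampling" slider above only documents the behaviour in prose. As a rough illustration, top-a filtering is usually implemented by discarding tokens whose probability falls below `top_a * max_prob**2`; the snippet below is a hedged sketch of that common formulation, not the exact warper code used by KoboldAI (which is not part of this diff):

```python
import numpy as np

def top_a_filter(probs, top_a):
    # The more confident the single best token is, the more aggressively the
    # low-probability alternatives are removed; top_a == 0 disables the effect.
    if top_a <= 0.0:
        return probs
    limit = np.max(probs) ** 2 * top_a
    filtered = np.where(probs < limit, 0.0, probs)
    return filtered / filtered.sum()

print(top_a_filter(np.array([0.70, 0.15, 0.10, 0.05]), top_a=0.5))  # only 0.70 survives
```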
BIN
koboldai.ico Normal file

Binary file not shown.

Size: 150 KiB

BIN
koboldaiblue.ico Normal file

Binary file not shown.

Size: 152 KiB

BIN
koboldaigreen.ico Normal file

Binary file not shown.

Size: 151 KiB

37
maps/opt.json Normal file
View File

@ -0,0 +1,37 @@
{
"mtj_compat": "opt",
"mtj_pe": "fixed",
"mtj_config_map": {
"do_layer_norm_before": ["do_layer_norm_before", true],
"d_embed": "word_embed_proj_dim",
"d_model": "hidden_size",
"n_heads": "num_attention_heads",
"layers": "num_hidden_layers"
},
"static_weights": {
"decoder.embed_tokens.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}},
"decoder.project_in.weight": {"mtj": {"module": "embedding_shard", "param": "project_in"}},
"decoder.embed_positions.weight": {"mtj": {"module": "embedding_shard", "param": "pos_embs", "transforms": ["no_transpose", "remove_first_two_rows"]}},
"decoder.final_layer_norm.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}},
"decoder.final_layer_norm.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}},
"decoder.project_out.weight": {"mtj": {"module": "projection_shard", "param": "project_out"}}
},
"layer_weights": {
"decoder.layers.{layer}.self_attn.q_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear", "param": "w"}},
"decoder.layers.{layer}.self_attn.q_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear", "param": "b"}},
"decoder.layers.{layer}.self_attn.v_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "w"}},
"decoder.layers.{layer}.self_attn.v_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "b"}},
"decoder.layers.{layer}.self_attn.k_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "w"}},
"decoder.layers.{layer}.self_attn.k_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "b"}},
"decoder.layers.{layer}.self_attn.out_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "w"}},
"decoder.layers.{layer}.self_attn.out_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "b", "transforms": ["divide_by_shards"]}},
"decoder.layers.{layer}.fc1.weight": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "w"}},
"decoder.layers.{layer}.fc1.bias": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "b"}},
"decoder.layers.{layer}.fc2.weight": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "w"}},
"decoder.layers.{layer}.fc2.bias": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "b", "transforms": ["divide_by_shards"]}},
"decoder.layers.{layer}.self_attn_layer_norm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "scale"}},
"decoder.layers.{layer}.self_attn_layer_norm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "offset"}},
"decoder.layers.{layer}.final_layer_norm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "scale"}},
"decoder.layers.{layer}.final_layer_norm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "offset"}}
}
}
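The map above pairs each Hugging Face OPT parameter with the Mesh Transformer JAX module and parameter it should be copied into, with `{layer}` acting as a template slot for the layer index. The code that consumes these maps is not shown in this diff, so the loader below is purely a hypothetical sketch of how the `layer_weights` templates could be expanded into concrete name pairs:

```python
import json

def expand_layer_weights(map_path, n_layers):
    # Expand the "{layer}" placeholders into one
    # (huggingface_name, mtj_module, mtj_param) triple per layer.
    with open(map_path) as f:
        model_map = json.load(f)
    triples = []
    for hf_template, spec in model_map["layer_weights"].items():
        for layer in range(n_layers):
            mtj = spec["mtj"]
            triples.append((
                hf_template.format(layer=layer),
                mtj["module"].format(layer=layer),
                mtj["param"],
            ))
    return triples

# e.g. expand_layer_weights("maps/opt.json", n_layers=32)[:2]
```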

View File

@ -50,49 +50,50 @@ Each edition features different models and requires different hardware to run, t
### [Click here for the TPU Edition Colab](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)
| Model | Size | Style | Description |
| --- | --- | --- | --- |
| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (a newer Janeway); on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model too. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing is best done from the first or third person. |
| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focused on SFW, romantic scenes might involve a degree of nudity. |
| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories, it contains many different kinks. |
| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled; it was trained on an adventure dataset four times larger than the Adventure model's, making it excellent for text adventure gaming. On top of that it also includes light novel training, further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |
| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimic the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wacky adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |
| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support, creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |
| [Convo](https://huggingface.co/hitomi-team/convo-6B) by Hitomi Team | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). |
| [C1](https://huggingface.co/hakurei/c1-6B) by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms; it forms the basis for an interesting chatbot model and has been optimized to be used in Chatmode. |
| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available; being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is comparable to that of the 13B models and it is worth trying both, especially if you have a task that does not involve English writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |
| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook researchers, this model stems from the MoE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and is considered better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |
| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular, it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better results with larger generic models or finetuned models. |
## [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)
| Model | Size | Style | Description |
| --- | --- | --- | --- |
| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (a newer Janeway); on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown into the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model too. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |
| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focused on SFW, romantic scenes might involve a degree of nudity. |
| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model, this model is designed for Novels of a variety of genres. It is meant to be used in KoboldAI's regular mode. |
| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also known as Adventure 2.7B, this is a clone of the AI Dungeon Classic model and is best known for the epic and wacky adventures that AI Dungeon Classic players love. |
| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between an SFW Novel model and an NSFW model, this model should be a good choice. |
| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |
| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is too tame for you, Shinen might produce better results. While it is a Novel model, it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |
| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |
| Style | Description |
| --- | --- |
| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |
| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |
| Adventure | These models are excellent for people who want to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use them as Novel style models you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and, without Adventure mode enabled, will break the story flow and write actions on your behalf. |
| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |
| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |
---
## Tips to get the most out of Google Colab
- Google will occasionally show a Captcha, typically after it has been open for 30 minutes, but it can be more frequent if you often use Colab. Make sure to do these properly, or you risk getting your instance shut down and getting a lower priority towards the TPUs.
- KoboldAI uses Google Drive to store your files and settings, if you wish to upload a softprompt or userscript this can be done directly on the Google Drive website. You can also use this to download backups of your KoboldAI related files or upload models of your own.
- Don't want to save your stories on Google Drive for privacy reasons? Do not use KoboldAI's save function and instead click Download as .json; this will automatically download the story to your own computer without ever touching Google's hard drives. You can load this back through the Load from file option.
- Google shut your instance down unexpectedly? You can still make use of the Download as .json button to recover your story as long as you did not close the KoboldAI window. You can then load this back up in your next session.
- Done with KoboldAI? Go to the Runtime menu, click on Manage Sessions and terminate your open sessions that you no longer need. This trick can help you maintain higher priority towards getting a TPU.
- Models stored on Google Drive typically load faster than models we need to download from the internet.
View File
@ -1,4 +1,4 @@
transformers>=4.17
transformers>=4.20.1
Flask
Flask-SocketIO
requests
@ -11,3 +11,4 @@ markdown
bleach==4.1.0
sentencepiece
protobuf
accelerate
View File
@ -5,7 +5,7 @@ requests
optax >= 0.0.5, <= 0.0.9
dm-haiku == 0.0.5
jax == 0.2.21
transformers >= 4.17
transformers >= 4.19
progressbar2
git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck
flask
View File
@ -20,6 +20,7 @@ var button_settings;
var button_format;
var button_softprompt;
var button_userscripts;
var button_samplers;
var button_mode;
var button_mode_label;
var button_send;
@ -106,6 +107,12 @@ var using_webkit_patch = true;
var shift_down = false;
var do_clear_ent = false;
// Whether or not an entry in the Userscripts menu is being dragged
var us_dragging = false;
// Whether or not an entry in the Samplers menu is being dragged
var samplers_dragging = false;
// Display vars
var allowtoggle = false;
var formatcount = 0;
@ -173,20 +180,36 @@ function addSetting(ob) {
window["setting_"+ob.id] = refin; // Is this still needed?
window["label_"+ob.id] = reflb; // Is this still needed?
// Add event function to input
var updateLabelColor = function () {
var value = (ob.unit === "float" ? parseFloat : parseInt)(reflb.val());
if(value > ob.max || value < ob.min) {
reflb.addClass("setting-value-warning");
} else {
reflb.removeClass("setting-value-warning");
}
}
var send = function () {
sliders_throttle(ob.id, function () {
socket.send({'cmd': $(refin).attr('id'), 'data': $(refin).val()});
socket.send({'cmd': $(refin).attr('id'), 'data': $(reflb).val()});
});
reflb.val($(refin).val());
}
refin.on("input", send);
refin.on("input", function (event) {
reflb.val(refin.val());
updateLabelColor();
send();
}).on("change", updateLabelColor);
reflb.on("change", function (event) {
var value = (ob.unit === "float" ? parseFloat : parseInt)(event.target.value);
if(Number.isNaN(value) || value > ob.max || value < ob.min) {
if(Number.isNaN(value) || (ob.min >= 0 && value < 0)) {
event.target.value = refin.val();
return;
}
if (ob.unit === "float") {
value = parseFloat(value.toFixed(3)); // Round to 3 decimal places to help avoid the number being too long to fit in the box
}
refin.val(value);
reflb.val(value);
updateLabelColor();
send();
});
} else if(ob.uitype == "toggle"){
@ -957,6 +980,16 @@ function hideUSPopup() {
spcontent.html("");
}
function showSamplersPopup() {
samplerspopup.removeClass("hidden");
samplerspopup.addClass("flex");
}
function hideSamplersPopup() {
samplerspopup.removeClass("flex");
samplerspopup.addClass("hidden");
}
function buildLoadList(ar) {
disableButtons([load_accept]);
loadcontent.html("");
@ -1090,6 +1123,29 @@ function buildUSList(unloaded, loaded) {
}
}
function buildSamplerList(samplers) {
samplerslist.html("");
showSamplersPopup();
var i;
var samplers_lookup_table = [
"Top-k Sampling",
"Top-a Sampling",
"Top-p Sampling",
"Tail-free Sampling",
"Typical Sampling",
"Temperature",
]
for(i=0; i<samplers.length; i++) {
samplerslist.append("<div class=\"flex\">\
<div class=\"samplerslistitem flex-row-container\" sid=\""+samplers[i]+"\">\
<div class=\"flex-row\">\
<div>"+samplers_lookup_table[samplers[i]]+"</div>\
</div>\
</div>\
</div>");
}
}
function highlightLoadLine(ref) {
$("#loadlistcontent > div > div.popuplistselected").removeClass("popuplistselected");
ref.addClass("popuplistselected");
@ -1819,6 +1875,7 @@ $(document).ready(function(){
button_format = $('#btn_format');
button_softprompt = $("#btn_softprompt");
button_userscripts= $("#btn_userscripts");
button_samplers = $("#btn_samplers");
button_mode = $('#btnmode')
button_mode_label = $('#btnmode_label')
button_send = $('#btnsend');
@ -1867,6 +1924,10 @@ $(document).ready(function(){
usloaded = $("#uslistloaded");
us_accept = $("#btn_usaccept");
us_close = $("#btn_usclose");
samplerspopup = $("#samplerscontainer");
samplerslist = $("#samplerslist");
samplers_accept = $("#btn_samplersaccept");
samplers_close = $("#btn_samplersclose");
nspopup = $("#newgamecontainer");
ns_accept = $("#btn_nsaccept");
ns_close = $("#btn_nsclose");
@ -1889,7 +1950,7 @@ $(document).ready(function(){
modelname = msg.modelname;
}
refreshTitle();
connect_status.html("<b>Connected to KoboldAI Process!</b>");
connect_status.html("<b>Connected to KoboldAI!</b>");
connect_status.removeClass("color_orange");
connect_status.addClass("color_green");
// Reset Menus
@ -2059,48 +2120,52 @@ $(document).ready(function(){
newTextHighlight($("#n"+msg.data))
} else if(msg.cmd == "updatetemp") {
// Send current temp value to input
$("#settemp").val(parseFloat(msg.data));
$("#settempcur").val(msg.data);
$("#settemp").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatetopp") {
// Send current top p value to input
$("#settopp").val(parseFloat(msg.data));
$("#settoppcur").val(msg.data);
$("#settopp").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatetopk") {
// Send current top k value to input
$("#settopk").val(parseFloat(msg.data));
$("#settopkcur").val(msg.data);
$("#settopk").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatetfs") {
// Send current tfs value to input
$("#settfs").val(parseFloat(msg.data));
$("#settfscur").val(msg.data);
$("#settfs").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatetypical") {
// Send current typical value to input
$("#settypical").val(parseFloat(msg.data));
$("#settypicalcur").val(msg.data);
$("#settypical").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatetopa") {
// Send current top a value to input
$("#settopacur").val(msg.data);
$("#settopa").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatereppen") {
// Send current rep pen value to input
$("#setreppen").val(parseFloat(msg.data));
$("#setreppencur").val(msg.data);
$("#setreppen").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatereppenslope") {
// Send current rep pen value to input
$("#setreppenslope").val(parseFloat(msg.data));
$("#setreppenslopecur").val(msg.data);
$("#setreppenslope").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updatereppenrange") {
// Send current rep pen value to input
$("#setreppenrange").val(parseFloat(msg.data));
$("#setreppenrangecur").val(msg.data);
$("#setreppenrange").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updateoutlen") {
// Send current output amt value to input
$("#setoutput").val(parseInt(msg.data));
$("#setoutputcur").val(msg.data);
$("#setoutput").val(parseInt(msg.data)).trigger("change");
} else if(msg.cmd == "updatetknmax") {
// Send current max tokens value to input
$("#settknmax").val(parseInt(msg.data));
$("#settknmaxcur").val(msg.data);
$("#settknmax").val(parseInt(msg.data)).trigger("change");
} else if(msg.cmd == "updateikgen") {
// Send current max tokens value to input
$("#setikgen").val(parseInt(msg.data));
$("#setikgencur").val(msg.data);
$("#setikgen").val(parseInt(msg.data)).trigger("change");
} else if(msg.cmd == "setlabeltemp") {
// Update setting label with value from server
$("#settempcur").val(msg.data);
@ -2116,6 +2181,9 @@ $(document).ready(function(){
} else if(msg.cmd == "setlabeltypical") {
// Update setting label with value from server
$("#settypicalcur").val(msg.data);
} else if(msg.cmd == "setlabeltypical") {
// Update setting label with value from server
$("#settopa").val(msg.data);
} else if(msg.cmd == "setlabelreppen") {
// Update setting label with value from server
$("#setreppencur").val(msg.data);
@ -2284,6 +2352,8 @@ $(document).ready(function(){
buildSPList(msg.data);
} else if(msg.cmd == "buildus") {
buildUSList(msg.data.unloaded, msg.data.loaded);
} else if(msg.cmd == "buildsamplers") {
buildSamplerList(msg.data);
} else if(msg.cmd == "askforoverwrite") {
// Show overwrite warning
show([$(".saveasoverwrite")]);
@ -2304,15 +2374,15 @@ $(document).ready(function(){
$("#setnumseqcur").html(msg.data);
} else if(msg.cmd == "updatenumseq") {
// Send current max tokens value to input
$("#setnumseq").val(parseInt(msg.data));
$("#setnumseqcur").html(msg.data);
$("#setnumseq").val(parseInt(msg.data)).trigger("change");
} else if(msg.cmd == "setlabelwidepth") {
// Update setting label with value from server
$("#setwidepthcur").html(msg.data);
} else if(msg.cmd == "updatewidepth") {
// Send current max tokens value to input
$("#setwidepth").val(parseInt(msg.data));
$("#setwidepthcur").html(msg.data);
$("#setwidepth").val(parseInt(msg.data)).trigger("change");
} else if(msg.cmd == "updateuseprompt") {
// Update toggle state
$("#setuseprompt").prop('checked', msg.data).change();
@ -2396,9 +2466,39 @@ $(document).ready(function(){
}, 2);
});
var us_click_handler = function(ev) {
setTimeout(function() {
if (us_dragging) {
return;
}
var target = $(ev.target).closest(".uslistitem")[0];
if ($.contains(document.getElementById("uslistunloaded"), target)) {
document.getElementById("uslistloaded").appendChild(target);
} else {
document.getElementById("uslistunloaded").appendChild(target);
}
}, 10);
}
var samplers_click_handler = function(ev) {
setTimeout(function() {
if (samplers_dragging) {
return;
}
var target = $(ev.target).closest(".samplerslistitem");
var next = target.parent().next().find(".samplerslistitem");
if (!next.length) {
return;
}
next.parent().after(target.parent());
}, 10);
}
// Make the userscripts menu sortable
var us_sortable_settings = {
placeholder: "ussortable-placeholder",
start: function() { us_dragging = true; },
stop: function() { us_dragging = false; },
delay: 2,
cursor: "move",
tolerance: "pointer",
@ -2407,12 +2507,28 @@ $(document).ready(function(){
scrollSensitivity: 64,
scrollSpeed: 10,
}
$(usunloaded).sortable($.extend({
usunloaded.sortable($.extend({
connectWith: "#uslistloaded",
}, us_sortable_settings));
$(usloaded).sortable($.extend({
}, us_sortable_settings)).on("click", ".uslistitem", us_click_handler);
usloaded.sortable($.extend({
connectWith: "#uslistunloaded",
}, us_sortable_settings));
}, us_sortable_settings)).on("click", ".uslistitem", us_click_handler);
// Make the samplers menu sortable
var samplers_sortable_settings = {
placeholder: "samplerssortable-placeholder",
start: function() { samplers_dragging = true; },
stop: function() { samplers_dragging = false; },
delay: 2,
cursor: "move",
tolerance: "pointer",
opacity: 0.21,
revert: 173,
scrollSensitivity: 64,
scrollSpeed: 10,
}
samplerslist.sortable($.extend({
}, samplers_sortable_settings)).on("click", ".samplerslistitem", samplers_click_handler);
// Bind actions to UI buttons
button_send.on("click", function(ev) {
@ -2548,6 +2664,10 @@ $(document).ready(function(){
button_userscripts.on("click", function(ev) {
socket.send({'cmd': 'uslistrequest', 'data': ''});
});
button_samplers.on("click", function(ev) {
socket.send({'cmd': 'samplerlistrequest', 'data': ''});
});
load_close.on("click", function(ev) {
hideLoadPopup();
@ -2581,6 +2701,16 @@ $(document).ready(function(){
socket.send({'cmd': 'usload', 'data': ''});
hideUSPopup();
});
samplers_close.on("click", function(ev) {
hideSamplersPopup();
});
samplers_accept.on("click", function(ev) {
hideMessage();
socket.send({'cmd': 'samplers', 'data': samplerslist.find(".samplerslistitem").map(function() { return parseInt($(this).attr("sid")); }).toArray()});
hideSamplersPopup();
});
button_newgame.on("click", function(ev) {
if(connected) {
View File
@ -22,6 +22,14 @@ chunk.editing, chunk.editing * {
font-style: normal !important;
}
.setting-value-warning {
color: #ff7777;
}
.setting-value-warning:focus {
color: #ffaaaa !important;
}
.settinglabel input {
width: 5ch;
background-color: inherit;
@ -449,6 +457,26 @@ body.connected #popupfooter, #popupfooter.always-available {
overflow-wrap: anywhere;
}
#samplerspopup {
width: 300px;
background-color: #262626;
margin-top: 100px;
}
@media (max-width: 768px) {
#samplerspopup {
width: 100%;
background-color: #262626;
margin-top: 100px;
}
}
#samplerslist {
height: 300px;
overflow-y: scroll;
overflow-wrap: anywhere;
}
#nspopup {
width: 350px;
background-color: #262626;
@ -742,7 +770,7 @@ body.connected .dropdown-item:hover, .dropdown-item.always-available:hover {
background-color: #3bf723;
}
.ussortable-placeholder {
.ussortable-placeholder, .samplerssortable-placeholder {
height: 4px;
background-color: #3bf723;
}
@ -1332,7 +1360,7 @@ body.connected .popupfooter, .popupfooter.always-available {
background-color: #688f1f;
}
.uslistitem {
.uslistitem, .samplerslistitem {
padding: 12px 10px 12px 10px;
display: flex;
flex-grow: 1;
@ -1344,11 +1372,11 @@ body.connected .popupfooter, .popupfooter.always-available {
transition: background-color 0.25s ease-in;
}
.uslistitemsub {
.uslistitemsub, .samplerslistitemsub {
color: #ba9;
}
.uslistitem:hover {
.uslistitem:hover, .samplerslistitem:hover {
cursor: move;
background-color: #688f1f;
}
View File
@ -9,7 +9,7 @@
<link rel="stylesheet" href="static/bootstrap.min.css">
<link rel="stylesheet" href="static/bootstrap-toggle.min.css">
<link rel="stylesheet" href="static/open-iconic-bootstrap.min.css">
<link rel="stylesheet" href="static/custom.css?ver=1.17a">
<link rel="stylesheet" href="static/custom.css?ver=1.18.1a">
<script src="static/jquery-3.6.0.min.js"></script>
<script src="static/jquery-ui.sortable.min.js"></script>
@ -17,7 +17,7 @@
<script src="static/bootstrap.min.js"></script>
<script src="static/bootstrap-toggle.min.js"></script>
<script src="static/rangy-core.min.js"></script>
<script src="static/application.js?ver=1.17e"></script>
<script src="static/application.js?ver=1.18.1a"></script>
</head>
<body>
<input type="file" id="remote-save-select" accept="application/json" style="display:none">
@ -71,6 +71,9 @@
<li class="nav-item">
<a class="nav-link" href="#" id="btn_format">Formatting</a>
</li>
<li class="nav-item">
<a class="nav-link" href="#" id="btn_samplers">Samplers</a>
</li>
<li class="nav-item">
<a class="nav-link" href="#" id="btn_userscripts">Userscripts</a>
</li>
@ -299,6 +302,19 @@
</div>
</div>
</div>
<div class="popupcontainer hidden" id="samplerscontainer">
<div id="samplerspopup">
<div class="popuptitlebar">
<div class="popuptitletext">Drag-and-drop to change the order in which the samplers are applied</div>
</div>
<div id="samplerslist">
</div>
<div class="popupfooter">
<button type="button" class="btn btn-primary" id="btn_samplersaccept">Save</button>
<button type="button" class="btn btn-primary" id="btn_samplersclose">Cancel</button>
</div>
</div>
</div>
<div class="popupcontainer hidden" id="loadcontainerdelete">
<div id="loadpopupdelete">
<div class="popuptitlebar">

View File

@ -27,6 +27,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''
import utils
import multiprocessing
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar
import progressbar
@ -63,11 +65,13 @@ def stopping_callback(generated, n_generated, excluded_world_info) -> Tuple[List
def settings_callback() -> dict:
return {
"sampler_order": utils.default_sampler_order.copy(),
"top_p": 0.9,
"temp": 0.5,
"top_k": 0,
"tfs": 1.0,
"typical": 1.0,
"top_a": 0.0,
"repetition_penalty": 1.0,
"rpslope": 0.0,
"rprange": 0,
@ -156,10 +160,10 @@ def apply_repetition_penalty_dynamic(logits, tokens, repetition_penalty, generat
logits[tokens] = penalty_logits
return logits
def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0):
def kobold_sample_dynamic(key, logits, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
'''
This gets called by generate_loop_fn to apply a series of 5 filters
to the logits (top-k, then top-p, then TFS, then typical, then temperature)
This gets called by generate_loop_fn to apply a series of 6 filters
to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
before picking one token using the modified logits
'''
# Top-k (keep only the k tokens with the highest logits and remove
@ -178,8 +182,18 @@ def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, ty
sorted_indices_to_remove,
)
return np.where(indices_to_remove, -np.inf, logits)
if top_k > 0:
logits = top_k_filter(logits)
# Top-a (remove all tokens that have softmax probability less than
# a*m^2 where m is the maximum softmax probability)
def top_a_filter(logits):
# Replace every element in the logits array
# with e (Euler's number) to the power of that element, and divide
# each element of the new array by the sum of the elements in the
# new array
probabilities = np.array(jax.nn.softmax(logits), copy=True)
# Find the largest probability
probs_max = probabilities.max()
# Remove tokens
return np.where(probabilities < probs_max * probs_max * top_a, -np.inf, logits)
# Top-p (after sorting the remaining tokens again in descending order of
# logit, remove the ones that have cumulative softmax probability
# greater than p)
@ -205,8 +219,6 @@ def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, ty
sorted_indices_to_remove,
)
return np.where(indices_to_remove, -np.inf, logits)
if top_p < 1.0:
logits = top_p_filter(logits)
# Tail free sampling (basically top-p a second time on remaining tokens
# except it's the "cumulative normalized absolute second finite
# differences of the softmax probabilities" instead of just the
@ -245,8 +257,6 @@ def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, ty
sorted_indices_to_remove,
)
return np.where(indices_to_remove, -np.inf, logits)
if tfs < 1.0:
logits = tail_free_filter(logits)
# Typical sampling (https://arxiv.org/pdf/2202.00666.pdf)
def typical_filter(logits):
# Compute softmax probabilities and the natural logarithms of them
@ -276,10 +286,16 @@ def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, ty
sorted_indices_to_remove,
)
return np.where(indices_to_remove, -jnp.inf, logits)
if typical < 1.0:
logits = typical_filter(logits)
# Temperature (just divide the logits by the temperature)
logits /= temp
def temp_filter(logits):
return logits / temp
for k in sampler_order:
if k == 0 and top_k > 0: logits = top_k_filter(logits)
if k == 1 and top_a > 0.0: logits = top_a_filter(logits)
if k == 2 and top_p < 1.0: logits = top_p_filter(logits)
if k == 3 and tfs < 1.0: logits = tail_free_filter(logits)
if k == 4 and typical < 1.0: logits = typical_filter(logits)
if k == 5 and temp != 1.0: logits = temp_filter(logits)
# Finally, pick one token using the softmax thingy again (it gives
# an array whose elements sum to 1 so it can be used nicely as a
# probability distribution)
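# A rough NumPy-only check (not the project's code) of the Top-a rule used above: tokens whose
# softmax probability falls below top_a * m**2, where m is the highest probability, are masked.
import numpy as np

def top_a_demo(logits, top_a):
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()                      # softmax
    threshold = top_a * probs.max() ** 2      # a * m^2
    return np.where(probs < threshold, -np.inf, logits)

print(top_a_demo(np.array([3.0, 2.5, 0.0, -4.0]), top_a=0.5))  # the last two tokens get masked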
@ -330,10 +346,10 @@ def apply_repetition_penalty_static(logits, tokens, repetition_penalty, generate
# positions in the logits array
return logits.at[tokens].set(penalty_logits)
def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0):
def kobold_sample_static(key, logits, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
'''
This gets called by generate_loop_fn to apply a series of 5 filters
to the logits (top-k, then top-p, then TFS, then typical, then temperature)
This gets called by generate_loop_fn to apply a series of 6 filters
to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
before picking one token using the modified logits
'''
# Top-k (keep only the k tokens with the highest logits and remove
@ -352,7 +368,18 @@ def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typ
sorted_indices_to_remove,
)
return jnp.where(indices_to_remove, -jnp.inf, logits)
logits = jax.lax.cond(top_k > 0, top_k_filter, lambda x: x, logits)
# Top-a (remove all tokens that have softmax probability less than
# a*m^2 where m is the maximum softmax probability)
def top_a_filter(logits):
# Replace every element in the logits array
# with e (Euler's number) to the power of that element, and divide
# each element of the new array by the sum of the elements in the
# new array
probabilities = jax.nn.softmax(logits)
# Find the largest probability
probs_max = probabilities.max()
# Remove tokens
return jnp.where(probabilities < probs_max * probs_max * top_a, -jnp.inf, logits)
# Top-p (after sorting the remaining tokens again in descending order of
# logit, remove the ones that have cumulative softmax probability
# greater than p)
@ -378,7 +405,6 @@ def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typ
sorted_indices_to_remove,
)
return jnp.where(indices_to_remove, -jnp.inf, logits)
logits = jax.lax.cond(top_p < 1.0, top_p_filter, lambda x: x, logits)
# Tail free sampling (basically top-p a second time on remaining tokens
# except it's the "cumulative normalized absolute second finite
# differences of the softmax probabilities" instead of just the
@ -417,7 +443,6 @@ def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typ
sorted_indices_to_remove,
)
return jnp.where(indices_to_remove, -jnp.inf, logits)
logits = jax.lax.cond(tfs < 1.0, tail_free_filter, lambda x: x, logits)
# Typical sampling (https://arxiv.org/pdf/2202.00666.pdf)
def typical_filter(logits):
# Compute softmax probabilities and the natural logarithms of them
@ -446,11 +471,16 @@ def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typ
sorted_indices_to_remove,
)
return jnp.where(indices_to_remove, -jnp.inf, logits)
logits = jax.lax.cond(typical < 1.0, typical_filter, lambda x: x, logits)
# Temperature (just divide the logits by the temperature)
def temp_filter(logits):
return logits / temp
logits = jax.lax.cond(True, temp_filter, lambda x: x, logits)
for k in sampler_order:
logits = jax.lax.cond(jnp.logical_and(k == 0, top_k > 0), top_k_filter, lambda x: x, logits)
logits = jax.lax.cond(jnp.logical_and(k == 1, top_a > 0.0), top_a_filter, lambda x: x, logits)
logits = jax.lax.cond(jnp.logical_and(k == 2, top_p < 1.0), top_p_filter, lambda x: x, logits)
logits = jax.lax.cond(jnp.logical_and(k == 3, tfs < 1.0), tail_free_filter, lambda x: x, logits)
logits = jax.lax.cond(jnp.logical_and(k == 4, typical < 1.0), typical_filter, lambda x: x, logits)
logits = jax.lax.cond(jnp.logical_and(k == 5, temp != 1.0), temp_filter, lambda x: x, logits)
# Finally, pick one token using the softmax thingy again (it gives
# an array whose elements sum to 1 so it can be used nicely as a
# probability distribution)
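# The static path runs under jax.jit, so samplers cannot be toggled with Python `if` statements
# on traced values; jax.lax.cond keeps both branches traceable. A minimal sketch of the pattern
# (assumes JAX is installed; toy values, not the project's code):
import jax
import jax.numpy as jnp

def maybe_temperature(logits, enabled, temp):
    # Divide by the temperature only when `enabled` is true, without a Python branch.
    return jax.lax.cond(enabled, lambda x: x / temp, lambda x: x, logits)

print(maybe_temperature(jnp.array([1.0, 2.0]), True, 0.5))   # [2. 4.]
print(maybe_temperature(jnp.array([1.0, 2.0]), False, 0.5))  # [1. 2.]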
@ -804,6 +834,7 @@ def infer_static(
top_k=0,
tfs=1.0,
typical=1.0,
top_a=0.0,
repetition_penalty=1.0,
rpslope=0.0,
rprange=0,
@ -811,8 +842,12 @@ def infer_static(
gen_len=80,
soft_embeddings: Optional[np.array] = None,
soft_tokens: Optional[np.array] = None,
sampler_order: Optional[List[int]] = None,
) -> List[np.array]:
maps.thread_resources.env = thread_resources_env
if sampler_order is None:
sampler_order = utils.default_sampler_order.copy()
sampler_order = np.uint32(sampler_order)
total_batch = 1
tokens = context
if(soft_tokens is not None):
@ -823,10 +858,12 @@ def infer_static(
batched_tokens = np.array([padded_tokens] * total_batch)
samples = []
batched_generator_params = {
"sampler_order": np.repeat(sampler_order[np.newaxis], total_batch, axis=0),
"temp": temp * np.ones(total_batch),
"top_p": top_p * np.ones(total_batch),
"tfs": tfs * np.ones(total_batch),
"typical": typical * np.ones(total_batch),
"top_a": top_a * np.ones(total_batch),
"repetition_penalty": repetition_penalty * np.ones(total_batch),
"rpslope": rpslope * np.ones(total_batch),
"rprange": np.full(total_batch, rprange, dtype=np.uint32),
@ -983,6 +1020,9 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2):
def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpoint=False, **kwargs) -> None:
global thread_resources_env, seq, tokenizer, network, params
if not hasattr(vars, "sampler_order") or not vars.sampler_order:
vars.sampler_order = utils.default_sampler_order.copy()
default_params = {
"compat": "j",
"layers": 28,
@ -1054,7 +1094,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
# by the number of TPU cores, and fall back to one core if an even
# number of TPU cores is not possible.
for c in (8, 6, 4, 2, 1):
if 0 == params["n_heads"] % c == params["d_model"] % c:
if 0 == params["n_heads"] % c == params.get("d_embed", params["d_model"]) % c:
params["cores_per_replica"] = c
break
@ -1079,6 +1119,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
return old_encode(s).ids
return encode
tokenizer.encode = new_encode(tokenizer.encode)
tokenizer._koboldai_header = []
elif not hf_checkpoint:
if not isinstance(params["tokenizer_class"], str) or not any(params["tokenizer_class"].endswith(s) for s in ("Tokenizer", "TokenizerFast")):
raise ValueError("`tokenizer_class` must be a string ending in 'Tokenizer' or 'TokenizerFast'")
@ -1092,13 +1133,18 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
print("Connecting to your Colab instance's TPU", flush=True)
spinner = multiprocessing.Process(target=show_spinner, args=())
spinner.start()
colab_tpu_addr = os.environ['COLAB_TPU_ADDR'].split(':')[0]
url = f'http://{colab_tpu_addr}:8475/requestversion/{driver_version}'
if os.environ.get('COLAB_TPU_ADDR', '') != '':
tpu_address = os.environ['COLAB_TPU_ADDR'] # Colab
else:
tpu_address = os.environ['TPU_NAME'] # Kaggle
tpu_address = tpu_address.replace("grpc://", "")
tpu_address_without_port = tpu_address.split(':', 1)[0]
url = f'http://{tpu_address_without_port}:8475/requestversion/{driver_version}'
config.FLAGS.jax_xla_backend = "tpu_driver"
config.FLAGS.jax_backend_target = "grpc://" + tpu_address
requests.post(url)
spinner.terminate()
print()
config.FLAGS.jax_xla_backend = "tpu_driver"
config.FLAGS.jax_backend_target = "grpc://" + os.environ['COLAB_TPU_ADDR']
cores_per_replica = params["cores_per_replica"]
seq = params["seq"]
@ -1158,13 +1204,27 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
import functools
def callback(model_dict, f, **_):
if callback.nested:
return
callback.nested = True
with zipfile.ZipFile(f, "r") as z:
try:
last_storage_key = None
f = None
current_offset = 0
print("\n\n\nThis model has ", f"{hk.data_structures.tree_size(network.state['params']):,d}".replace(",", " "), " parameters.\n")
for key in tqdm(sorted(model_dict.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)), desc="Loading model tensors"):
if utils.current_shard == 0:
print("\n\n\nThis model has ", f"{hk.data_structures.tree_size(network.state['params']):,d}".replace(",", " "), " parameters.\n")
if utils.num_shards is None or utils.current_shard == 0:
if utils.num_shards is not None:
num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
else:
num_tensors = len(model_dict)
utils.bar = tqdm(total=num_tensors, desc="Loading model tensors")
if utils.num_shards is not None:
utils.current_shard += 1
for key in sorted(model_dict.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
# Some model weights are used by transformers but not by MTJ.
# We have to materialize these weights anyways because
@ -1173,6 +1233,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
# tensors, which don't take up any actual CPU or TPU memory.
if key not in model_spec:
model_dict[key] = torch.empty(model_dict[key].shape, dtype=model_dict[key].dtype, device="meta")
utils.bar.update(1)
continue
storage_key = model_dict[key].key
@ -1200,6 +1261,8 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
# MTJ requires certain mathematical operations to be performed
# on tensors in order for them to be in the correct format
if "remove_first_two_rows" in transforms:
tensor = tensor[2:]
if "divide_by_shards" in transforms:
tensor /= params["cores_per_replica"]
if "vocab_pad" in transforms:
@ -1223,6 +1286,11 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
np.empty(params["cores_per_replica"]),
)
utils.bar.update(1)
if utils.num_shards is not None and utils.current_shard < utils.num_shards:
return
# Check for tensors that MTJ needs that were not provided in the
# HF model
for mk, mv in network.state["params"].items():
@ -1241,8 +1309,13 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
print("\n\nERROR: " + error, file=sys.stderr)
raise RuntimeError(error)
finally:
if utils.num_shards is None or utils.current_shard >= utils.num_shards:
utils.bar.close()
utils.bar = None
callback.nested = False
if isinstance(f, zipfile.ZipExtFile):
f.close()
callback.nested = False
if os.path.isdir(vars.model.replace('/', '_')):
import shutil
@ -1252,6 +1325,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
if(os.path.isdir(vars.custmodpth)):
try:
tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@ -1264,6 +1341,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
try:
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@ -1276,6 +1357,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
else:
try:
tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
except Exception as e:
pass
try:
tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
View File
@ -50,4 +50,4 @@ git remote add origin %origin%
git fetch --all
git checkout %branch% -f
git reset --hard origin/%branch%
cmd /k
%windir%\system32\timeout -t 10
View File
@ -5,9 +5,22 @@ import json
import subprocess
import tempfile
import requests
import requests.adapters
import time
from tqdm.auto import tqdm
import os
import itertools
from typing import Optional
vars = None
num_shards: Optional[int] = None
current_shard = 0
from_pretrained_model_name = ""
from_pretrained_index_filename: Optional[str] = None
from_pretrained_kwargs = {}
bar = None
default_sampler_order = [0, 1, 2, 3, 4, 5]
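# For reference, a rough sketch (illustrative names, not project identifiers) of what the integer
# sampler order encodes; the indices match the lookup table in application.js and the dispatch in
# kobold_sample_dynamic/static: 0 top-k, 1 top-a, 2 top-p, 3 tail-free, 4 typical, 5 temperature.
SAMPLER_NAMES = ["top_k", "top_a", "top_p", "tail_free", "typical", "temperature"]

def describe_order(sampler_order):
    return " -> ".join(SAMPLER_NAMES[i] for i in sampler_order)

print(describe_order([0, 1, 2, 3, 4, 5]))  # the default order
print(describe_order([5, 0, 2, 3, 4, 1]))  # e.g. temperature applied first, top-a last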
#==================================================================#
# Decorator to prevent a function's actions from being run until
@ -130,10 +143,18 @@ def encodenewlines(txt):
def decodenewlines(txt):
if(vars.newlinemode == "s"):
return txt.replace("</s>", '\n')
if(vars.newlinemode == "ns"):
return txt.replace("</s>", '')
return txt
#==================================================================#
# Downloads sharded huggingface checkpoints using aria2c if possible
# Returns number of layers given an HF model config
#==================================================================#
def num_layers(config):
return config.num_layers if hasattr(config, "num_layers") else config.n_layer if hasattr(config, "n_layer") else config.num_hidden_layers
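# Quick illustrative check of the attribute fallback above; hypothetical config objects stand in
# for real Hugging Face config instances.
from types import SimpleNamespace
print(num_layers(SimpleNamespace(n_layer=28)))            # e.g. GPT-2 / GPT-J style configs
print(num_layers(SimpleNamespace(num_hidden_layers=24)))  # configs exposing num_hidden_layers only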
#==================================================================#
# Downloads huggingface checkpoints using aria2c if possible
#==================================================================#
def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_dir=None, proxies=None, resume_download=False, local_files_only=False, use_auth_token=None, user_agent=None, revision=None, mirror=None, **kwargs):
import transformers
@ -191,6 +212,7 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
if not urls:
return
etags = [h.get("X-Linked-Etag") or h.get("ETag") for u in urls for h in [requests.head(u, headers=headers, allow_redirects=False, proxies=proxies, timeout=10).headers]]
headers = [requests.head(u, headers=headers, allow_redirects=True, proxies=proxies, timeout=10).headers for u in urls]
filenames = [transformers.file_utils.url_to_filename(u, t) for u, t in zip(urls, etags)]
for n in filenames:
path = os.path.join(_cache_dir, "kai-tempfile." + n + ".aria2")
@ -206,22 +228,75 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
path = os.path.join(_cache_dir, n)
if os.path.exists(path):
os.remove(path)
total_length = sum(int(h["Content-Length"]) for h in headers)
lengths = {}
aria2_config = "\n".join(f"{u}\n out=kai-tempfile.{n}" for u, n in zip(urls, filenames)).encode()
with tempfile.NamedTemporaryFile("w+b", delete=False) as f:
f.write(aria2_config)
f.flush()
p = subprocess.Popen(["aria2c", "-x", "10", "-s", "10", "-j", "10", "--disable-ipv6", "--file-allocation=trunc", "--allow-overwrite", "--auto-file-renaming", "false", "-d", _cache_dir, "-i", f.name, "-U", transformers.file_utils.http_user_agent(user_agent)] + (["-c"] if not force_download else []) + ([f"--header='Authorization: Bearer {token}'"] if use_auth_token else []), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
for line in p.stdout:
print(line.decode(), end="", flush=True)
path = f.name
s = requests.Session()
s.mount("http://", requests.adapters.HTTPAdapter(max_retries=requests.adapters.Retry(total=120, backoff_factor=1)))
bar = None
done = False
secret = os.urandom(17).hex()
try:
os.remove(path)
except OSError:
pass
with tempfile.NamedTemporaryFile("w+b", delete=False) as f:
f.write(aria2_config)
f.flush()
p = subprocess.Popen(["aria2c", "-x", "10", "-s", "10", "-j", "10", "--enable-rpc=true", f"--rpc-secret={secret}", "--rpc-listen-port", str(vars.aria2_port), "--disable-ipv6", "--file-allocation=trunc", "--allow-overwrite", "--auto-file-renaming=false", "-d", _cache_dir, "-i", f.name, "-U", transformers.file_utils.http_user_agent(user_agent)] + (["-c"] if not force_download else []) + ([f"--header='Authorization: Bearer {token}'"] if use_auth_token else []), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
while p.poll() is None:
r = s.post(f"http://localhost:{vars.aria2_port}/jsonrpc", json={"jsonrpc": "2.0", "id": "kai", "method": "aria2.tellActive", "params": [f"token:{secret}"]}).json()["result"]
if not r:
s.close()
if bar is not None:
bar.n = bar.total
bar.close()
p.terminate()
done = True
break
if bar is None:
bar = tqdm(total=total_length, desc=f"[aria2] Downloading model", unit="B", unit_scale=True, unit_divisor=1000)
visited = set()
for x in r:
filename = x["files"][0]["path"]
lengths[filename] = (int(x["completedLength"]), int(x["totalLength"]))
visited.add(filename)
for k, v in lengths.items():
if k not in visited:
lengths[k] = (v[1], v[1])
bar.n = sum(v[0] for v in lengths.values())
bar.update()
time.sleep(0.1)
path = f.name
except Exception as e:
p.terminate()
raise e
finally:
try:
os.remove(path)
except OSError:
pass
code = p.wait()
if code:
if not done and code:
raise OSError(f"aria2 exited with exit code {code}")
for u, t, n in zip(urls, etags, filenames):
os.rename(os.path.join(_cache_dir, "kai-tempfile." + n), os.path.join(_cache_dir, n))
with open(os.path.join(_cache_dir, n + ".json"), "w") as f:
json.dump({"url": u, "etag": t}, f)
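# A self-contained sketch of the polling idea above (not the project's code): ask a local aria2c
# daemon over JSON-RPC which downloads are active and sum their completed bytes. The port and
# secret below are placeholders, not the values KoboldAI uses.
import requests

def aria2_completed_bytes(port=6800, secret="SECRET"):
    payload = {"jsonrpc": "2.0", "id": "demo",
               "method": "aria2.tellActive", "params": [f"token:{secret}"]}
    active = requests.post(f"http://localhost:{port}/jsonrpc", json=payload, timeout=10).json()["result"]
    return sum(int(x["completedLength"]) for x in active)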
#==================================================================#
# Given the path to a pytorch_model.bin.index.json, returns how many
# shards there are in the model
#==================================================================#
def get_num_shards(filename):
with open(filename) as f:
map_data = json.load(f)
return len(set(map_data["weight_map"].values()))
#==================================================================#
# Given the name/path of a sharded model and the path to a
# pytorch_model.bin.index.json, returns a list of weight names in the
# sharded model. Requires lazy loader to be enabled to work properly
#==================================================================#
def get_sharded_checkpoint_num_tensors(pretrained_model_name_or_path, filename, cache_dir=None, force_download=False, proxies=None, resume_download=False, local_files_only=False, use_auth_token=None, user_agent=None, revision=None, mirror=None, **kwargs):
import transformers.modeling_utils
import torch
shard_paths, _ = transformers.modeling_utils.get_checkpoint_shard_files(pretrained_model_name_or_path, filename, cache_dir=cache_dir, force_download=force_download, proxies=proxies, resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, user_agent=user_agent, revision=revision, mirror=mirror)
return list(itertools.chain(*(torch.load(p, map_location="cpu").keys() for p in shard_paths)))
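# Illustrative toy data: a pytorch_model.bin.index.json maps tensor names to shard files, so the
# number of distinct values in weight_map is the shard count that get_num_shards above returns.
index = {
    "weight_map": {
        "transformer.wte.weight": "pytorch_model-00001-of-00002.bin",
        "transformer.h.0.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
        "lm_head.weight": "pytorch_model-00002-of-00002.bin",
    }
}
print(len(set(index["weight_map"].values())))  # -> 2 shards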
View File
@ -148,3 +148,32 @@ class TypicalLogitsWarper(LogitsWarper):
indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
scores = scores.masked_fill(indices_to_remove, self.filter_value)
return scores
class TopALogitsWarper(LogitsWarper):
def __init__(self, top_a: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
top_a = float(top_a)
if top_a < 0 or top_a > 1.0:
raise ValueError(f"`top_a` has to be a float >= 0 and <= 1, but is {top_a}")
self.top_a = top_a
self.filter_value = filter_value
self.min_tokens_to_keep = min_tokens_to_keep
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
if self.filter_value >= 1.0:
return scores
sorted_logits, sorted_indices = torch.sort(scores, descending=True)
probs = sorted_logits.softmax(dim=-1)
# Remove tokens with probability less than top_a*(max(probs))^2 (token with 0 are kept)
probs_max = probs[..., 0, None]
sorted_indices_to_remove = probs < probs_max * probs_max * self.top_a
if self.min_tokens_to_keep > 1:
# Keep at least min_tokens_to_keep
sorted_indices_to_remove[..., : self.min_tokens_to_keep] = 0
indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
scores = scores.masked_fill(indices_to_remove, self.filter_value)
return scores
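# A hedged usage sketch of the warper defined above, called the way transformers invokes logits
# warpers (input_ids, scores); the numbers are toy values rather than real model output.
import torch

warper = TopALogitsWarper(top_a=0.5)
scores = torch.tensor([[4.0, 3.0, 0.0, -2.0]])
input_ids = torch.zeros((1, 1), dtype=torch.long)  # unused by this particular warper
print(warper(input_ids, scores))  # improbable tokens are replaced with -inf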