Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)

Commit: Merge branch 'main' into neox
.gitignore (vendored): 3 lines changed

@@ -31,3 +31,6 @@ Uninstall
 
 # Ignore compiled Python files.
 *.pyc
+
+# Don't ignore defaults
+!defaults/*
Uninstall.bat: new file, 32 lines

@@ -0,0 +1,32 @@
+@echo off
+cd /D %~dp0
+TITLE KoboldAI Uninstall Helper
+SET /P M=<loader.settings
+IF %M%==3 subst /D B: >nul
+IF %M%==1 subst /D K: >nul
+
+IF "%1" == "FORCE" GOTO UNINSTALL
+
+IF EXIST "Uninstall\unins000.exe" (
+start Uninstall\unins000.exe
+exit
+) ELSE (
+echo This will remove all KoboldAI folders that do not contain user data
+pause
+GOTO UNINSTALL
+)
+
+:UNINSTALL
+echo Uninstallation in progress, please wait...
+set DM=Y
+attrib -h .git >nul
+for /d %%D in (*) do if not "%%~nxD"=="stories" if not "%%~nxD"=="userscripts" if not "%%~nxD"=="settings" if not "%%~nxD"=="softprompts" if not "%%~nxD"=="models" if not "%%~nxD"=="Uninstall" rmdir /S /Q %%~nxD
+for %%i in (*) do if not "%%i"=="Uninstall.bat" del /q "%%i"
+set /P DM=Would you like to delete the models folder? (Y/n) :
+IF %DM%==Y rmdir models /s /q
+IF %DM%==y rmdir models /s /q
+set DM=N
+set /P DM=Would you like to delete all other user folders? (y/N) :
+IF %DM%==Y rmdir stories userscripts settings softprompts /s /q
+IF %DM%==y rmdir stories userscripts settings softprompts /s /q
+del Uninstall.bat
aiserver.py: 488 lines changed
@@ -1,7 +1,7 @@
 #!/usr/bin/python3
 #==================================================================#
 # KoboldAI
-# Version: 1.17.0
+# Version: 1.18.1
 # By: KoboldAIDev and the KoboldAI Community
 #==================================================================#
 
@@ -16,6 +16,9 @@ os.environ['EVENTLET_THREADPOOL_SIZE'] = '1'
 os.environ['TOKENIZERS_PARALLELISM'] = 'false'
 from eventlet import tpool
 
+import logging
+logging.getLogger("urllib3").setLevel(logging.ERROR)
+
 from os import path, getcwd
 import time
 import re
@@ -23,6 +26,7 @@ import json
 import collections
 import zipfile
 import packaging
+import packaging.version
 import contextlib
 import traceback
 import threading
@@ -54,6 +58,27 @@ if lupa.LUA_VERSION[:2] != (5, 4):
     print(f"Please install lupa==1.10. You have lupa {lupa.__version__}.", file=sys.stderr)
 
 
+# Make sure tqdm progress bars display properly in Colab
+from tqdm.auto import tqdm
+old_init = tqdm.__init__
+def new_init(self, *args, **kwargs):
+    old_init(self, *args, **kwargs)
+    if(self.ncols == 0 and kwargs.get("ncols") != 0):
+        self.ncols = 99
+tqdm.__init__ = new_init
+
+# Fix some issues with the OPT tokenizer
+from transformers import PreTrainedTokenizerBase
+old_pretrainedtokenizerbase_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__
+@classmethod
+def new_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs):
+    tokenizer = old_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs)
+    tokenizer._koboldai_header = tokenizer.encode("")
+    tokenizer.add_bos_token = False
+    tokenizer.add_prefix_space = False
+    return tokenizer
+PreTrainedTokenizerBase.from_pretrained = new_pretrainedtokenizerbase_from_pretrained
+
 #==================================================================#
 # Variables & Storage
 #==================================================================#
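For reference, the tqdm override above only widens bars that would otherwise report a zero column width (as can happen in Colab's output widget); a minimal sketch of how to observe that behaviour once the patch has run is:

from tqdm.auto import tqdm

bar = tqdm(total=10)
# 99 if tqdm could not detect a usable terminal width (ncols == 0),
# otherwise the detected width is left untouched by new_init above.
print(bar.ncols)
bar.close()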
@@ -76,9 +101,9 @@ mainmenu = [
     ["Adventure Models", "adventurelist", ""],
     ["Novel Models", "novellist", ""],
     ["NSFW Models", "nsfwlist", ""],
-    ["Chatbot Models", "chatlist", ""],
     ["Untuned GPT-Neo/J", "gptneolist", ""],
     ["Untuned Fairseq Dense", "fsdlist", ""],
+    ["Untuned OPT", "optlist", ""],
     ["Untuned XGLM", "xglmlist", ""],
     ["Untuned GPT2", "gpt2list", ""],
     ["Online Services", "apilist", ""],
@@ -86,8 +111,10 @@ mainmenu = [
     ]
 
 adventurelist= [
+    ["Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"],
     ["Skein 6B", "KoboldAI/GPT-J-6B-Skein", "16GB"],
     ["Adventure 6B", "KoboldAI/GPT-J-6B-Adventure", "16GB"],
+    ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB"],
     ["Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "8GB"],
     ["Adventure 1.3B", "KoboldAI/GPT-Neo-1.3B-Adventure", "6GB"],
     ["Adventure 125M (Mia)", "Merry/AID-Neo-125M", "2GB"],
@@ -95,11 +122,13 @@ adventurelist= [
     ]
 
 novellist= [
+    ["Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"],
     ["Janeway FSD 13B", "KoboldAI/fairseq-dense-13B-Janeway", "32GB"],
     ["Janeway FSD 6.7B", "KoboldAI/fairseq-dense-6.7B-Janeway", "16GB"],
     ["Janeway Neo 6B", "KoboldAI/GPT-J-6B-Janeway", "16GB"],
     ["Janeway Neo 2.7B", "KoboldAI/GPT-Neo-2.7B-Janeway", "8GB"],
     ["Janeway FSD 2.7B", "KoboldAI/fairseq-dense-2.7B-Janeway", "8GB"],
+    ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB"],
     ["Horni-LN 2.7B", "KoboldAI/GPT-Neo-2.7B-Horni-LN", "8GB"],
     ["Picard 2.7B (Older Janeway)", "KoboldAI/GPT-Neo-2.7B-Picard", "8GB"],
     ["Return to Main Menu", "Return", ""],
@@ -137,6 +166,17 @@ gpt2list = [
     ["Return to Main Menu", "Return", ""],
     ]
 
+optlist = [
+    ["OPT 30B", "facebook/opt-30b", "64GB"],
+    ["OPT 13B", "facebook/opt-13b", "32GB"],
+    ["OPT 6.7B", "facebook/opt-6.7b", "16GB"],
+    ["OPT 2.7B", "facebook/opt-2.7b", "8GB"],
+    ["OPT 1.3B", "facebook/opt-1.3b", "4GB"],
+    ["OPT 350M", "facebook/opt-350m", "2GB"],
+    ["OPT 125M", "facebook/opt-125m", "1GB"],
+    ["Return to Main Menu", "Return", ""],
+    ]
+
 fsdlist = [
     ["Fairseq Dense 13B", "KoboldAI/fairseq-dense-13B", "32GB"],
     ["Fairseq Dense 6.7B", "KoboldAI/fairseq-dense-6.7B", "16GB"],
@@ -172,7 +212,7 @@ class vars:
     model_type = "" # Model Type (Automatically taken from the model config)
     noai = False # Runs the script without starting up the transformers pipeline
     aibusy = False # Stops submissions while the AI is working
-    max_length = 1024 # Maximum number of tokens to submit per action
+    max_length = 2048 # Maximum number of tokens to submit per action
     ikmax = 3000 # Maximum number of characters to submit to InferKit
     genamt = 80 # Amount of text for each action to generate
     ikgen = 200 # Number of characters for InferKit to generate
@@ -182,6 +222,7 @@ class vars:
     temp = 0.5 # Default generator temperature
     top_p = 0.9 # Default generator top_p
     top_k = 0 # Default generator top_k
+    top_a = 0.0 # Default generator top-a
     tfs = 1.0 # Default generator tfs (tail-free sampling)
     typical = 1.0 # Default generator typical sampling threshold
     numseqs = 1 # Number of sequences to ask the generator to create
@@ -228,6 +269,8 @@ class vars:
     # badwords = [] # Array of str/chr values that should be removed from output
     badwordsids = [[13460], [6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting
     badwordsids_neox = [[0], [1], [44162], [9502], [12520], [31841], [36320], [49824], [34417], [6038], [34494], [24815], [26635], [24345], [3455], [28905], [44270], [17278], [32666], [46880], [7086], [43189], [37322], [17778], [20879], [49821], [3138], [14490], [4681], [21391], [26786], [43134], [9336], [683], [48074], [41256], [19181], [29650], [28532], [36487], [45114], [46275], [16445], [15104], [11337], [1168], [5647], [29], [27482], [44965], [43782], [31011], [42944], [47389], [6334], [17548], [38329], [32044], [35487], [2239], [34761], [7444], [1084], [12399], [18990], [17636], [39083], [1184], [35830], [28365], [16731], [43467], [47744], [1138], [16079], [40116], [45564], [18297], [42368], [5456], [18022], [42696], [34476], [23505], [23741], [39334], [37944], [45382], [38709], [33440], [26077], [43600], [34418], [36033], [6660], [48167], [48471], [15775], [19884], [41533], [1008], [31053], [36692], [46576], [20095], [20629], [31759], [46410], [41000], [13488], [30952], [39258], [16160], [27655], [22367], [42767], [43736], [49694], [13811], [12004], [46768], [6257], [37471], [5264], [44153], [33805], [20977], [21083], [25416], [14277], [31096], [42041], [18331], [33376], [22372], [46294], [28379], [38475], [1656], [5204], [27075], [50001], [16616], [11396], [7748], [48744], [35402], [28120], [41512], [4207], [43144], [14767], [15640], [16595], [41305], [44479], [38958], [18474], [22734], [30522], [46267], [60], [13976], [31830], [48701], [39822], [9014], [21966], [31422], [28052], [34607], [2479], [3851], [32214], [44082], [45507], [3001], [34368], [34758], [13380], [38363], [4299], [46802], [30996], [12630], [49236], [7082], [8795], [5218], [44740], [9686], [9983], [45301], [27114], [40125], [1570], [26997], [544], [5290], [49193], [23781], [14193], [40000], [2947], [43781], [9102], [48064], [42274], [18772], [49384], [9884], [45635], [43521], [31258], [32056], [47686], [21760], [13143], [10148], [26119], [44308], [31379], [36399], [23983], [46694], [36134], [8562], [12977], [35117], [28591], [49021], [47093], [28653], [29013], [46468], [8605], [7254], [25896], [5032], [8168], [36893], [38270], [20499], [27501], [34419], [29547], [28571], [36586], [20871], [30537], [26842], [21375], [31148], [27618], [33094], [3291], [31789], [28391], [870], [9793], [41361], [47916], [27468], [43856], [8850], [35237], [15707], [47552], [2730], [41449], [45488], [3073], [49806], [21938], [24430], [22747], [20924], [46145], [20481], [20197], [8239], [28231], [17987], [42804], [47269], [29972], [49884], [21382], [46295], [36676], [34616], [3921], [26991], [27720], [46265], [654], [9855], [40354], [5291], [34904], [44342], [2470], [14598], [880], [19282], [2498], [24237], [21431], [16369], [8994], [44524], [45662], [13663], [37077], [1447], [37786], [30863], [42854], [1019], [20322], [4398], [12159], [44072], [48664], [31547], [18736], [9259], [31], [16354], [21810], [4357], [37982], [5064], [2033], [32871], [47446], [62], [22158], [37387], [8743], [47007], [17981], [11049], [4622], [37916], [36786], [35138], [29925], [14157], [18095], [27829], [1181], [22226], [5709], [4725], [30189], [37014], [1254], [11380], [42989], [696], [24576], [39487], [30119], [1092], [8088], [2194], [9899], [14412], [21828], [3725], [13544], [5180], [44679], [34398], [3891], [28739], [14219], [37594], [49550], [11326], [6904], [17266], [5749], [10174], [23405], [9955], [38271], [41018], [13011], [48392], [36784], [24254], [21687], [23734], [5413], [41447], [45472], [10122], [17555], [15830], [47384], [12084], [31350], [47940], [11661], [27988], [45443], [905], [49651], [16614], [34993], [6781], [30803], [35869], [8001], [41604], [28118], [46462], [46762], [16262], [17281], [5774], [10943], [5013], [18257], [6750], [4713], [3951], [11899], [38791], [16943], [37596], [9318], [18413], [40473], [13208], [16375]]
+    badwordsids_opt = [[44717], [46613], [48513], [49923], [50185], [48755], [8488], [43303], [49659], [48601], [49817], [45405], [48742], [49925], [47720], [11227], [48937], [48784], [50017], [42248], [49310], [48082], [49895], [50025], [49092], [49007], [8061], [44226], [0], [742], [28578], [15698], [49784], [46679], [39365], [49281], [49609], [48081], [48906], [46161], [48554], [49670], [48677], [49721], [49632], [48610], [48462], [47457], [10975], [46077], [28696], [48709], [43839], [49798], [49154], [48203], [49625], [48395], [50155], [47161], [49095], [48833], [49420], [49666], [48443], [22176], [49242], [48651], [49138], [49750], [40389], [48021], [21838], [49070], [45333], [40862], [1], [49915], [33525], [49858], [50254], [44403], [48992], [48872], [46117], [49853], [47567], [50206], [41552], [50068], [48999], [49703], [49940], [49329], [47620], [49868], [49962], [2], [44082], [50236], [31274], [50260], [47052], [42645], [49177], [17523], [48691], [49900], [49069], [49358], [48794], [47529], [46479], [48457], [646], [49910], [48077], [48935], [46386], [48902], [49151], [48759], [49803], [45587], [48392], [47789], [48654], [49836], [49230], [48188], [50264], [46844], [44690], [48505], [50161], [27779], [49995], [41833], [50154], [49097], [48520], [50018], [8174], [50084], [49366], [49526], [50193], [7479], [49982], [3]]
+    fp32_model = False # Whether or not the most recently loaded HF model was in fp32 format
     deletewi = None # Temporary storage for UID to delete
     wirmvwhtsp = False # Whether to remove leading whitespace from WI entries
     widepth = 3 # How many historical actions to scan for WI hits
@@ -262,7 +305,7 @@ class vars:
     recentrngm = None # If a new random game was recently generated without Submitting after, this is the memory used (as a string), otherwise this is None
     useprompt = False # Whether to send the full prompt with every submit action
     breakmodel = False # For GPU users, whether to use both system RAM and VRAM to conserve VRAM while offering speedup compared to CPU-only
-    bmsupported = False # Whether the breakmodel option is supported (GPT-Neo/GPT-J/XGLM only, currently)
+    bmsupported = False # Whether the breakmodel option is supported (GPT-Neo/GPT-J/XGLM/OPT only, currently)
     nobreakmodel = False # Something specifically requested Breakmodel to be disabled (For example a models config)
     smandelete = False # Whether stories can be deleted from inside the browser
     smanrename = False # Whether stories can be renamed from inside the browser
@@ -274,6 +317,7 @@ class vars:
     acregex_ui = re.compile(r'^ *(&gt;.*)$', re.MULTILINE) # Pattern for matching actions in the HTML-escaped story so we can apply colouring, etc (make sure to encase part to format in parentheses)
     comregex_ai = re.compile(r'(?:\n<\|(?:.|\n)*?\|>(?=\n|$))|(?:<\|(?:.|\n)*?\|>\n?)') # Pattern for matching comments to remove them before sending them to the AI
     comregex_ui = re.compile(r'(<\|(?:.|\n)*?\|>)') # Pattern for matching comments in the editor
+    sampler_order = utils.default_sampler_order.copy()
     chatmode = False
     chatname = "You"
     adventure = False
@@ -288,7 +332,7 @@ class vars:
     quiet = False # If set will suppress any story text from being printed to the console (will only be seen on the client web page)
     debug = False # If set to true, will send debug information to the client for display
     lazy_load = True # Whether or not to use torch_lazy_loader.py for transformers models in order to reduce CPU memory usage
-    use_colab_tpu = os.environ.get("COLAB_TPU_ADDR", "") != "" # Whether or not we're in a Colab TPU instance and are going to use the TPU rather than the CPU
+    use_colab_tpu = os.environ.get("COLAB_TPU_ADDR", "") != "" or os.environ.get("TPU_NAME", "") != "" # Whether or not we're in a Colab TPU instance or Kaggle TPU instance and are going to use the TPU rather than the CPU
 
 utils.vars = vars
 
@@ -379,7 +423,7 @@ def device_list(n_layers, primary=None, selected=None):
 def device_config(config):
     global breakmodel, generator
     import breakmodel
-    n_layers = config.num_layers if hasattr(config, "num_layers") else config.n_layer
+    n_layers = utils.num_layers(config)
     if(args.breakmodel_gpulayers is not None):
         try:
             breakmodel.gpu_blocks = list(map(int, args.breakmodel_gpulayers.split(',')))
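utils.num_layers lives in utils.py, which is not part of this diff; a plausible sketch of what the helper abstracts over (hypothetical code, shown only to explain why it replaces the hasattr chain) is:

def num_layers(config):
    # GPT-Neo style configs expose num_layers, GPT-2/GPT-J style configs expose
    # n_layer, and OPT-style configs expose num_hidden_layers.
    for attr in ("num_layers", "n_layer", "num_hidden_layers"):
        if hasattr(config, attr):
            return getattr(config, attr)
    raise AttributeError("could not determine layer count from config")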
@@ -452,7 +496,7 @@ def device_config(config):
     # If all layers are on the same device, use the old GPU generation mode
     while(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0):
         breakmodel.gpu_blocks.pop()
-    if(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (-1, config.num_layers if hasattr(config, "num_layers") else config.n_layer)):
+    if(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (-1, utils.num_layers(config))):
         vars.breakmodel = False
         vars.usegpu = True
         vars.gpu_device = len(breakmodel.gpu_blocks)-1
@@ -484,22 +528,33 @@ def move_model_to_devices(model):
             model.lm_head.to(breakmodel.primary_device)
         if(hasattr(model.transformer, 'wpe')):
             model.transformer.wpe.to(breakmodel.primary_device)
-    else:
+    elif(not hasattr(model.model, "decoder")):
         model.model.embed_tokens.to(breakmodel.primary_device)
         model.model.layer_norm.to(breakmodel.primary_device)
         model.lm_head.to(breakmodel.primary_device)
         model.model.embed_positions.to(breakmodel.primary_device)
+    else:
+        model.model.decoder.embed_tokens.to(breakmodel.primary_device)
+        if(model.model.decoder.project_in is not None):
+            model.model.decoder.project_in.to(breakmodel.primary_device)
+        if(model.model.decoder.project_out is not None):
+            model.model.decoder.project_out.to(breakmodel.primary_device)
+        model.model.decoder.embed_positions.to(breakmodel.primary_device)
     gc.collect()
     GPTNeoModel.forward = breakmodel.new_forward_neo
     if("GPTJModel" in globals()):
         GPTJModel.forward = breakmodel.new_forward_neo # type: ignore
     if("XGLMModel" in globals()):
         XGLMModel.forward = breakmodel.new_forward_xglm # type: ignore
+    if("OPTDecoder" in globals()):
+        OPTDecoder.forward = breakmodel.new_forward_opt # type: ignore
     generator = model.generate
     if(hasattr(model, "transformer")):
         breakmodel.move_hidden_layers(model.transformer)
-    else:
+    elif(not hasattr(model.model, "decoder")):
         breakmodel.move_hidden_layers(model.model, model.model.layers)
+    else:
+        breakmodel.move_hidden_layers(model.model.decoder, model.model.decoder.layers)
 
 #==================================================================#
 # Allow the models to override some settings
 #==================================================================#
@@ -515,13 +570,17 @@ def loadmodelsettings():
         js = json.load(open(vars.custmodpth.replace('/', '_') + "/config.json", "r"))
     except Exception as e:
         js = {}
-    if vars.model_type == "xglm" or vars.model_type == "opt" or js.get("compat", "j") == "fairseq_lm":
+    if vars.model_type == "xglm" or js.get("compat", "j") == "fairseq_lm":
         vars.newlinemode = "s" # Default to </s> newline mode if using XGLM
+    if vars.model_type == "opt":
+        vars.newlinemode = "ns" # Handle </s> but don't convert newlines if using Fairseq models that have newlines trained in them
     vars.modelconfig = js
     if("badwordsids" in js):
         vars.badwordsids = js["badwordsids"]
     if("nobreakmodel" in js):
         vars.nobreakmodel = js["nobreakmodel"]
+    if("sampler_order" in js):
+        vars.sampler_order = js["sampler_order"]
     if("temp" in js):
         vars.temp = js["temp"]
     if("top_p" in js):
@@ -532,6 +591,8 @@ def loadmodelsettings():
         vars.tfs = js["tfs"]
     if("typical" in js):
         vars.typical = js["typical"]
+    if("top_a" in js):
+        vars.top_a = js["top_a"]
     if("rep_pen" in js):
         vars.rep_pen = js["rep_pen"]
     if("rep_pen_slope" in js):
@@ -563,11 +624,13 @@ def savesettings():
     js = {}
     js["apikey"] = vars.apikey
     js["andepth"] = vars.andepth
+    js["sampler_order"] = vars.sampler_order
     js["temp"] = vars.temp
     js["top_p"] = vars.top_p
     js["top_k"] = vars.top_k
     js["tfs"] = vars.tfs
     js["typical"] = vars.typical
+    js["top_a"] = vars.top_a
     js["rep_pen"] = vars.rep_pen
     js["rep_pen_slope"] = vars.rep_pen_slope
     js["rep_pen_range"] = vars.rep_pen_range
@@ -615,88 +678,102 @@ def settingschanged():
 #==================================================================#
 # Read settings from client file JSON and send to vars
 #==================================================================#
 
 def loadsettings():
+    if(path.exists("defaults/" + getmodelname().replace('/', '_') + ".settings")):
+        # Read file contents into JSON object
+        file = open("defaults/" + getmodelname().replace('/', '_') + ".settings", "r")
+        js = json.load(file)
+
+        processsettings(js)
+        file.close()
     if(path.exists("settings/" + getmodelname().replace('/', '_') + ".settings")):
         # Read file contents into JSON object
         file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "r")
         js = json.load(file)
 
-        # Copy file contents to vars
-        if("apikey" in js):
-            vars.apikey = js["apikey"]
-        if("andepth" in js):
-            vars.andepth = js["andepth"]
-        if("temp" in js):
-            vars.temp = js["temp"]
-        if("top_p" in js):
-            vars.top_p = js["top_p"]
-        if("top_k" in js):
-            vars.top_k = js["top_k"]
-        if("tfs" in js):
-            vars.tfs = js["tfs"]
-        if("typical" in js):
-            vars.typical = js["typical"]
-        if("rep_pen" in js):
-            vars.rep_pen = js["rep_pen"]
-        if("rep_pen_slope" in js):
-            vars.rep_pen_slope = js["rep_pen_slope"]
-        if("rep_pen_range" in js):
-            vars.rep_pen_range = js["rep_pen_range"]
-        if("genamt" in js):
-            vars.genamt = js["genamt"]
-        if("max_length" in js):
-            vars.max_length = js["max_length"]
-        if("ikgen" in js):
-            vars.ikgen = js["ikgen"]
-        if("formatoptns" in js):
-            vars.formatoptns = js["formatoptns"]
-        if("numseqs" in js):
-            vars.numseqs = js["numseqs"]
-        if("widepth" in js):
-            vars.widepth = js["widepth"]
-        if("useprompt" in js):
-            vars.useprompt = js["useprompt"]
-        if("adventure" in js):
-            vars.adventure = js["adventure"]
-        if("chatmode" in js):
-            vars.chatmode = js["chatmode"]
-        if("chatname" in js):
-            vars.chatname = js["chatname"]
-        if("dynamicscan" in js):
-            vars.dynamicscan = js["dynamicscan"]
-        if("nopromptgen" in js):
-            vars.nopromptgen = js["nopromptgen"]
-        if("rngpersist" in js):
-            vars.rngpersist = js["rngpersist"]
-        if("nogenmod" in js):
-            vars.nogenmod = js["nogenmod"]
-        if("autosave" in js):
-            vars.autosave = js["autosave"]
-        if("newlinemode" in js):
-            vars.newlinemode = js["newlinemode"]
-        if("welcome" in js):
-            vars.welcome = js["welcome"]
-
-        if("antemplate" in js):
-            vars.setauthornotetemplate = js["antemplate"]
-            if(not vars.gamestarted):
-                vars.authornotetemplate = vars.setauthornotetemplate
-
-        if("userscripts" in js):
-            vars.userscripts = []
-            for userscript in js["userscripts"]:
-                if type(userscript) is not str:
-                    continue
-                userscript = userscript.strip()
-                if len(userscript) != 0 and all(q not in userscript for q in ("..", ":")) and all(userscript[0] not in q for q in ("/", "\\")) and os.path.exists(fileops.uspath(userscript)):
-                    vars.userscripts.append(userscript)
-
-        if("corescript" in js and type(js["corescript"]) is str and all(q not in js["corescript"] for q in ("..", ":")) and all(js["corescript"][0] not in q for q in ("/", "\\"))):
-            vars.corescript = js["corescript"]
-        else:
-            vars.corescript = "default.lua"
+        processsettings(js)
         file.close()
+
+def processsettings(js):
+    # Copy file contents to vars
+    if("apikey" in js):
+        vars.apikey = js["apikey"]
+    if("andepth" in js):
+        vars.andepth = js["andepth"]
+    if("sampler_order" in js):
+        vars.sampler_order = js["sampler_order"]
+    if("temp" in js):
+        vars.temp = js["temp"]
+    if("top_p" in js):
+        vars.top_p = js["top_p"]
+    if("top_k" in js):
+        vars.top_k = js["top_k"]
+    if("tfs" in js):
+        vars.tfs = js["tfs"]
+    if("typical" in js):
+        vars.typical = js["typical"]
+    if("top_a" in js):
+        vars.top_a = js["top_a"]
+    if("rep_pen" in js):
+        vars.rep_pen = js["rep_pen"]
+    if("rep_pen_slope" in js):
+        vars.rep_pen_slope = js["rep_pen_slope"]
+    if("rep_pen_range" in js):
+        vars.rep_pen_range = js["rep_pen_range"]
+    if("genamt" in js):
+        vars.genamt = js["genamt"]
+    if("max_length" in js):
+        vars.max_length = js["max_length"]
+    if("ikgen" in js):
+        vars.ikgen = js["ikgen"]
+    if("formatoptns" in js):
+        vars.formatoptns = js["formatoptns"]
+    if("numseqs" in js):
+        vars.numseqs = js["numseqs"]
+    if("widepth" in js):
+        vars.widepth = js["widepth"]
+    if("useprompt" in js):
+        vars.useprompt = js["useprompt"]
+    if("adventure" in js):
+        vars.adventure = js["adventure"]
+    if("chatmode" in js):
+        vars.chatmode = js["chatmode"]
+    if("chatname" in js):
+        vars.chatname = js["chatname"]
+    if("dynamicscan" in js):
+        vars.dynamicscan = js["dynamicscan"]
+    if("nopromptgen" in js):
+        vars.nopromptgen = js["nopromptgen"]
+    if("rngpersist" in js):
+        vars.rngpersist = js["rngpersist"]
+    if("nogenmod" in js):
+        vars.nogenmod = js["nogenmod"]
+    if("autosave" in js):
+        vars.autosave = js["autosave"]
+    if("newlinemode" in js):
+        vars.newlinemode = js["newlinemode"]
+    if("welcome" in js):
+        vars.welcome = js["welcome"]
+
+    if("antemplate" in js):
+        vars.setauthornotetemplate = js["antemplate"]
+        if(not vars.gamestarted):
+            vars.authornotetemplate = vars.setauthornotetemplate
+
+    if("userscripts" in js):
+        vars.userscripts = []
+        for userscript in js["userscripts"]:
+            if type(userscript) is not str:
+                continue
+            userscript = userscript.strip()
+            if len(userscript) != 0 and all(q not in userscript for q in ("..", ":")) and all(userscript[0] not in q for q in ("/", "\\")) and os.path.exists(fileops.uspath(userscript)):
+                vars.userscripts.append(userscript)
+
+    if("corescript" in js and type(js["corescript"]) is str and all(q not in js["corescript"] for q in ("..", ":")) and all(js["corescript"][0] not in q for q in ("/", "\\"))):
+        vars.corescript = js["corescript"]
+    else:
+        vars.corescript = "default.lua"
 
 #==================================================================#
 # Load a soft prompt from a file
@@ -760,7 +837,7 @@ def spRequest(filename):
         tensor = tensor.reshape(
             tpu_mtj_backend.params["cores_per_replica"],
             -1,
-            tpu_mtj_backend.params["d_model"],
+            tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]),
         )
         vars.sp = tpu_mtj_backend.shard_xmap(np.float32(tensor))
     else:
@@ -782,6 +859,7 @@ parser.add_argument("--ngrok", action='store_true', help="Optimizes KoboldAI for
 parser.add_argument("--localtunnel", action='store_true', help="Optimizes KoboldAI for Remote Play using Localtunnel")
 parser.add_argument("--host", action='store_true', help="Optimizes KoboldAI for Remote Play without using a proxy service")
 parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable")
+parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)")
 parser.add_argument("--model", help="Specify the Model Type to skip the Menu")
 parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)")
 parser.add_argument("--revision", help="Specify the model revision for huggingface models (can be a git branch/tag name or a git commit hash)")
@@ -841,6 +919,8 @@ if args.cpu:
 vars.smandelete = vars.host == args.override_delete
 vars.smanrename = vars.host == args.override_rename
 
+vars.aria2_port = args.aria2_port or 6799
+
 # Select a model to run
 if args.model:
     print("Welcome to KoboldAI!\nYou have selected the following Model:", vars.model)
@@ -894,12 +974,15 @@ if(vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMe
         print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)")
         vars.model_type = "gpt_neo"
 
+if(vars.model_type == "opt"):
+    vars.badwordsids = vars.badwordsids_opt
+
 if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
     loadmodelsettings()
     loadsettings()
     print("{0}Looking for GPU support...{1}".format(colors.PURPLE, colors.END), end="")
     vars.hascuda = torch.cuda.is_available()
-    vars.bmsupported = vars.model_type in ("gpt_neo", "gptj", "xglm") and not vars.nobreakmodel
+    vars.bmsupported = vars.model_type in ("gpt_neo", "gptj", "xglm", "opt") and not vars.nobreakmodel
     if(args.breakmodel is not None and args.breakmodel):
         print("WARNING: --breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).", file=sys.stderr)
         if(args.breakmodel_layers is not None):
@@ -1111,17 +1194,36 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
             globals()[m] = getattr(__import__("transformers"), m)
         except:
             pass
+    try:
+        from transformers.models.opt.modeling_opt import OPTDecoder
+    except:
+        pass
     import transformers.generation_utils
     from transformers import __version__ as transformers_version
 
     from transformers import PreTrainedModel
+    from transformers import modeling_utils
     old_from_pretrained = PreTrainedModel.from_pretrained.__func__
     @classmethod
     def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
+        vars.fp32_model = False
+        utils.num_shards = None
+        utils.current_shard = 0
+        utils.from_pretrained_model_name = pretrained_model_name_or_path
+        utils.from_pretrained_index_filename = None
+        utils.from_pretrained_kwargs = kwargs
+        utils.bar = None
         if not args.no_aria2:
            utils.aria2_hook(pretrained_model_name_or_path, **kwargs)
         return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
     PreTrainedModel.from_pretrained = new_from_pretrained
+    if(hasattr(modeling_utils, "get_checkpoint_shard_files")):
+        old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files
+        def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs):
+            utils.num_shards = utils.get_num_shards(index_filename)
+            utils.from_pretrained_index_filename = index_filename
+            return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs)
+        modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files
 
     # Lazy loader
     import torch_lazy_loader
@@ -1139,6 +1241,10 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
             ram_blocks = gpu_blocks = cumulative_gpu_blocks = None
 
         def lazy_load_callback(model_dict, f, **_):
+            if lazy_load_callback.nested:
+                return
+            lazy_load_callback.nested = True
+
             device_map = {}
 
             for _key, spec in lazy_load_spec.get("layer_weights", {}).items():
@@ -1153,12 +1259,22 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                 if isinstance(value, torch_lazy_loader.LazyTensor) and key not in device_map:
                     device_map[key] = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu"
 
+            if utils.num_shards is None or utils.current_shard == 0:
+                if utils.num_shards is not None:
+                    num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
+                else:
+                    num_tensors = len(device_map)
+                print(flush=True)
+                utils.bar = tqdm(total=num_tensors, desc="Loading model tensors")
+
             with zipfile.ZipFile(f, "r") as z:
                 try:
                     last_storage_key = None
                     f = None
                     current_offset = 0
-                    for key in tqdm(sorted(device_map.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)), desc="Loading model tensors"):
+                    if utils.num_shards is not None:
+                        utils.current_shard += 1
+                    for key in sorted(device_map.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
                         storage_key = model_dict[key].key
                         if storage_key != last_storage_key or model_dict[key].seek_offset < current_offset:
                             last_storage_key = storage_key
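The bookkeeping above keeps one progress bar alive across every checkpoint shard rather than opening a new bar per shard; a stripped-down model of that design (class and method names are chosen here for illustration and do not come from utils.py):

from tqdm.auto import tqdm

class ShardProgress:
    def __init__(self, num_shards=None):
        self.num_shards = num_shards  # None for an unsharded checkpoint
        self.current_shard = 0
        self.bar = None

    def begin_shard(self, num_tensors):
        # Only create the bar once, before the first shard is read.
        if self.num_shards is None or self.current_shard == 0:
            self.bar = tqdm(total=num_tensors, desc="Loading model tensors")
        if self.num_shards is not None:
            self.current_shard += 1

    def end_shard(self):
        # Only close the bar after the last shard has been read.
        if self.num_shards is None or self.current_shard >= self.num_shards:
            self.bar.close()
            self.bar = None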
@@ -1175,6 +1291,8 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                         nbytes = size if dtype is torch.bool else size * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3)
                         #print(f"Transferring <{key}> to {'(CPU)' if device == 'cpu' else '[device ' + str(device) + ']'} ... ", end="", flush=True)
                         model_dict[key] = model_dict[key].materialize(f, map_location="cpu")
+                        if model_dict[key].dtype is torch.float32:
+                            vars.fp32_model = True
                         if convert_to_float16 and vars.hascuda and (vars.breakmodel or vars.usegpu) and model_dict[key].dtype is torch.float32:
                             model_dict[key] = model_dict[key].to(torch.float16)
                         if not vars.usegpu and not vars.breakmodel and model_dict[key].dtype is torch.float16:
@@ -1182,10 +1300,16 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                         model_dict[key] = model_dict[key].to(device)
                         #print("OK", flush=True)
                         current_offset += nbytes
+                        utils.bar.update(1)
                 finally:
+                    if utils.num_shards is None or utils.current_shard >= utils.num_shards:
+                        utils.bar.close()
+                        utils.bar = None
+                    lazy_load_callback.nested = False
                     if isinstance(f, zipfile.ZipExtFile):
                         f.close()
 
+        lazy_load_callback.nested = False
         return lazy_load_callback
 
     lazy_load_config_path = os.path.join("maps", vars.model_type + ".json")
@@ -1231,8 +1355,10 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
             input_ids.clamp_(max=self.config.vocab_size-1)
             if(hasattr(self, "transformer")):
                 inputs_embeds = self.transformer.wte(input_ids)
-            else:
+            elif(not hasattr(self.model, "decoder")):
                 inputs_embeds = self.model.embed_tokens(input_ids)
+            else:
+                inputs_embeds = self.model.decoder.embed_tokens(input_ids)
             if(vars.sp is not None):
                 vars.sp = vars.sp.to(inputs_embeds.dtype).to(inputs_embeds.device)
                 inputs_embeds = torch.where(
@@ -1240,23 +1366,42 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                     vars.sp[shifted_input_ids.clamp(min=0)],
                     inputs_embeds,
                 )
-            if(not hasattr(self, "transformer")):
+            if(hasattr(self, "model") and hasattr(self.model, "embed_scale")):
                 inputs_embeds *= self.model.embed_scale
             kwargs['inputs_embeds'] = inputs_embeds
             return old_forward(self, *args, **kwargs)
         cls.forward = new_causallm_forward
     for cls in (GPT2LMHeadModel, GPTNeoForCausalLM):
         patch_causallm(cls)
-    for c in ("GPTJForCausalLM", "XGLMForCausalLM"):
+    for c in ("GPTJForCausalLM", "XGLMForCausalLM", "OPTForCausalLM"):
         try:
             patch_causallm(getattr(__import__("transformers"), c))
         except:
             pass
 
 
+    # Fix a bug in OPTForCausalLM where self.lm_head is the wrong size
+    if(packaging.version.parse("4.19.0.dev0") <= packaging.version.parse(transformers_version) < packaging.version.parse("4.20.0")):
+        try:
+            from transformers import OPTForCausalLM, OPTModel
+        except ImportError:
+            pass
+        else:
+            # This is the same as the original __init__ but with
+            # config.hidden_size
+            # replaced with
+            # config.word_embed_proj_dim
+            def new_init(self, config):
+                super(OPTForCausalLM, self).__init__(config)
+                self.model = OPTModel(config)
+                self.lm_head = torch.nn.Linear(config.word_embed_proj_dim, config.vocab_size, bias=False)
+                self.post_init()
+            OPTForCausalLM.__init__ = new_init
+
+
     # Patch transformers to use our custom logit warpers
     from transformers import LogitsProcessorList, LogitsWarper, LogitsProcessor, TopKLogitsWarper, TopPLogitsWarper, TemperatureLogitsWarper, RepetitionPenaltyLogitsProcessor
-    from warpers import AdvancedRepetitionPenaltyLogitsProcessor, TailFreeLogitsWarper, TypicalLogitsWarper
+    from warpers import AdvancedRepetitionPenaltyLogitsProcessor, TailFreeLogitsWarper, TypicalLogitsWarper, TopALogitsWarper
 
     def dynamic_processor_wrap(cls, field_name, var_name, cond=None):
         old_call = cls.__call__
@@ -1276,6 +1421,7 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
cls.__call__ = new_call
dynamic_processor_wrap(AdvancedRepetitionPenaltyLogitsProcessor, ("penalty", "penalty_slope", "penalty_range"), ("rep_pen", "rep_pen_slope", "rep_pen_range"), cond=lambda x: x[0] != 1.0)
dynamic_processor_wrap(TopKLogitsWarper, "top_k", "top_k", cond=lambda x: x > 0)
+dynamic_processor_wrap(TopALogitsWarper, "top_a", "top_a", cond=lambda x: x > 0.0)
dynamic_processor_wrap(TopPLogitsWarper, "top_p", "top_p", cond=lambda x: x < 1.0)
dynamic_processor_wrap(TailFreeLogitsWarper, "tfs", "tfs", cond=lambda x: x < 1.0)
dynamic_processor_wrap(TypicalLogitsWarper, "typical", "typical", cond=lambda x: x < 1.0)
@@ -1319,21 +1465,30 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
new_get_logits_processor.old_get_logits_processor = transformers.generation_utils.GenerationMixin._get_logits_processor
transformers.generation_utils.GenerationMixin._get_logits_processor = new_get_logits_processor

+class KoboldLogitsWarperList(LogitsProcessorList):
+def __init__(self, beams: int = 1, **kwargs):
+self.__warper_list: List[LogitsWarper] = []
+self.__warper_list.append(TopKLogitsWarper(top_k=1, min_tokens_to_keep=1 + (beams > 1)))
+self.__warper_list.append(TopALogitsWarper(top_a=0.5, min_tokens_to_keep=1 + (beams > 1)))
+self.__warper_list.append(TopPLogitsWarper(top_p=0.5, min_tokens_to_keep=1 + (beams > 1)))
+self.__warper_list.append(TailFreeLogitsWarper(tfs=0.5, min_tokens_to_keep=1 + (beams > 1)))
+self.__warper_list.append(TypicalLogitsWarper(typical=0.5, min_tokens_to_keep=1 + (beams > 1)))
+self.__warper_list.append(TemperatureLogitsWarper(temperature=0.5))

+def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, *args, **kwargs):
+for k in vars.sampler_order:
+scores = self.__warper_list[k](input_ids, scores, *args, **kwargs)
+return scores

def new_get_logits_warper(beams: int = 1,) -> LogitsProcessorList:
-warper_list = LogitsProcessorList()
+return KoboldLogitsWarperList(beams=beams)
-warper_list.append(TopKLogitsWarper(top_k=1, min_tokens_to_keep=1 + (beams > 1)))
-warper_list.append(TopPLogitsWarper(top_p=0.5, min_tokens_to_keep=1 + (beams > 1)))
-warper_list.append(TailFreeLogitsWarper(tfs=0.5, min_tokens_to_keep=1 + (beams > 1)))
-warper_list.append(TypicalLogitsWarper(typical=0.5, min_tokens_to_keep=1 + (beams > 1)))
-warper_list.append(TemperatureLogitsWarper(temperature=0.5))
-return warper_list

def new_sample(self, *args, **kwargs):
assert kwargs.pop("logits_warper", None) is not None
kwargs["logits_warper"] = new_get_logits_warper(
beams=1,
)
-if(vars.newlinemode == "s"):
+if(vars.newlinemode == "s") or (vars.newlinemode == "ns"):
kwargs["eos_token_id"] = -1
kwargs.setdefault("pad_token_id", 2)
return new_sample.old_sample(self, *args, **kwargs)
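Note: the KoboldLogitsWarperList added in the hunk above keeps a single instance of every warper and simply indexes into that list with vars.sampler_order, so reordering the samplers never rebuilds the warpers. A minimal standalone sketch of that dispatch idea (names and example callables are illustrative, not part of the patch):

    from typing import Callable, List

    Warper = Callable[[List[float]], List[float]]

    def apply_in_order(warpers: List[Warper], sampler_order: List[int], scores: List[float]) -> List[float]:
        # sampler_order is a permutation of indices into `warpers`
        for k in sampler_order:
            scores = warpers[k](scores)
        return scores

    # Order matters: clipping before doubling differs from doubling before clipping.
    double = lambda s: [x * 2 for x in s]
    clip = lambda s: [min(x, 1.0) for x in s]
    print(apply_in_order([double, clip], [1, 0], [0.4, 0.9]))  # clip, then double -> [0.8, 1.8]
    print(apply_in_order([double, clip], [0, 1], [0.4, 0.9]))  # double, then clip -> [0.8, 1.0]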
@@ -1408,12 +1563,18 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go

def get_hidden_size_from_model(model):
try:
-return int(model.transformer.hidden_size)
+return int(model.model.decoder.project_in.in_features)
except:
try:
-return int(model.transformer.embed_dim)
+return int(model.model.decoder.embed_tokens.out_features)
except:
-return int(model.lm_head.in_features)
+try:
+return int(model.transformer.hidden_size)
+except:
+try:
+return int(model.transformer.embed_dim)
+except:
+return int(model.lm_head.in_features)

def maybe_low_cpu_mem_usage() -> Dict[str, Any]:
if(packaging.version.parse(transformers_version) < packaging.version.parse("4.11.0")):
@@ -1468,12 +1629,16 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
import shutil
shutil.move(vars.model.replace('/', '_'), "models/{}".format(vars.model.replace('/', '_')))
print("\n", flush=True)
-with maybe_use_float16(), torch_lazy_loader.use_lazy_torch_load(enable=vars.lazy_load, callback=get_lazy_load_callback(model_config.num_layers if hasattr(model_config, "num_layers") else model_config.n_layer) if vars.lazy_load else None, dematerialized_modules=True):
+with maybe_use_float16(), torch_lazy_loader.use_lazy_torch_load(enable=vars.lazy_load, callback=get_lazy_load_callback(utils.num_layers(model_config)) if vars.lazy_load else None, dematerialized_modules=True):
if(vars.lazy_load): # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time
lowmem = {}
if(os.path.isdir(vars.custmodpth)):
try:
tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+except Exception as e:
+pass
+try:
+tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@@ -1486,6 +1651,10 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
try:
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
+except Exception as e:
+pass
+try:
+tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@@ -1496,8 +1665,25 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
except Exception as e:
model = GPTNeoForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", **lowmem)
else:
+old_rebuild_tensor = torch._utils._rebuild_tensor
+def new_rebuild_tensor(storage: Union[torch_lazy_loader.LazyTensor, torch.Storage], storage_offset, shape, stride):
+if(not isinstance(storage, torch_lazy_loader.LazyTensor)):
+dtype = storage.dtype
+else:
+dtype = storage.storage_type.dtype
+if(not isinstance(dtype, torch.dtype)):
+dtype = storage.storage_type(0).dtype
+if(dtype is torch.float32 and len(shape) >= 2):
+vars.fp32_model = True
+return old_rebuild_tensor(storage, storage_offset, shape, stride)
+torch._utils._rebuild_tensor = new_rebuild_tensor

try:
tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
+except Exception as e:
+pass
+try:
+tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
except Exception as e:
try:
tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
@@ -1508,11 +1694,32 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
except Exception as e:
model = GPTNeoForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", **lowmem)

+torch._utils._rebuild_tensor = old_rebuild_tensor

if not args.colab or args.savemodel:
import shutil
-model = model.half()
-model.save_pretrained("models/{}".format(vars.model.replace('/', '_')), max_shard_size="500MiB")
tokenizer.save_pretrained("models/{}".format(vars.model.replace('/', '_')))
+if(vars.fp32_model): # Use save_pretrained to convert fp32 models to fp16
+model = model.half()
+model.save_pretrained("models/{}".format(vars.model.replace('/', '_')), max_shard_size="500MiB")
+else: # For fp16 models, we can just copy the model files directly
+import transformers.configuration_utils
+import transformers.modeling_utils
+import transformers.file_utils
+# Save the config.json
+shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, transformers.configuration_utils.CONFIG_NAME, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.configuration_utils.CONFIG_NAME))
+if(utils.num_shards is None):
+# Save the pytorch_model.bin of an unsharded model
+shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, transformers.modeling_utils.WEIGHTS_NAME, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_NAME))
+else:
+with open(utils.from_pretrained_index_filename) as f:
+map_data = json.load(f)
+filenames = set(map_data["weight_map"].values())
+# Save the pytorch_model.bin.index.json of a sharded model
+shutil.move(utils.from_pretrained_index_filename, os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_INDEX_NAME))
+# Then save the pytorch_model-#####-of-#####.bin files
+for filename in filenames:
+shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, filename, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), filename))
shutil.rmtree("cache/")

if(vars.hascuda):
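For sharded checkpoints, the branch above reads the pytorch_model.bin.index.json that from_pretrained already downloaded and moves every shard it references. A small hedged sketch of that index handling (structure as used above, function name and path are illustrative):

    import json

    def shard_filenames(index_path: str) -> set:
        # The index maps each parameter name to the shard file holding it;
        # the distinct values are the shard files that need to be copied.
        with open(index_path) as f:
            map_data = json.load(f)
        return set(map_data["weight_map"].values())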
@@ -1548,13 +1755,28 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
else:
from transformers import PreTrainedModel
+from transformers import modeling_utils
old_from_pretrained = PreTrainedModel.from_pretrained.__func__
@classmethod
def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
+vars.fp32_model = False
+utils.num_shards = None
+utils.current_shard = 0
+utils.from_pretrained_model_name = pretrained_model_name_or_path
+utils.from_pretrained_index_filename = None
+utils.from_pretrained_kwargs = kwargs
+utils.bar = None
if not args.no_aria2:
utils.aria2_hook(pretrained_model_name_or_path, **kwargs)
return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
PreTrainedModel.from_pretrained = new_from_pretrained
+if(hasattr(modeling_utils, "get_checkpoint_shard_files")):
+old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files
+def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs):
+utils.num_shards = utils.get_num_shards(index_filename)
+utils.from_pretrained_index_filename = index_filename
+return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs)
+modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files

def tpumtjgetsofttokens():
soft_tokens = None
@@ -1562,14 +1784,14 @@ else:
global np
if 'np' not in globals():
import numpy as np
-tensor = np.zeros((1, tpu_mtj_backend.params["d_model"]), dtype=np.float32)
+tensor = np.zeros((1, tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"])), dtype=np.float32)
rows = tensor.shape[0]
padding_amount = tpu_mtj_backend.params["seq"] - (tpu_mtj_backend.params["seq"] % -tpu_mtj_backend.params["cores_per_replica"]) - rows
tensor = np.pad(tensor, ((0, padding_amount), (0, 0)))
tensor = tensor.reshape(
tpu_mtj_backend.params["cores_per_replica"],
-1,
-tpu_mtj_backend.params["d_model"],
+tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]),
)
vars.sp = tpu_mtj_backend.shard_xmap(tensor)
soft_tokens = np.arange(
@@ -1631,11 +1853,13 @@ else:

def tpumtjgenerate_settings_callback() -> dict:
return {
+"sampler_order": vars.sampler_order,
"top_p": float(vars.top_p),
"temp": float(vars.temp),
"top_k": int(vars.top_k),
"tfs": float(vars.tfs),
"typical": float(vars.typical),
+"top_a": float(vars.top_a),
"repetition_penalty": float(vars.rep_pen),
"rpslope": float(vars.rep_pen_slope),
"rprange": int(vars.rep_pen_range),
@@ -1658,7 +1882,7 @@ else:
if vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and (not vars.custmodpth or not os.path.isdir(vars.custmodpth)):
raise FileNotFoundError(f"The specified model path {repr(vars.custmodpth)} is not the path to a valid folder")
import tpu_mtj_backend
-if(vars.model == "TPUMeshTransformerGPTNeoX"):
+if(vars.model == "TPUMeshTransformerGPTNeoX" or vars.model_type == "opt"):
tpu_mtj_backend.pad_token_id = 1
tpu_mtj_backend.vars = vars
tpu_mtj_backend.warper_callback = tpumtjgenerate_warper_callback
@@ -1670,7 +1894,7 @@ else:
loadmodelsettings()
loadsettings()
tpu_mtj_backend.load_model(vars.custmodpth, hf_checkpoint=vars.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and vars.use_colab_tpu, **vars.modelconfig)
-vars.modeldim = int(tpu_mtj_backend.params["d_model"])
+vars.modeldim = int(tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]))
tokenizer = tpu_mtj_backend.tokenizer
else:
loadsettings()
@@ -1998,6 +2222,7 @@ def lua_has_setting(setting):
"settopk",
"settfs",
"settypical",
+"settopa",
"setreppen",
"setreppenslope",
"setreppenrange",
@@ -2017,6 +2242,7 @@ def lua_has_setting(setting):
"top_k",
"tfs",
"typical",
+"topa",
"reppen",
"reppenslope",
"reppenrange",
@@ -2051,6 +2277,7 @@ def lua_get_setting(setting):
if(setting in ("settopk", "topk", "top_k")): return vars.top_k
if(setting in ("settfs", "tfs")): return vars.tfs
if(setting in ("settypical", "typical")): return vars.typical
+if(setting in ("settopa", "topa")): return vars.top_a
if(setting in ("setreppen", "reppen")): return vars.rep_pen
if(setting in ("setreppenslope", "reppenslope")): return vars.rep_pen_slope
if(setting in ("setreppenrange", "reppenrange")): return vars.rep_pen_range
@@ -2086,6 +2313,7 @@ def lua_set_setting(setting, v):
if(setting in ("settopk", "topk")): vars.top_k = v
if(setting in ("settfs", "tfs")): vars.tfs = v
if(setting in ("settypical", "typical")): vars.typical = v
+if(setting in ("settopa", "topa")): vars.top_a = v
if(setting in ("setreppen", "reppen")): vars.rep_pen = v
if(setting in ("setreppenslope", "reppenslope")): vars.rep_pen_slope = v
if(setting in ("setreppenrange", "reppenrange")): vars.rep_pen_range = v
@@ -2510,6 +2738,11 @@ def get_message(msg):
emit('from_server', {'cmd': 'setlabeltypical', 'data': msg['data']}, broadcast=True)
settingschanged()
refresh_settings()
+elif(msg['cmd'] == 'settopa'):
+vars.top_a = float(msg['data'])
+emit('from_server', {'cmd': 'setlabeltopa', 'data': msg['data']}, broadcast=True)
+settingschanged()
+refresh_settings()
elif(msg['cmd'] == 'setreppen'):
vars.rep_pen = float(msg['data'])
emit('from_server', {'cmd': 'setlabelreppen', 'data': msg['data']}, broadcast=True)
@@ -2663,6 +2896,8 @@ def get_message(msg):
elif(msg['cmd'] == 'uslistrequest'):
unloaded, loaded = getuslist()
emit('from_server', {'cmd': 'buildus', 'data': {"unloaded": unloaded, "loaded": loaded}})
+elif(msg['cmd'] == 'samplerlistrequest'):
+emit('from_server', {'cmd': 'buildsamplers', 'data': vars.sampler_order})
elif(msg['cmd'] == 'usloaded'):
vars.userscripts = []
for userscript in msg['data']:
@@ -2676,6 +2911,16 @@ def get_message(msg):
load_lua_scripts()
unloaded, loaded = getuslist()
sendUSStatItems()
+elif(msg['cmd'] == 'samplers'):
+sampler_order = msg["data"]
+if(not isinstance(sampler_order, list)):
+raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
+if(len(sampler_order) != len(vars.sampler_order)):
+raise ValueError(f"Sampler order must be a list of length {len(vars.sampler_order)}, but got a list of length {len(sampler_order)}")
+if(not all(isinstance(e, int) for e in sampler_order)):
+raise ValueError(f"Sampler order must be a list of ints, but got a list with at least one non-int element")
+vars.sampler_order = sampler_order
+settingschanged()
elif(msg['cmd'] == 'loadselect'):
vars.loadselect = msg["data"]
elif(msg['cmd'] == 'spselect'):
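The new 'samplers' command in the hunk above expects the full permutation in one message; an illustrative client-side payload (assuming the six warpers registered earlier, the ordering values are only an example) would be:

    # Illustrative only: run top-p (index 2) first and leave temperature (index 5) last.
    payload = {"cmd": "samplers", "data": [2, 0, 1, 3, 4, 5]}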
@@ -3104,24 +3349,26 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
global tokenizer
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")

+lnheader = len(tokenizer._koboldai_header)

# Calculate token budget
prompttkns = tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', vars.prompt)), max_length=int(2e9), truncation=True)
lnprompt = len(prompttkns)

memtokens = tokenizer.encode(utils.encodenewlines(mem), max_length=int(2e9), truncation=True)
lnmem = len(memtokens)
-if(lnmem > vars.max_length - lnsp - vars.genamt - budget_deduction):
+if(lnmem > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
raise OverflowError("The memory in your story is too long. Please either write a shorter memory text or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")

witokens = tokenizer.encode(utils.encodenewlines(winfo), max_length=int(2e9), truncation=True)
lnwi = len(witokens)
-if(lnmem + lnwi > vars.max_length - lnsp - vars.genamt - budget_deduction):
+if(lnmem + lnwi > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
raise OverflowError("The current active world info keys take up too many tokens. Please either write shorter world info, decrease World Info Depth or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")

if(anotetxt != ""):
anotetkns = tokenizer.encode(utils.encodenewlines(anotetxt), max_length=int(2e9), truncation=True)
lnanote = len(anotetkns)
-if(lnmem + lnwi + lnanote > vars.max_length - lnsp - vars.genamt - budget_deduction):
+if(lnmem + lnwi + lnanote > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
raise OverflowError("The author's note in your story is too long. Please either write a shorter author's note or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")

if(vars.useprompt):
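The lnheader term threaded through these checks reserves room for the tokenizer's _koboldai_header tokens in the context budget. As a rough worked example (numbers are illustrative, not from the source): with max_length = 2048, genamt = 80, lnsp = 20 and lnheader = 1, memory plus world info plus the author's note must stay within 2048 - 1 - 20 - 80 = 1947 tokens, or one of the OverflowError branches above is taken.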
@@ -3132,14 +3379,14 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
lnsubmission = len(tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', submission)), max_length=int(2e9), truncation=True)) if submission is not None else 0
maybe_lnprompt = lnprompt if vars.useprompt and actionlen > 0 else 0

-if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnsp - vars.genamt - budget_deduction):
+if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
raise OverflowError("Your submission is too long. Please either write a shorter submission or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt. If you are using the Always Add Prompt setting, turning it off may help.")

assert budget >= 0

if(actionlen == 0):
# First/Prompt action
-tokens = memtokens + witokens + anotetkns + prompttkns
+tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns
assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
ln = len(tokens) + lnsp
return tokens, ln+1, ln+vars.genamt
@@ -3187,12 +3434,12 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
# Did we get to add the A.N.? If not, do it here
if(anotetxt != ""):
if((not anoteadded) or forceanote):
-tokens = memtokens + witokens + anotetkns + prompttkns + tokens
+tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns + tokens
else:
-tokens = memtokens + witokens + prompttkns + tokens
+tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens
else:
# Prepend Memory, WI, and Prompt before action tokens
-tokens = memtokens + witokens + prompttkns + tokens
+tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens

# Send completed bundle to generator
assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
@@ -3570,6 +3817,7 @@ def sendtocolab(txt, min, max):
'top_k': vars.top_k,
'tfs': vars.tfs,
'typical': vars.typical,
+'topa': vars.top_a,
'numseqs': vars.numseqs,
'retfultxt': False
}
@@ -3707,12 +3955,14 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None):
top_k=vars.top_k,
tfs=vars.tfs,
typical=vars.typical,
+top_a=vars.top_a,
numseqs=vars.numseqs,
repetition_penalty=vars.rep_pen,
rpslope=vars.rep_pen_slope,
rprange=vars.rep_pen_range,
soft_embeddings=vars.sp,
soft_tokens=soft_tokens,
+sampler_order=vars.sampler_order,
)
past = genout
for i in range(vars.numseqs):
@@ -3893,6 +4143,7 @@ def refresh_settings():
emit('from_server', {'cmd': 'updatetopk', 'data': vars.top_k}, broadcast=True)
emit('from_server', {'cmd': 'updatetfs', 'data': vars.tfs}, broadcast=True)
emit('from_server', {'cmd': 'updatetypical', 'data': vars.typical}, broadcast=True)
+emit('from_server', {'cmd': 'updatetopa', 'data': vars.top_a}, broadcast=True)
emit('from_server', {'cmd': 'updatereppen', 'data': vars.rep_pen}, broadcast=True)
emit('from_server', {'cmd': 'updatereppenslope', 'data': vars.rep_pen_slope}, broadcast=True)
emit('from_server', {'cmd': 'updatereppenrange', 'data': vars.rep_pen_range}, broadcast=True)
@@ -4469,6 +4720,7 @@ def oairequest(txt, min, max):
'prompt': txt,
'max_tokens': vars.genamt,
'temperature': vars.temp,
+'top_a': vars.top_a,
'top_p': vars.top_p,
'top_k': vars.top_k,
'tfs': vars.tfs,

182
breakmodel.py
@@ -633,11 +633,11 @@ def new_forward_xglm(
layer_outputs = decoder_layer(
hidden_states.to(device) if breakmodel and hidden_states is not None else hidden_states,
attention_mask=attention_mask.to(device) if breakmodel and attention_mask is not None else attention_mask,
-encoder_hidden_states=encoder_hidden_states.to(device) if encoder_hidden_states is not None else None,
+encoder_hidden_states=encoder_hidden_states.to(device) if breakmodel and encoder_hidden_states is not None else encoder_hidden_states,
-encoder_attention_mask=encoder_attention_mask.to(device) if encoder_attention_mask is not None else None,
+encoder_attention_mask=encoder_attention_mask.to(device) if breakmodel and encoder_attention_mask is not None else encoder_attention_mask,
-layer_head_mask=((head_mask[idx].to(device) if head_mask[idx] is not None else None) if head_mask is not None else None),
+layer_head_mask=((head_mask[idx].to(device) if breakmodel and head_mask[idx] is not None else head_mask[idx]) if head_mask is not None else None),
cross_attn_layer_head_mask=(
-(cross_attn_head_mask[idx].to(device) if cross_attn_head_mask[idx] is not None else None) if cross_attn_head_mask is not None else None
+(cross_attn_head_mask[idx].to(device) if breakmodel and cross_attn_head_mask[idx] is not None else cross_attn_head_mask[idx]) if cross_attn_head_mask is not None else None
),
past_key_value=tuple(v.to(device) for v in past_key_value if v is not None) if breakmodel and past_key_value is not None and i >= ram_blocks and len(past_key_value) and past_key_value[0].device.index != device else past_key_value,
output_attentions=output_attentions,
@@ -686,3 +686,177 @@ def new_forward_xglm(
attentions=all_self_attns,
cross_attentions=all_cross_attentions,
)


+def new_forward_opt(
+self,
+input_ids=None,
+attention_mask=None,
+head_mask=None,
+past_key_values=None,
+inputs_embeds=None,
+use_cache=None,
+output_attentions=None,
+output_hidden_states=None,
+return_dict=None,
+):
+assert len(gpu_blocks) <= torch.cuda.device_count()
+assert sum(gpu_blocks) <= len(self.layers)
+ram_blocks = len(self.layers) - sum(gpu_blocks)
+cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))


+output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+output_hidden_states = (
+output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+)
+use_cache = use_cache if use_cache is not None else self.config.use_cache

+return_dict = return_dict if return_dict is not None else self.config.use_return_dict

+# retrieve input_ids and inputs_embeds
+if input_ids is not None and inputs_embeds is not None:
+raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
+elif input_ids is not None:
+input_shape = input_ids.size()
+input_ids = input_ids.view(-1, input_shape[-1])
+elif inputs_embeds is not None:
+input_shape = inputs_embeds.size()[:-1]
+else:
+raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")

+past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0

+if inputs_embeds is None:
+if breakmodel:
+input_ids = input_ids.to(primary_device)
+inputs_embeds = self.embed_tokens(input_ids)

+# embed positions
+if breakmodel:
+inputs_embeds = inputs_embeds.to(primary_device)
+if attention_mask is None:
+attention_mask = torch.ones(inputs_embeds.shape[:2], dtype=torch.bool, device=inputs_embeds.device)

+positions = self.embed_positions(attention_mask)[:, past_key_values_length:, :]
+if breakmodel:
+positions = positions.to(primary_device)

+attention_mask = self._prepare_decoder_attention_mask(
+attention_mask, input_shape, inputs_embeds, past_key_values_length
+)

+if self.project_in is not None:
+inputs_embeds = self.project_in(inputs_embeds)

+hidden_states = inputs_embeds + positions

+hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)

+# decoder layers
+all_hidden_states = () if output_hidden_states else None
+all_self_attns = () if output_attentions else None
+next_decoder_cache = () if use_cache else None

+if breakmodel and ram_blocks:
+copystream = torch.cuda.Stream(device=primary_device, priority=-1)

+# check if head_mask has a correct number of layers specified if desired
+for attn_mask, mask_name in zip([head_mask], ["head_mask"]):
+if attn_mask is not None:
+if attn_mask.size()[0] != (len(self.layers)):
+raise ValueError(
+f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for"
+f" {head_mask.size()[0]}."
+)

+for idx, decoder_layer in enumerate(self.layers):
+i = idx
+if breakmodel:
+if i in range(ram_blocks):
+index1 = (i+1)%ram_blocks
+for param1,param2 in zip(self.layers[index1].parameters(),self.layers[(i-1)%ram_blocks].parameters()):
+param1.data = param2.data
+for param1,param2 in zip(self.layers[index1].parameters(),self.extrastorage[index1].parameters()):
+with torch.cuda.stream(copystream):
+torch.cuda.comm.broadcast(param2.data,out = [param1.data])

+# add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
+if output_hidden_states:
+all_hidden_states += (hidden_states,)
+dropout_probability = random.uniform(0, 1)
+if self.training and (dropout_probability < self.layerdrop):
+continue

+past_key_value = past_key_values[idx] if past_key_values is not None else None

+if self.gradient_checkpointing and self.training:

+if use_cache:
+logger.warning(
+"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+)
+use_cache = False

+def create_custom_forward(module):
+def custom_forward(*inputs):
+# None for past_key_value
+return module(*inputs, output_attentions, None)

+return custom_forward

+layer_outputs = torch.utils.checkpoint.checkpoint(
+create_custom_forward(decoder_layer),
+hidden_states,
+attention_mask,
+head_mask[idx] if head_mask is not None else None,
+None,
+)
+else:
+if breakmodel:
+device = primary_device if i < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, i - ram_blocks)
+layer_outputs = decoder_layer(
+hidden_states.to(device) if breakmodel and hidden_states is not None else hidden_states,
+attention_mask=attention_mask.to(device) if breakmodel and attention_mask is not None else attention_mask,
+layer_head_mask=((head_mask[idx].to(device) if breakmodel and head_mask[idx] is not None else head_mask[idx]) if head_mask is not None else None),
+past_key_value=tuple(v.to(device) for v in past_key_value if v is not None) if breakmodel and past_key_value is not None and i >= ram_blocks and len(past_key_value) and past_key_value[0].device.index != device else past_key_value,
+output_attentions=output_attentions,
+use_cache=use_cache,
+)

+hidden_states = layer_outputs[0]

+if use_cache:
+next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)

+if output_attentions:
+all_self_attns += (layer_outputs[1],)

+if breakmodel:
+if i in range(ram_blocks):
+torch.cuda.synchronize()
+torch.cuda.empty_cache()

+if breakmodel:
+if ram_blocks:
+del copystream
+torch.cuda.empty_cache()
+hidden_states = hidden_states.to(primary_device)
+if self.project_out is not None:
+hidden_states = self.project_out(hidden_states)
+if breakmodel:
+hidden_states = hidden_states.to(primary_device)

+# add hidden states from the last decoder layer
+if output_hidden_states:
+all_hidden_states += (hidden_states,)

+next_cache = next_decoder_cache if use_cache else None
+if not return_dict:
+return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
+return BaseModelOutputWithPast(
+last_hidden_state=hidden_states,
+past_key_values=next_cache,
+hidden_states=all_hidden_states,
+attentions=all_self_attns,
+)
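new_forward_opt above follows the usual breakmodel layout: the first ram_blocks decoder layers are streamed through the primary device while the rest are pinned to GPUs according to gpu_blocks, and a bisect lookup picks the GPU for each remaining layer. A hedged sketch of that index-to-device mapping (function name and example values are illustrative):

    import bisect
    import itertools

    def layer_device(i: int, gpu_blocks: list, n_layers: int, primary_device: int = 0) -> int:
        # Layers still held in RAM run on the primary device; the remaining layers
        # are assigned to GPUs in contiguous blocks, as in the bisect expression above.
        ram_blocks = n_layers - sum(gpu_blocks)
        cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))
        if i < ram_blocks:
            return primary_device
        return bisect.bisect_right(cumulative_gpu_blocks, i - ram_blocks)

    # 32 layers: 8 kept in RAM, then 12 layers on GPU 0 and 12 on GPU 1.
    print([layer_device(i, [12, 12], 32) for i in range(32)])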
@@ -867,6 +867,7 @@ return function(_python, _bridged)
---@field settopk integer
---@field settfs number
---@field settypical number
+---@field settopa number
---@field setreppen number
---@field setreppenslope number
---@field setreppenrange number
@@ -884,6 +885,7 @@ return function(_python, _bridged)
---@field top_k integer
---@field tfs number
---@field typical number
+---@field topa number
---@field reppen number
---@field reppenslope number
---@field reppenrange number
@@ -7,7 +7,7 @@
"private_outputs": true,
"provenance": [],
"collapsed_sections": [],
-"authorship_tag": "ABX9TyOKIa/NDLlYI5j63GXPtkXv",
+"authorship_tag": "ABX9TyPbwW79K9/RkYH9i9rkYFyj",
"include_colab_link": true
},
"kernelspec": {
@@ -68,14 +68,20 @@
"#@title <b><-- Click this to start KoboldAI</b>\n",
"#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
"\n",
-"Model = \"KoboldAI/GPT-Neo-2.7B-Janeway\" #@param [\"KoboldAI/GPT-Neo-2.7B-Janeway\", \"KoboldAI/GPT-Neo-2.7B-AID\", \"KoboldAI/GPT-Neo-2.7B-Picard\", \"KoboldAI/GPT-Neo-2.7B-Horni-LN\", \"KoboldAI/GPT-Neo-2.7B-Horni\", \"KoboldAI/GPT-Neo-2.7B-Shinen\", \"EleutherAI/gpt-neo-2.7B\"] {allow-input: true}\n",
+"Model = \"KoboldAI/fairseq-dense-2.7B-Nerys\" #@param [\"KoboldAI/fairseq-dense-2.7B-Nerys\", \"KoboldAI/GPT-Neo-2.7B-Janeway\", \"KoboldAI/GPT-Neo-2.7B-AID\", \"KoboldAI/GPT-Neo-2.7B-Picard\", \"KoboldAI/GPT-Neo-2.7B-Horni-LN\", \"KoboldAI/GPT-Neo-2.7B-Horni\", \"KoboldAI/GPT-Neo-2.7B-Shinen\", \"EleutherAI/gpt-neo-2.7B\"] {allow-input: true}\n",
"Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
+"Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
"\n",
"!nvidia-smi\n",
"from google.colab import drive\n",
"drive.mount('/content/drive/')\n",
"\n",
-"!wget https://henk.tech/ckds -O - | bash /dev/stdin -m $Model -g $Version"
+"if Provider == \"Localtunnel\":\n",
+" tunnel = \"--localtunnel yes\"\n",
+"else:\n",
+" tunnel = \"\"\n",
+"\n",
+"!wget https://henk.tech/ckds -O - | bash /dev/stdin -m $Model -g $Version $tunnel"
],
"execution_count": null,
"outputs": []
@@ -84,27 +90,32 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"source": [
|
"source": [
|
||||||
"# GPU Edition Model Descriptions\n",
|
"# GPU Edition Model Descriptions\n",
|
||||||
"| Model | Size | Style | Description |\n",
|
"| Model | Size | Style | Description |\n",
|
||||||
"| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ |\n",
|
"| --- | --- | --- | --- |\n",
|
||||||
"| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B GPU | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
|
"| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
|
||||||
"| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n",
|
"| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
|
||||||
"| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n",
|
"| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n",
|
||||||
"| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n",
|
"| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n",
|
||||||
"| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n",
|
"| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n",
|
||||||
"| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
|
"| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n",
|
||||||
"| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n",
|
"| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
|
||||||
|
"| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# [TPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)\n",
|
"# [TPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"| Model | Size | Style | Drive Space | Description |\n",
|
"| Model | Size | Style | Description |\n",
|
||||||
"| ------------------------------ | ------ | --------- | ----------- | ------------------------------------------------------------ |\n",
|
"| --- | --- | --- | --- |\n",
|
||||||
"| Skein 6B by VE_FORBDRYDERNE | 6B TPU | Hybrid | 0 GB | Skein is our flagship 6B model, it is a hybrid between a Adventure model and a Novel model. Best used with either Adventure mode or the You Bias userscript enabled. Skein has been trained on high quality Novels along with CYOA adventure stories and is not as wackey as the Adventure model. It also has tagging support. |\n",
|
"| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
|
||||||
"| Janeway 6B by Mr Seeker | 6B TPU | Novel | 0 GB | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
|
"| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
|
||||||
"| Adventure 6B by VE_FORBRYDERNE | 6B TPU | Adventure | 0 GB | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n",
|
"| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n",
|
||||||
"| Lit 6B by Haru | 6B TPU | NSFW | 8 GB / 12 GB | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n",
|
"| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n",
|
||||||
"| Shinen 6B by Mr Seeker | 6B TPU | NSFW | 0 GB | Shinen is an alternative to the Lit model designed to be more explicit. If Lit is to tame for you Shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
|
"| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n",
|
||||||
"| Generic 6B by EleutherAI | 6B TPU | Generic | 10 GB / 12 GB | GPT-J-6B is what all other models are based on, if you need something that has no specific bias towards any particular subject this is the model for you. Best used when the other models are not suitable for what you wish to do. Such as homework assistance, blog writing, coding and more. It needs more hand holding than other models and is more prone to undesirable formatting changes. |\n",
|
"| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n",
|
||||||
"| C1 6B by Haru | 6B TPU | Chatbot | 8 GB / 12 GB | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n",
|
"| [Convo](https://huggingface.co/hitomi-team/convo-6B) by Hitomi Team | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). |\n",
|
||||||
|
"| [C1](https://huggingface.co/hakurei/c1-6B) by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n",
|
||||||
|
"| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n",
|
||||||
|
"| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n",
|
||||||
|
"| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"| Style | Description |\n",
|
"| Style | Description |\n",
|
||||||
@@ -113,7 +124,6 @@
|
|||||||
"| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n",
|
"| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n",
|
||||||
"| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |\n",
|
"| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |\n",
|
||||||
"| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n",
|
"| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n",
|
||||||
"| Hybrid | Hybrid models are a blend between different styles, for example they are trained on both Novel stories and Adventure stories. These models are great variety models that you can use for multiple different playstyles and modes, but depending on your usage you may need to enable Adventure Mode or the You bias (in userscripts). |\n",
|
|
||||||
"| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n",
|
"| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# How to start KoboldAI in 7 simple steps\n",
|
"# How to start KoboldAI in 7 simple steps\n",
|
||||||
|
@@ -7,7 +7,7 @@
|
|||||||
"colab_type": "text"
|
"colab_type": "text"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"<a href=\"https://colab.research.google.com/github/henk717/KoboldAI/blob/united/colab/TPU.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
"<a href=\"https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -65,8 +65,8 @@
|
|||||||
"#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
|
"#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"#@title <b><-- Click this to start KoboldAI</b>\n",
|
"#@title <b><-- Click this to start KoboldAI</b>\n",
|
||||||
"Model = \"Janeway 13B\" #@param [\"Janeway 13B\", \"Shinen 13B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit 6B\", \"Convo 6B\", \"C1 6B\", \"NeoX 20B\", \"KoboldAI/fairseq-dense-13B\", \"EleutherAI/gpt-j-6B\"] {allow-input: true}\n",
|
"Model = \"Nerys 13B\" #@param [\"Nerys 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit 6B\", \"NeoX 20B\", \"facebook/opt-13b\", \"KoboldAI/fairseq-dense-13B\", \"EleutherAI/gpt-j-6B\"] {allow-input: true}\n",
|
||||||
"Version = \"United\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
|
"Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
|
||||||
"Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
|
"Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"import os\n",
|
"import os\n",
|
||||||
@@ -84,6 +84,10 @@
|
|||||||
" Model = \"KoboldAI/fairseq-dense-13B-Janeway\"\n",
|
" Model = \"KoboldAI/fairseq-dense-13B-Janeway\"\n",
|
||||||
" path = \"\"\n",
|
" path = \"\"\n",
|
||||||
" download = \"\"\n",
|
" download = \"\"\n",
|
||||||
|
"elif Model == \"Nerys 13B\":\n",
|
||||||
|
" Model = \"KoboldAI/fairseq-dense-13B-Nerys\"\n",
|
||||||
|
" path = \"\"\n",
|
||||||
|
" download = \"\"\n",
|
||||||
"elif Model == \"Shinen 13B\":\n",
|
"elif Model == \"Shinen 13B\":\n",
|
||||||
" Model = \"KoboldAI/fairseq-dense-13B-Shinen\"\n",
|
" Model = \"KoboldAI/fairseq-dense-13B-Shinen\"\n",
|
||||||
" path = \"\"\n",
|
" path = \"\"\n",
|
||||||
@@ -97,41 +101,25 @@
|
|||||||
" Drive = \"Unextracted (Less Space)\"\n",
|
" Drive = \"Unextracted (Less Space)\"\n",
|
||||||
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-neox-20b-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.03,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-neox-20b-jax.settings\n",
|
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-neox-20b-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.03,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-neox-20b-jax.settings\n",
|
||||||
"elif Model == \"Skein 6B\":\n",
|
"elif Model == \"Skein 6B\":\n",
|
||||||
" Model = \"TPUMeshTransformerGPTJ\"\n",
|
" Model = \"KoboldAI/GPT-J-6B-Skein\"\n",
|
||||||
" path = \" -p gpt-j-6b-skein-jax\"\n",
|
" path = \"\"\n",
|
||||||
" location = \"colab\"\n",
|
" download = \"\"\n",
|
||||||
" download = \" -a https://storage.henk.tech/KoboldAI/skein-jax.txt\"\n",
|
|
||||||
" extract = \"\"\n",
|
|
||||||
" Drive = \"Unextracted (Less Space)\"\n",
|
|
||||||
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-skein-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-skein-jax.settings\n",
|
|
||||||
"elif Model == \"Janeway 6B\":\n",
|
"elif Model == \"Janeway 6B\":\n",
|
||||||
" Model = \"TPUMeshTransformerGPTJ\"\n",
|
" Model = \"KoboldAI/GPT-J-6B-Janeway\"\n",
|
||||||
" path = \" -p gpt-j-6b-janeway-jax\"\n",
|
" path = \"\"\n",
|
||||||
" location = \"colab\"\n",
|
" download = \"\"\n",
|
||||||
" download = \" -a https://storage.henk.tech/KoboldAI/janeway-jax.txt\"\n",
|
|
||||||
" extract = \"\"\n",
|
|
||||||
" Drive = \"Unextracted (Less Space)\"\n",
|
|
||||||
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-janeway-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"rep_pen_slope\\\": 0.7,\\n \\\"rep_pen_range\\\": 1024.0,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false,\\n \\\"singleline\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false,\\n \\\"chatmode\\\": false,\\n \\\"chatname\\\": \\\"You\\\",\\n \\\"dynamicscan\\\": false,\\n \\\"nopromptgen\\\": false,\\n \\\"rngpersist\\\": false,\\n \\\"nogenmod\\\": false,\\n \\\"autosave\\\": false,\\n \\\"welcome\\\": false,\\n \\\"newlinemode\\\": \\\"n\\\",\\n \\\"antemplate\\\": \\\"[Genre: <|>]\\\",\\n \\\"userscripts\\\": [],\\n \\\"corescript\\\": \\\"default.lua\\\",\\n \\\"softprompt\\\": \\\"\\\"\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-janeway-jax.settings\n",
|
|
||||||
"elif Model == \"Adventure 6B\":\n",
|
"elif Model == \"Adventure 6B\":\n",
|
||||||
" Model = \"TPUMeshTransformerGPTJ\"\n",
|
" Model = \"KoboldAI/GPT-J-6B-Adventure\"\n",
|
||||||
" path = \" -p gpt-j-6b-adventure-jax\"\n",
|
" path = \"\"\n",
|
||||||
" location = \"colab\"\n",
|
" download = \"\"\n",
|
||||||
" download = \" -a https://api.wandb.ai/files/ve-forbryderne/adventure/carol-data/models/gpt-j-6b-adventure-jax/aria2.txt\"\n",
|
|
||||||
" extract = \"\"\n",
|
|
||||||
" Drive = \"Unextracted (Less Space)\"\n",
|
|
||||||
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-adventure-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": true\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-adventure-jax.settings\n",
|
|
||||||
"elif Model == \"Lit 6B\":\n",
|
"elif Model == \"Lit 6B\":\n",
|
||||||
" Model = \"hakurei/lit-6B\"\n",
|
" Model = \"hakurei/lit-6B\"\n",
|
||||||
" path = \"\"\n",
|
" path = \"\"\n",
|
||||||
" download = \"\"\n",
|
" download = \"\"\n",
|
||||||
"elif Model == \"Shinen 6B\":\n",
|
"elif Model == \"Shinen 6B\":\n",
|
||||||
" Model = \"TPUMeshTransformerGPTJ\"\n",
|
" Model = \"KoboldAI/GPT-J-6B-Shinen\"\n",
|
||||||
" path = \" -p gpt-j-6b-shinen-jax\"\n",
|
" path = \"\"\n",
|
||||||
" location = \"colab\"\n",
|
" download = \"\"\n",
|
||||||
" download = \" -a https://storage.henk.tech/KoboldAI/shinen-jax.txt\"\n",
|
|
||||||
" extract = \"\"\n",
|
|
||||||
" Drive = \"Unextracted (Less Space)\"\n",
|
|
||||||
" ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-shinen-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"rep_pen_slope\\\": 0.7,\\n \\\"rep_pen_range\\\": 1024.0,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false,\\n \\\"singleline\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false,\\n \\\"chatmode\\\": false,\\n \\\"chatname\\\": \\\"You\\\",\\n \\\"dynamicscan\\\": false,\\n \\\"nopromptgen\\\": false,\\n \\\"rngpersist\\\": false,\\n \\\"nogenmod\\\": false,\\n \\\"autosave\\\": false,\\n \\\"welcome\\\": false,\\n \\\"newlinemode\\\": \\\"n\\\",\\n \\\"antemplate\\\": \\\"[Genre: <|>]\\\",\\n \\\"userscripts\\\": [],\\n \\\"corescript\\\": \\\"default.lua\\\",\\n \\\"softprompt\\\": \\\"\\\"\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-shinen-jax.settings\n",
|
|
||||||
"elif Model == \"Convo 6B\":\n",
|
"elif Model == \"Convo 6B\":\n",
|
||||||
" Model = \"hitomi-team/convo-6B\"\n",
|
" Model = \"hitomi-team/convo-6B\"\n",
|
||||||
" path = \"\"\n",
|
" path = \"\"\n",
|
||||||
@@ -159,37 +147,37 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"| Model | Size | Style | Description |\n",
|
"| Model | Size | Style | Description |\n",
|
||||||
"| --- | --- | --- | --- |\n",
|
"| --- | --- | --- | --- |\n",
|
||||||
"| Janeway by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
|
"| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
|
||||||
"| Shinen by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n",
|
"| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
|
||||||
"| Skein by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n",
|
"| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n",
|
||||||
"| Adventure by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n",
|
"| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n",
|
||||||
"| Lit by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n",
|
"| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n",
|
||||||
"| Convo | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). |\n",
|
"| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n",
|
||||||
"| C1 by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n",
|
|
||||||
"| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n",
|
"| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n",
|
||||||
"| Fairseq Dense | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n",
|
"| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n",
|
||||||
"| GPT-J-6B by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n",
|
"| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)\n",
|
"# [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"| Model | Size | Style | Description |\n",
|
"| Model | Size | Style | Description |\n",
|
||||||
"| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ |\n",
|
"| --- | --- | --- | --- |\n",
|
||||||
"| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B GPU | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
|
"| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n",
|
||||||
"| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n",
|
"| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n",
|
||||||
"| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n",
|
"| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n",
|
||||||
"| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n",
|
"| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n",
|
||||||
"| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n",
|
"| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n",
|
||||||
"| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
|
"| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n",
|
||||||
"| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n",
|
"| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n",
|
||||||
|
"| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n",
|
||||||
"\n",
|
"\n",
|
||||||
"| Style | Description |\n",
|
"| Style | Description |\n",
|
||||||
"| --------- | ------------------------------------------------------------ |\n",
|
"| --- | --- |\n",
|
||||||
"| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |\n",
|
"| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |\n",
|
||||||
"| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n",
|
"| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n",
|
||||||
"| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |\n",
|
"| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |\n",
|
||||||
"| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n",
|
"| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n",
|
||||||
"| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n",
|
"| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n",
|
||||||
"\n",
|
"\n",
|
||||||
"---\n",
|
"---\n",
|
||||||
"## Tips to get the most out of Google Colab\n",
|
"## Tips to get the most out of Google Colab\n",
|
||||||
|
@@ -20,4 +20,5 @@ dependencies:
|
|||||||
- flask-cloudflared
|
- flask-cloudflared
|
||||||
- flask-ngrok
|
- flask-ngrok
|
||||||
- lupa==1.10
|
- lupa==1.10
|
||||||
- transformers>=4.17
|
- transformers>=4.20.1
|
||||||
|
- accelerate
|
@@ -20,4 +20,5 @@ dependencies:
|
|||||||
- flask-cloudflared
|
- flask-cloudflared
|
||||||
- flask-ngrok
|
- flask-ngrok
|
||||||
- lupa==1.10
|
- lupa==1.10
|
||||||
- transformers>=4.17
|
- transformers>=4.20.1
|
||||||
|
- accelerate
|
||||||
|
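The two dependency hunks above raise the minimum transformers release from 4.17 to 4.20.1 and add accelerate. As a purely illustrative runtime check (not part of the diff), an existing install can be verified against the new lower bound with the packaging library:

```python
import packaging.version
import transformers

MINIMUM = "4.20.1"  # new lower bound from the updated dependency list

installed = packaging.version.parse(transformers.__version__)
if installed < packaging.version.parse(MINIMUM):
    raise RuntimeError(
        f"transformers {transformers.__version__} is older than the required {MINIMUM}"
    )
print(f"transformers {transformers.__version__} satisfies >= {MINIMUM}")
```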
@@ -64,6 +64,17 @@ gensettingstf = [
|
|||||||
"step": 0.05,
|
"step": 0.05,
|
||||||
"default": 1.0,
|
"default": 1.0,
|
||||||
"tooltip": "Alternative sampling method described in the paper \"Typical Decoding for Natural Language Generation\" (10.48550/ARXIV.2202.00666). The paper suggests 0.2 as a good value for this setting. Set this setting to 1 to disable its effect."
|
"tooltip": "Alternative sampling method described in the paper \"Typical Decoding for Natural Language Generation\" (10.48550/ARXIV.2202.00666). The paper suggests 0.2 as a good value for this setting. Set this setting to 1 to disable its effect."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"uitype": "slider",
|
||||||
|
"unit": "float",
|
||||||
|
"label": "Top a Sampling",
|
||||||
|
"id": "settopa",
|
||||||
|
"min": 0.0,
|
||||||
|
"max": 1.0,
|
||||||
|
"step": 0.01,
|
||||||
|
"default": 0.0,
|
||||||
|
"tooltip": "Alternative sampling method that reduces the randomness of the AI whenever the probability of one token is much higher than all the others. Higher values have a stronger effect. Set this setting to 0 to disable its effect."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"uitype": "slider",
|
"uitype": "slider",
|
||||||
|
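The hunk above adds a "Top a Sampling" slider whose tooltip describes a filter that only kicks in when one token is far more probable than the rest. As a rough sketch of that idea, using the commonly cited top-a rule (threshold = top_a * max_probability^2), and not necessarily the exact code path KoboldAI ships, the filter could be applied to a logit vector like this:

```python
import numpy as np

def top_a_filter(logits: np.ndarray, top_a: float) -> np.ndarray:
    """Illustrative top-a filtering over a 1-D logit vector."""
    if top_a <= 0.0:
        return logits  # 0 disables the effect, matching the slider default and tooltip
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    threshold = top_a * probs.max() ** 2  # bites only when one token dominates
    return np.where(probs < threshold, -np.inf, logits)

# One dominant token: with top_a=0.5 the cutoff is 0.5 * 0.7^2 = 0.245,
# so only the 0.70-probability token survives.
logits = np.log(np.array([0.70, 0.15, 0.10, 0.05]))
print(top_a_filter(logits, top_a=0.5))
```

Because the threshold scales with the square of the highest probability, a flat distribution passes through untouched while a sharply peaked one loses its unlikely tail, which matches the tooltip's description of reducing randomness only when one token is much more likely than the others.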
BIN koboldai.ico (new binary file, 150 KiB, not shown)
BIN koboldaiblue.ico (new binary file, 152 KiB, not shown)
BIN koboldaigreen.ico (new binary file, 151 KiB, not shown)
37
maps/opt.json
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
{
|
||||||
|
"mtj_compat": "opt",
|
||||||
|
"mtj_pe": "fixed",
|
||||||
|
"mtj_config_map": {
|
||||||
|
"do_layer_norm_before": ["do_layer_norm_before", true],
|
||||||
|
"d_embed": "word_embed_proj_dim",
|
||||||
|
"d_model": "hidden_size",
|
||||||
|
"n_heads": "num_attention_heads",
|
||||||
|
"layers": "num_hidden_layers"
|
||||||
|
},
|
||||||
|
"static_weights": {
|
||||||
|
"decoder.embed_tokens.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}},
|
||||||
|
"decoder.project_in.weight": {"mtj": {"module": "embedding_shard", "param": "project_in"}},
|
||||||
|
"decoder.embed_positions.weight": {"mtj": {"module": "embedding_shard", "param": "pos_embs", "transforms": ["no_transpose", "remove_first_two_rows"]}},
|
||||||
|
"decoder.final_layer_norm.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}},
|
||||||
|
"decoder.final_layer_norm.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}},
|
||||||
|
"decoder.project_out.weight": {"mtj": {"module": "projection_shard", "param": "project_out"}}
|
||||||
|
},
|
||||||
|
"layer_weights": {
|
||||||
|
"decoder.layers.{layer}.self_attn.q_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear", "param": "w"}},
|
||||||
|
"decoder.layers.{layer}.self_attn.q_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear", "param": "b"}},
|
||||||
|
"decoder.layers.{layer}.self_attn.v_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "w"}},
|
||||||
|
"decoder.layers.{layer}.self_attn.v_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "b"}},
|
||||||
|
"decoder.layers.{layer}.self_attn.k_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "w"}},
|
||||||
|
"decoder.layers.{layer}.self_attn.k_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "b"}},
|
||||||
|
"decoder.layers.{layer}.self_attn.out_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "w"}},
|
||||||
|
"decoder.layers.{layer}.self_attn.out_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "b", "transforms": ["divide_by_shards"]}},
|
||||||
|
"decoder.layers.{layer}.fc1.weight": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "w"}},
|
||||||
|
"decoder.layers.{layer}.fc1.bias": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "b"}},
|
||||||
|
"decoder.layers.{layer}.fc2.weight": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "w"}},
|
||||||
|
"decoder.layers.{layer}.fc2.bias": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "b", "transforms": ["divide_by_shards"]}},
|
||||||
|
"decoder.layers.{layer}.self_attn_layer_norm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "scale"}},
|
||||||
|
"decoder.layers.{layer}.self_attn_layer_norm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "offset"}},
|
||||||
|
"decoder.layers.{layer}.final_layer_norm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "scale"}},
|
||||||
|
"decoder.layers.{layer}.final_layer_norm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "offset"}}
|
||||||
|
}
|
||||||
|
}
|
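The new maps/opt.json tells the TPU (Mesh Transformer JAX) backend how each tensor in a Hugging Face OPT checkpoint maps onto an MTJ module and parameter, along with any transforms (no_transpose, vocab_pad, divide_by_shards) to apply along the way. A minimal sketch of how such a map can be expanded into concrete per-layer tensor names follows; the helper below is hypothetical, and the real conversion happens inside KoboldAI's TPU loader, which also applies the listed transforms to the tensors themselves:

```python
import json

def expand_model_map(map_path: str, num_layers: int) -> dict:
    """Flatten static_weights and layer_weights into {hf_tensor_name: (module, param, transforms)}."""
    with open(map_path) as f:
        model_map = json.load(f)

    names = {}
    for hf_name, spec in model_map.get("static_weights", {}).items():
        mtj = spec["mtj"]
        names[hf_name] = (mtj["module"], mtj["param"], mtj.get("transforms", []))
    for layer in range(num_layers):
        for template, spec in model_map.get("layer_weights", {}).items():
            mtj = spec["mtj"]
            names[template.format(layer=layer)] = (
                mtj["module"].format(layer=layer),
                mtj["param"],
                mtj.get("transforms", []),
            )
    return names

# e.g. ("layer_0/~/linear_4", "w", []) for the first decoder block's fc1 weight
print(expand_model_map("maps/opt.json", num_layers=2)["decoder.layers.0.fc1.weight"])
```

The number of layers itself comes from the checkpoint's num_hidden_layers value, which the map exposes through its mtj_config_map entry.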
73
readme.md
@@ -50,49 +50,50 @@ Each edition features different models and requires different hardware to run, t
|
|||||||
|
|
||||||
### [Click here for the TPU Edition Colab](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)
|
### [Click here for the TPU Edition Colab](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)
|
||||||
|
|
||||||
|
| Model | Size | Style | Description |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (a newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model too. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |
|
||||||
|
| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |
|
||||||
|
| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |
|
||||||
|
| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |
|
||||||
|
| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimic the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wacky adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |
|
||||||
|
| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |
|
||||||
|
| [Convo](https://huggingface.co/hitomi-team/convo-6B) by Hitomi Team | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). |
|
||||||
|
| [C1](https://huggingface.co/hakurei/c1-6B) by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |
|
||||||
|
| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |
|
||||||
|
| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |
|
||||||
|
| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |
|
||||||
|
|
||||||
|
## [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)
|
||||||
|
|
||||||
| Model | Size | Style | Description |
|
| Model | Size | Style | Description |
|
||||||
| --- | --- | --- | --- |
|
| --- | --- | --- | --- |
|
||||||
| Janeway by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |
|
| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (a newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model too. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |
|
||||||
| Shinen by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |
|
| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |
|
||||||
| Skein by VE\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |
|
| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |
|
||||||
| Adventure by VE\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimic the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wacky adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |
|
| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also known as Adventure 2.7B, this is a clone of the AI Dungeon Classic model and is best known for the epic wacky adventures that AI Dungeon Classic players love. |
|
||||||
| Lit by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |
|
| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |
|
||||||
| Convo | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). |
|
| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |
|
||||||
| C1 by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |
|
| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is too tame for you, Shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |
|
||||||
| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |
|
| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |
|
||||||
| Fairseq Dense | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |
|
|
||||||
| GPT-J-6B by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |
|
|
||||||
|
|
||||||
|
| Style | Description |
|
||||||
# [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)
|
| --- | --- |
|
||||||
|
| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |
|
||||||
| Model | Size | Style | Description |
|
| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |
|
||||||
| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ |
|
|
||||||
| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B GPU | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |
|
|
||||||
| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |
|
|
||||||
| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |
|
|
||||||
| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |
|
|
||||||
| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |
|
|
||||||
| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |
|
|
||||||
| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |
|
|
||||||
|
|
||||||
| Style | Description |
|
|
||||||
| --------- | ------------------------------------------------------------ |
|
|
||||||
| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |
|
|
||||||
| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |
|
|
||||||
| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |
|
| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |
|
||||||
| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |
|
| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |
|
||||||
| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |
|
| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |
|
||||||
|
|
||||||
---
|
|
||||||
## Tips to get the most out of Google Colab
|
## Tips to get the most out of Google Colab
|
||||||
- Google will occationally show a Captcha, typically after it has been open for 30 minutes but it can be more frequent if you often use Colab. Make sure to do these properly, or you risk getting your instance shut down and getting a lower priority towards the TPU's.
|
|
||||||
- KoboldAI uses Google Drive to store your files and settings, if you wish to upload a softprompt or userscript this can be done directly on the Google Drive website. You can also use this to download backups of your KoboldAI related files or upload models of your own.
|
* Google will occationally show a Captcha, typically after it has been open for 30 minutes but it can be more frequent if you often use Colab. Make sure to do these properly, or you risk getting your instance shut down and getting a lower priority towards the TPU's.
|
||||||
- Don't want to save your stories on Google Drive for privacy reasons? Do not use KoboldAI's save function and instead click Download as .json, this will automatically download the story to your own computer without ever touching Google's harddrives. You can load this back trough the Load from file option.
|
* KoboldAI uses Google Drive to store your files and settings, if you wish to upload a softprompt or userscript this can be done directly on the Google Drive website. You can also use this to download backups of your KoboldAI related files or upload models of your own.
|
||||||
- Google shut your instance down unexpectedly? You can still make use of the Download as .json button to recover your story as long as you did not close the KoboldAI window. You can then load this back up in your next session.
|
* Don't want to save your stories on Google Drive for privacy reasons? Do not use KoboldAI's save function and instead click Download as .json, this will automatically download the story to your own computer without ever touching Google's harddrives. You can load this back trough the Load from file option.
|
||||||
- Done with KoboldAI? Go to the Runtime menu, click on Manage Sessions and terminate your open sessions that you no longer need. This trick can help you maintain higher priority towards getting a TPU.
|
* Google shut your instance down unexpectedly? You can still make use of the Download as .json button to recover your story as long as you did not close the KoboldAI window. You can then load this back up in your next session.
|
||||||
- Models stored on Google Drive typically load faster than models we need to download from the internet.
|
* Done with KoboldAI? Go to the Runtime menu, click on Manage Sessions and terminate your open sessions that you no longer need. This trick can help you maintain higher priority towards getting a TPU.
|
||||||
|
* Models stored on Google Drive typically load faster than models we need to download from the internet.
|
||||||
|
|
||||||
### [Click here for the GPU Edition Colab](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)
|
### [Click here for the GPU Edition Colab](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)
|
||||||
|
|
||||||
| Model | Size | Type | Description |
|
| Model | Size | Type | Description |
|
||||||
|
@@ -1,4 +1,4 @@
-transformers>=4.17
+transformers>=4.20.1
 Flask
 Flask-SocketIO
 requests
@@ -11,3 +11,4 @@ markdown
 bleach==4.1.0
 sentencepiece
 protobuf
+accelerate
@@ -5,7 +5,7 @@ requests
 optax >= 0.0.5, <= 0.0.9
 dm-haiku == 0.0.5
 jax == 0.2.21
-transformers >= 4.17
+transformers >= 4.19
 progressbar2
 git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck
 flask
@@ -20,6 +20,7 @@ var button_settings;
 var button_format;
 var button_softprompt;
 var button_userscripts;
+var button_samplers;
 var button_mode;
 var button_mode_label;
 var button_send;
@@ -106,6 +107,12 @@ var using_webkit_patch = true;
 var shift_down = false;
 var do_clear_ent = false;
 
+// Whether or not an entry in the Userscripts menu is being dragged
+var us_dragging = false;
+
+// Whether or not an entry in the Samplers menu is being dragged
+var samplers_dragging = false;
+
 // Display vars
 var allowtoggle = false;
 var formatcount = 0;
@@ -173,20 +180,36 @@ function addSetting(ob) {
 window["setting_"+ob.id] = refin; // Is this still needed?
 window["label_"+ob.id] = reflb; // Is this still needed?
 // Add event function to input
+var updateLabelColor = function () {
+var value = (ob.unit === "float" ? parseFloat : parseInt)(reflb.val());
+if(value > ob.max || value < ob.min) {
+reflb.addClass("setting-value-warning");
+} else {
+reflb.removeClass("setting-value-warning");
+}
+}
 var send = function () {
 sliders_throttle(ob.id, function () {
-socket.send({'cmd': $(refin).attr('id'), 'data': $(refin).val()});
+socket.send({'cmd': $(refin).attr('id'), 'data': $(reflb).val()});
 });
-reflb.val($(refin).val());
 }
-refin.on("input", send);
+refin.on("input", function (event) {
+reflb.val(refin.val());
+updateLabelColor();
+send();
+}).on("change", updateLabelColor);
 reflb.on("change", function (event) {
 var value = (ob.unit === "float" ? parseFloat : parseInt)(event.target.value);
-if(Number.isNaN(value) || value > ob.max || value < ob.min) {
+if(Number.isNaN(value) || (ob.min >= 0 && value < 0)) {
 event.target.value = refin.val();
 return;
 }
+if (ob.unit === "float") {
+value = parseFloat(value.toFixed(3)); // Round to 3 decimal places to help avoid the number being too long to fit in the box
+}
 refin.val(value);
+reflb.val(value);
+updateLabelColor();
 send();
 });
 } else if(ob.uitype == "toggle"){
@@ -957,6 +980,16 @@ function hideUSPopup() {
 spcontent.html("");
 }
 
+function showSamplersPopup() {
+samplerspopup.removeClass("hidden");
+samplerspopup.addClass("flex");
+}
+
+function hideSamplersPopup() {
+samplerspopup.removeClass("flex");
+samplerspopup.addClass("hidden");
+}
+
 function buildLoadList(ar) {
 disableButtons([load_accept]);
 loadcontent.html("");
@@ -1090,6 +1123,29 @@ function buildUSList(unloaded, loaded) {
 }
 }
 
+function buildSamplerList(samplers) {
+samplerslist.html("");
+showSamplersPopup();
+var i;
+var samplers_lookup_table = [
+"Top-k Sampling",
+"Top-a Sampling",
+"Top-p Sampling",
+"Tail-free Sampling",
+"Typical Sampling",
+"Temperature",
+]
+for(i=0; i<samplers.length; i++) {
+samplerslist.append("<div class=\"flex\">\
+<div class=\"samplerslistitem flex-row-container\" sid=\""+samplers[i]+"\">\
+<div class=\"flex-row\">\
+<div>"+samplers_lookup_table[samplers[i]]+"</div>\
+</div>\
+</div>\
+</div>");
+}
+}
+
 function highlightLoadLine(ref) {
 $("#loadlistcontent > div > div.popuplistselected").removeClass("popuplistselected");
 ref.addClass("popuplistselected");
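The `samplers_lookup_table` added above maps each numeric sampler ID the server sends to a display name, and the Samplers popup later returns the IDs in whatever order the user arranged them. As a hedged illustration (not code from this commit), the ID-to-name mapping and the shape of the resulting `samplers` message can be sketched in Python like this:

```python
# Sketch of the sampler ID ordering used by the Samplers popup; the IDs and
# names mirror samplers_lookup_table above, the rest is illustrative only.
SAMPLER_NAMES = {
    0: "Top-k Sampling",
    1: "Top-a Sampling",
    2: "Top-p Sampling",
    3: "Tail-free Sampling",
    4: "Typical Sampling",
    5: "Temperature",
}

# A hypothetical reordered list as the popup's Save button would submit it:
new_order = [5, 0, 1, 2, 3, 4]
message = {"cmd": "samplers", "data": new_order}
print([SAMPLER_NAMES[i] for i in message["data"]])
```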
@@ -1819,6 +1875,7 @@ $(document).ready(function(){
 button_format = $('#btn_format');
 button_softprompt = $("#btn_softprompt");
 button_userscripts= $("#btn_userscripts");
+button_samplers = $("#btn_samplers");
 button_mode = $('#btnmode')
 button_mode_label = $('#btnmode_label')
 button_send = $('#btnsend');
@@ -1867,6 +1924,10 @@ $(document).ready(function(){
 usloaded = $("#uslistloaded");
 us_accept = $("#btn_usaccept");
 us_close = $("#btn_usclose");
+samplerspopup = $("#samplerscontainer");
+samplerslist = $("#samplerslist");
+samplers_accept = $("#btn_samplersaccept");
+samplers_close = $("#btn_samplersclose");
 nspopup = $("#newgamecontainer");
 ns_accept = $("#btn_nsaccept");
 ns_close = $("#btn_nsclose");
@@ -1889,7 +1950,7 @@ $(document).ready(function(){
 modelname = msg.modelname;
 }
 refreshTitle();
-connect_status.html("<b>Connected to KoboldAI Process!</b>");
+connect_status.html("<b>Connected to KoboldAI!</b>");
 connect_status.removeClass("color_orange");
 connect_status.addClass("color_green");
 // Reset Menus
@@ -2059,48 +2120,52 @@ $(document).ready(function(){
 newTextHighlight($("#n"+msg.data))
 } else if(msg.cmd == "updatetemp") {
 // Send current temp value to input
-$("#settemp").val(parseFloat(msg.data));
 $("#settempcur").val(msg.data);
+$("#settemp").val(parseFloat(msg.data)).trigger("change");
 } else if(msg.cmd == "updatetopp") {
 // Send current top p value to input
-$("#settopp").val(parseFloat(msg.data));
 $("#settoppcur").val(msg.data);
+$("#settopp").val(parseFloat(msg.data)).trigger("change");
 } else if(msg.cmd == "updatetopk") {
 // Send current top k value to input
-$("#settopk").val(parseFloat(msg.data));
 $("#settopkcur").val(msg.data);
+$("#settopk").val(parseFloat(msg.data)).trigger("change");
 } else if(msg.cmd == "updatetfs") {
 // Send current tfs value to input
-$("#settfs").val(parseFloat(msg.data));
 $("#settfscur").val(msg.data);
+$("#settfs").val(parseFloat(msg.data)).trigger("change");
 } else if(msg.cmd == "updatetypical") {
 // Send current typical value to input
-$("#settypical").val(parseFloat(msg.data));
 $("#settypicalcur").val(msg.data);
+$("#settypical").val(parseFloat(msg.data)).trigger("change");
+} else if(msg.cmd == "updatetopa") {
+// Send current top a value to input
+$("#settopacur").val(msg.data);
+$("#settopa").val(parseFloat(msg.data)).trigger("change");
 } else if(msg.cmd == "updatereppen") {
 // Send current rep pen value to input
-$("#setreppen").val(parseFloat(msg.data));
 $("#setreppencur").val(msg.data);
+$("#setreppen").val(parseFloat(msg.data)).trigger("change");
 } else if(msg.cmd == "updatereppenslope") {
 // Send current rep pen value to input
-$("#setreppenslope").val(parseFloat(msg.data));
 $("#setreppenslopecur").val(msg.data);
+$("#setreppenslope").val(parseFloat(msg.data)).trigger("change");
 } else if(msg.cmd == "updatereppenrange") {
 // Send current rep pen value to input
-$("#setreppenrange").val(parseFloat(msg.data));
 $("#setreppenrangecur").val(msg.data);
+$("#setreppenrange").val(parseFloat(msg.data)).trigger("change");
 } else if(msg.cmd == "updateoutlen") {
 // Send current output amt value to input
-$("#setoutput").val(parseInt(msg.data));
 $("#setoutputcur").val(msg.data);
+$("#setoutput").val(parseInt(msg.data)).trigger("change");
 } else if(msg.cmd == "updatetknmax") {
 // Send current max tokens value to input
-$("#settknmax").val(parseInt(msg.data));
 $("#settknmaxcur").val(msg.data);
+$("#settknmax").val(parseInt(msg.data)).trigger("change");
 } else if(msg.cmd == "updateikgen") {
 // Send current max tokens value to input
-$("#setikgen").val(parseInt(msg.data));
 $("#setikgencur").val(msg.data);
+$("#setikgen").val(parseInt(msg.data)).trigger("change");
 } else if(msg.cmd == "setlabeltemp") {
 // Update setting label with value from server
 $("#settempcur").val(msg.data);
@@ -2116,6 +2181,9 @@ $(document).ready(function(){
 } else if(msg.cmd == "setlabeltypical") {
 // Update setting label with value from server
 $("#settypicalcur").val(msg.data);
+} else if(msg.cmd == "setlabeltypical") {
+// Update setting label with value from server
+$("#settopa").val(msg.data);
 } else if(msg.cmd == "setlabelreppen") {
 // Update setting label with value from server
 $("#setreppencur").val(msg.data);
@@ -2284,6 +2352,8 @@ $(document).ready(function(){
 buildSPList(msg.data);
 } else if(msg.cmd == "buildus") {
 buildUSList(msg.data.unloaded, msg.data.loaded);
+} else if(msg.cmd == "buildsamplers") {
+buildSamplerList(msg.data);
 } else if(msg.cmd == "askforoverwrite") {
 // Show overwrite warning
 show([$(".saveasoverwrite")]);
@@ -2304,15 +2374,15 @@ $(document).ready(function(){
 $("#setnumseqcur").html(msg.data);
 } else if(msg.cmd == "updatenumseq") {
 // Send current max tokens value to input
-$("#setnumseq").val(parseInt(msg.data));
 $("#setnumseqcur").html(msg.data);
+$("#setnumseq").val(parseInt(msg.data)).trigger("change");
 } else if(msg.cmd == "setlabelwidepth") {
 // Update setting label with value from server
 $("#setwidepthcur").html(msg.data);
 } else if(msg.cmd == "updatewidepth") {
 // Send current max tokens value to input
-$("#setwidepth").val(parseInt(msg.data));
 $("#setwidepthcur").html(msg.data);
+$("#setwidepth").val(parseInt(msg.data)).trigger("change");
 } else if(msg.cmd == "updateuseprompt") {
 // Update toggle state
 $("#setuseprompt").prop('checked', msg.data).change();
@@ -2396,9 +2466,39 @@ $(document).ready(function(){
 }, 2);
 });
 
+var us_click_handler = function(ev) {
+setTimeout(function() {
+if (us_dragging) {
+return;
+}
+var target = $(ev.target).closest(".uslistitem")[0];
+if ($.contains(document.getElementById("uslistunloaded"), target)) {
+document.getElementById("uslistloaded").appendChild(target);
+} else {
+document.getElementById("uslistunloaded").appendChild(target);
+}
+}, 10);
+}
+
+var samplers_click_handler = function(ev) {
+setTimeout(function() {
+if (samplers_dragging) {
+return;
+}
+var target = $(ev.target).closest(".samplerslistitem");
+var next = target.parent().next().find(".samplerslistitem");
+if (!next.length) {
+return;
+}
+next.parent().after(target.parent());
+}, 10);
+}
+
 // Make the userscripts menu sortable
 var us_sortable_settings = {
 placeholder: "ussortable-placeholder",
+start: function() { us_dragging = true; },
+stop: function() { us_dragging = false; },
 delay: 2,
 cursor: "move",
 tolerance: "pointer",
@@ -2407,12 +2507,28 @@ $(document).ready(function(){
 scrollSensitivity: 64,
 scrollSpeed: 10,
 }
-$(usunloaded).sortable($.extend({
+usunloaded.sortable($.extend({
 connectWith: "#uslistloaded",
-}, us_sortable_settings));
+}, us_sortable_settings)).on("click", ".uslistitem", us_click_handler);
-$(usloaded).sortable($.extend({
+usloaded.sortable($.extend({
 connectWith: "#uslistunloaded",
-}, us_sortable_settings));
+}, us_sortable_settings)).on("click", ".uslistitem", us_click_handler);
+
+// Make the samplers menu sortable
+var samplers_sortable_settings = {
+placeholder: "samplerssortable-placeholder",
+start: function() { samplers_dragging = true; },
+stop: function() { samplers_dragging = false; },
+delay: 2,
+cursor: "move",
+tolerance: "pointer",
+opacity: 0.21,
+revert: 173,
+scrollSensitivity: 64,
+scrollSpeed: 10,
+}
+samplerslist.sortable($.extend({
+}, samplers_sortable_settings)).on("click", ".samplerslistitem", samplers_click_handler);
+
 // Bind actions to UI buttons
 button_send.on("click", function(ev) {
@@ -2548,6 +2664,10 @@ $(document).ready(function(){
 button_userscripts.on("click", function(ev) {
 socket.send({'cmd': 'uslistrequest', 'data': ''});
 });
 
+button_samplers.on("click", function(ev) {
+socket.send({'cmd': 'samplerlistrequest', 'data': ''});
+});
+
 load_close.on("click", function(ev) {
 hideLoadPopup();
@@ -2581,6 +2701,16 @@ $(document).ready(function(){
 socket.send({'cmd': 'usload', 'data': ''});
 hideUSPopup();
 });
 
+samplers_close.on("click", function(ev) {
+hideSamplersPopup();
+});
+
+samplers_accept.on("click", function(ev) {
+hideMessage();
+socket.send({'cmd': 'samplers', 'data': samplerslist.find(".samplerslistitem").map(function() { return parseInt($(this).attr("sid")); }).toArray()});
+hideSamplersPopup();
+});
+
 button_newgame.on("click", function(ev) {
 if(connected) {
@@ -22,6 +22,14 @@ chunk.editing, chunk.editing * {
 font-style: normal !important;
 }
 
+.setting-value-warning {
+color: #ff7777;
+}
+
+.setting-value-warning:focus {
+color: #ffaaaa !important;
+}
+
 .settinglabel input {
 width: 5ch;
 background-color: inherit;
@@ -449,6 +457,26 @@ body.connected #popupfooter, #popupfooter.always-available {
 overflow-wrap: anywhere;
 }
 
+#samplerspopup {
+width: 300px;
+background-color: #262626;
+margin-top: 100px;
+}
+
+@media (max-width: 768px) {
+#samplerspopup {
+width: 100%;
+background-color: #262626;
+margin-top: 100px;
+}
+}
+
+#samplerslist {
+height: 300px;
+overflow-y: scroll;
+overflow-wrap: anywhere;
+}
+
 #nspopup {
 width: 350px;
 background-color: #262626;
@@ -742,7 +770,7 @@ body.connected .dropdown-item:hover, .dropdown-item.always-available:hover {
 background-color: #3bf723;
 }
 
-.ussortable-placeholder {
+.ussortable-placeholder, .samplerssortable-placeholder {
 height: 4px;
 background-color: #3bf723;
 }
@@ -1332,7 +1360,7 @@ body.connected .popupfooter, .popupfooter.always-available {
 background-color: #688f1f;
 }
 
-.uslistitem {
+.uslistitem, .samplerslistitem {
 padding: 12px 10px 12px 10px;
 display: flex;
 flex-grow: 1;
@@ -1344,11 +1372,11 @@ body.connected .popupfooter, .popupfooter.always-available {
 transition: background-color 0.25s ease-in;
 }
 
-.uslistitemsub {
+.uslistitemsub, .samplerslistitemsub {
 color: #ba9;
 }
 
-.uslistitem:hover {
+.uslistitem:hover, .samplerslistitem:hover {
 cursor: move;
 background-color: #688f1f;
 }
@@ -9,7 +9,7 @@
 <link rel="stylesheet" href="static/bootstrap.min.css">
 <link rel="stylesheet" href="static/bootstrap-toggle.min.css">
 <link rel="stylesheet" href="static/open-iconic-bootstrap.min.css">
-<link rel="stylesheet" href="static/custom.css?ver=1.17a">
+<link rel="stylesheet" href="static/custom.css?ver=1.18.1a">
 
 <script src="static/jquery-3.6.0.min.js"></script>
 <script src="static/jquery-ui.sortable.min.js"></script>
@@ -17,7 +17,7 @@
 <script src="static/bootstrap.min.js"></script>
 <script src="static/bootstrap-toggle.min.js"></script>
 <script src="static/rangy-core.min.js"></script>
-<script src="static/application.js?ver=1.17e"></script>
+<script src="static/application.js?ver=1.18.1a"></script>
 </head>
 <body>
 <input type="file" id="remote-save-select" accept="application/json" style="display:none">
@@ -71,6 +71,9 @@
 <li class="nav-item">
 <a class="nav-link" href="#" id="btn_format">Formatting</a>
 </li>
+<li class="nav-item">
+<a class="nav-link" href="#" id="btn_samplers">Samplers</a>
+</li>
 <li class="nav-item">
 <a class="nav-link" href="#" id="btn_userscripts">Userscripts</a>
 </li>
@@ -299,6 +302,19 @@
 </div>
 </div>
 </div>
+<div class="popupcontainer hidden" id="samplerscontainer">
+<div id="samplerspopup">
+<div class="popuptitlebar">
+<div class="popuptitletext">Drag-and-drop to change the order in which the samplers are applied</div>
+</div>
+<div id="samplerslist">
+</div>
+<div class="popupfooter">
+<button type="button" class="btn btn-primary" id="btn_samplersaccept">Save</button>
+<button type="button" class="btn btn-primary" id="btn_samplersclose">Cancel</button>
+</div>
+</div>
+</div>
 <div class="popupcontainer hidden" id="loadcontainerdelete">
 <div id="loadpopupdelete">
 <div class="popuptitlebar">
@@ -27,6 +27,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 '''
 
+import utils
+
 import multiprocessing
 from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar
 import progressbar
@@ -63,11 +65,13 @@ def stopping_callback(generated, n_generated, excluded_world_info) -> Tuple[List
 
 def settings_callback() -> dict:
 return {
+"sampler_order": utils.default_sampler_order.copy(),
 "top_p": 0.9,
 "temp": 0.5,
 "top_k": 0,
 "tfs": 1.0,
 "typical": 1.0,
+"top_a": 0.0,
 "repetition_penalty": 1.0,
 "rpslope": 0.0,
 "rprange": 0,
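The new `sampler_order` default above is simply a list of sampler indices copied from `utils.default_sampler_order`. As a minimal sketch (the default order value `[0, 1, 2, 3, 4, 5]` is an assumption, since this hunk does not show it), a defaults dictionary with caller overrides could look like this:

```python
# Minimal sketch, not the repository's code: generation defaults with a
# configurable sampler order. The default_sampler_order value is assumed.
default_sampler_order = [0, 1, 2, 3, 4, 5]

def make_settings(**overrides) -> dict:
    settings = {
        "sampler_order": default_sampler_order.copy(),  # copy so callers can reorder safely
        "top_p": 0.9,
        "temp": 0.5,
        "top_k": 0,
        "tfs": 1.0,
        "typical": 1.0,
        "top_a": 0.0,
        "repetition_penalty": 1.0,
    }
    settings.update(overrides)  # caller-supplied values take precedence
    return settings

print(make_settings(top_a=0.2)["sampler_order"])
```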
@@ -156,10 +160,10 @@ def apply_repetition_penalty_dynamic(logits, tokens, repetition_penalty, generat
 logits[tokens] = penalty_logits
 return logits
 
-def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0):
+def kobold_sample_dynamic(key, logits, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
 '''
-This gets called by generate_loop_fn to apply a series of 5 filters
-to the logits (top-k, then top-p, then TFS, then typical, then temperature)
+This gets called by generate_loop_fn to apply a series of 6 filters
+to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
 before picking one token using the modified logits
 '''
 # Top-k (keep only the k tokens with the highest logits and remove
@@ -178,8 +182,18 @@ def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, ty
 sorted_indices_to_remove,
 )
 return np.where(indices_to_remove, -np.inf, logits)
-if top_k > 0:
-logits = top_k_filter(logits)
+# Top-a (remove all tokens that have softmax probability less than
+# a*m^2 where m is the maximum softmax probability)
+def top_a_filter(logits):
+# Replace every element in the logits array
+# with e (Euler's number) to the power of that element, and divide
+# each element of the new array by the sum of the elements in the
+# new array
+probabilities = np.array(jax.nn.softmax(logits), copy=True)
+# Find the largest probability
+probs_max = probabilities.max()
+# Remove tokens
+return np.where(probabilities < probs_max * probs_max * top_a, -np.inf, logits)
 # Top-p (after sorting the remaining tokens again in descending order of
 # logit, remove the ones that have cumulative softmax probability
 # greater than p)
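The comments in the new `top_a_filter` above describe top-a sampling: every token whose softmax probability falls below `top_a * m**2` is discarded, where m is the largest softmax probability. A self-contained NumPy sketch of that rule (illustrative only, not the MTJ code path):

```python
import numpy as np

def top_a_mask(logits: np.ndarray, top_a: float) -> np.ndarray:
    """Set logits to -inf when their softmax probability is below top_a * max_prob**2."""
    shifted = logits - logits.max()               # max-subtraction for numerical stability
    probs = np.exp(shifted) / np.exp(shifted).sum()
    probs_max = probs.max()                       # m, the largest softmax probability
    threshold = probs_max * probs_max * top_a     # a * m^2
    return np.where(probs < threshold, -np.inf, logits)

print(top_a_mask(np.array([3.0, 2.0, -1.0, -4.0]), top_a=0.5))
```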
@@ -205,8 +219,6 @@ def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, ty
 sorted_indices_to_remove,
 )
 return np.where(indices_to_remove, -np.inf, logits)
-if top_p < 1.0:
-logits = top_p_filter(logits)
 # Tail free sampling (basically top-p a second time on remaining tokens
 # except it's the "cumulative normalized absolute second finite
 # differences of the softmax probabilities" instead of just the
@@ -245,8 +257,6 @@ def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, ty
 sorted_indices_to_remove,
 )
 return np.where(indices_to_remove, -np.inf, logits)
-if tfs < 1.0:
-logits = tail_free_filter(logits)
 # Typical sampling (https://arxiv.org/pdf/2202.00666.pdf)
 def typical_filter(logits):
 # Compute softmax probabilities and the natural logarithms of them
@@ -276,10 +286,16 @@ def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, ty
 sorted_indices_to_remove,
 )
 return np.where(indices_to_remove, -jnp.inf, logits)
-if typical < 1.0:
-logits = typical_filter(logits)
 # Temperature (just divide the logits by the temperature)
-logits /= temp
+def temp_filter(logits):
+return logits / temp
+for k in sampler_order:
+if k == 0 and top_k > 0: logits = top_k_filter(logits)
+if k == 1 and top_a > 0.0: logits = top_a_filter(logits)
+if k == 2 and top_p < 1.0: logits = top_p_filter(logits)
+if k == 3 and tfs < 1.0: logits = tail_free_filter(logits)
+if k == 4 and typical < 1.0: logits = typical_filter(logits)
+if k == 5 and temp != 1.0: logits = temp_filter(logits)
 # Finally, pick one token using the softmax thingy again (it gives
 # an array whose elements sum to 1 so it can be used nicely as a
 # probability distribution)
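The loop added above replaces the fixed filter sequence with an ordered dispatch: each integer in `sampler_order` selects one of the six filters defined earlier in the function, and a filter only runs when its corresponding setting is active. A simplified, standalone sketch of that pattern (placeholder filters, not the repository's functions):

```python
# Simplified sketch of ordered filter dispatch with per-filter "active" guards.
def apply_in_order(logits, sampler_order, filters, active):
    for k in sampler_order:
        if active[k]:                  # skip filters whose setting is at its neutral value
            logits = filters[k](logits)
    return logits

# Hypothetical usage with trivial placeholder filters:
identity = lambda x: x
halve = lambda x: [v / 2 for v in x]
print(apply_in_order([4.0, 2.0], [1, 0], filters=[identity, halve], active=[True, True]))
```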
@@ -330,10 +346,10 @@ def apply_repetition_penalty_static(logits, tokens, repetition_penalty, generate
 # positions in the logits array
 return logits.at[tokens].set(penalty_logits)
 
-def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0):
+def kobold_sample_static(key, logits, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
 '''
-This gets called by generate_loop_fn to apply a series of 5 filters
-to the logits (top-k, then top-p, then TFS, then typical, then temperature)
+This gets called by generate_loop_fn to apply a series of 6 filters
+to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
 before picking one token using the modified logits
 '''
 # Top-k (keep only the k tokens with the highest logits and remove
@@ -352,7 +368,18 @@ def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typ
 sorted_indices_to_remove,
 )
 return jnp.where(indices_to_remove, -jnp.inf, logits)
-logits = jax.lax.cond(top_k > 0, top_k_filter, lambda x: x, logits)
+# Top-a (remove all tokens that have softmax probability less than
+# a*m^2 where m is the maximum softmax probability)
+def top_a_filter(logits):
+# Replace every element in the logits array
+# with e (Euler's number) to the power of that element, and divide
+# each element of the new array by the sum of the elements in the
+# new array
+probabilities = jax.nn.softmax(logits)
+# Find the largest probability
+probs_max = probabilities.max()
+# Remove tokens
+return jnp.where(probabilities < probs_max * probs_max * top_a, -jnp.inf, logits)
 # Top-p (after sorting the remaining tokens again in descending order of
 # logit, remove the ones that have cumulative softmax probability
 # greater than p)
@@ -378,7 +405,6 @@ def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typ
 sorted_indices_to_remove,
 )
 return jnp.where(indices_to_remove, -jnp.inf, logits)
-logits = jax.lax.cond(top_p < 1.0, top_p_filter, lambda x: x, logits)
 # Tail free sampling (basically top-p a second time on remaining tokens
 # except it's the "cumulative normalized absolute second finite
 # differences of the softmax probabilities" instead of just the
@@ -417,7 +443,6 @@ def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typ
 sorted_indices_to_remove,
 )
 return jnp.where(indices_to_remove, -jnp.inf, logits)
-logits = jax.lax.cond(tfs < 1.0, tail_free_filter, lambda x: x, logits)
 # Typical sampling (https://arxiv.org/pdf/2202.00666.pdf)
 def typical_filter(logits):
 # Compute softmax probabilities and the natural logarithms of them
@@ -446,11 +471,16 @@ def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typ
 sorted_indices_to_remove,
 )
 return jnp.where(indices_to_remove, -jnp.inf, logits)
-logits = jax.lax.cond(typical < 1.0, typical_filter, lambda x: x, logits)
 # Temperature (just divide the logits by the temperature)
 def temp_filter(logits):
 return logits / temp
-logits = jax.lax.cond(True, temp_filter, lambda x: x, logits)
+for k in sampler_order:
+logits = jax.lax.cond(jnp.logical_and(k == 0, top_k > 0), top_k_filter, lambda x: x, logits)
+logits = jax.lax.cond(jnp.logical_and(k == 1, top_a > 0.0), top_a_filter, lambda x: x, logits)
+logits = jax.lax.cond(jnp.logical_and(k == 2, top_p < 1.0), top_p_filter, lambda x: x, logits)
+logits = jax.lax.cond(jnp.logical_and(k == 3, tfs < 1.0), tail_free_filter, lambda x: x, logits)
+logits = jax.lax.cond(jnp.logical_and(k == 4, typical < 1.0), typical_filter, lambda x: x, logits)
+logits = jax.lax.cond(jnp.logical_and(k == 5, temp != 1.0), temp_filter, lambda x: x, logits)
 # Finally, pick one token using the softmax thingy again (it gives
 # an array whose elements sum to 1 so it can be used nicely as a
 # probability distribution)
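In the static (XLA-compiled) sampler above, the filters are selected with `jax.lax.cond` rather than Python `if` statements, since the order values are traced arrays and ordinary Python branching would not survive compilation; the identity lambda is the fall-through branch. A small, self-contained illustration of that pattern (assumed values, not the repository's code):

```python
import jax
import jax.numpy as jnp

@jax.jit
def maybe_scale(logits, temp):
    # lax.cond evaluates the predicate at run time and applies exactly one of
    # the two callables; the identity branch leaves the logits untouched.
    return jax.lax.cond(temp != 1.0, lambda x: x / temp, lambda x: x, logits)

print(maybe_scale(jnp.array([1.0, 2.0, 3.0]), 0.5))
```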
@@ -804,6 +834,7 @@ def infer_static(
 top_k=0,
 tfs=1.0,
 typical=1.0,
+top_a=0.0,
 repetition_penalty=1.0,
 rpslope=0.0,
 rprange=0,
@@ -811,8 +842,12 @@ def infer_static(
 gen_len=80,
 soft_embeddings: Optional[np.array] = None,
 soft_tokens: Optional[np.array] = None,
+sampler_order: Optional[List[int]] = None,
 ) -> List[np.array]:
 maps.thread_resources.env = thread_resources_env
+if sampler_order is None:
+sampler_order = utils.default_sampler_order.copy()
+sampler_order = np.uint32(sampler_order)
 total_batch = 1
 tokens = context
 if(soft_tokens is not None):
@@ -823,10 +858,12 @@ def infer_static(
 batched_tokens = np.array([padded_tokens] * total_batch)
 samples = []
 batched_generator_params = {
+"sampler_order": np.repeat(sampler_order[np.newaxis], total_batch, axis=0),
 "temp": temp * np.ones(total_batch),
 "top_p": top_p * np.ones(total_batch),
 "tfs": tfs * np.ones(total_batch),
 "typical": typical * np.ones(total_batch),
+"top_a": top_a * np.ones(total_batch),
 "repetition_penalty": repetition_penalty * np.ones(total_batch),
 "rpslope": rpslope * np.ones(total_batch),
 "rprange": np.full(total_batch, rprange, dtype=np.uint32),
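In the `batched_generator_params` change above, every scalar setting is broadcast to one value per batch element, while `sampler_order` gains a new leading batch axis via `np.repeat` so each element carries its own copy of the ordering. A minimal shape sketch of that broadcasting (the batch size and order used here are assumed values):

```python
import numpy as np

total_batch = 3                                # assumed batch size
sampler_order = np.uint32([0, 1, 2, 3, 4, 5])  # assumed default order
top_a = 0.2

batched = {
    # One row of sampler indices per batch element: shape (3, 6).
    "sampler_order": np.repeat(sampler_order[np.newaxis], total_batch, axis=0),
    # One scalar per batch element: shape (3,).
    "top_a": top_a * np.ones(total_batch),
}
print(batched["sampler_order"].shape, batched["top_a"].shape)
```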
@@ -983,6 +1020,9 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2):
 def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpoint=False, **kwargs) -> None:
 global thread_resources_env, seq, tokenizer, network, params
 
+if not hasattr(vars, "sampler_order") or not vars.sampler_order:
+vars.sampler_order = utils.default_sampler_order.copy()
+
 default_params = {
 "compat": "j",
 "layers": 28,
@@ -1054,7 +1094,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
 # by the number of TPU cores, and fall back to one core if an even
 # number of TPU cores is not possible.
 for c in (8, 6, 4, 2, 1):
-if 0 == params["n_heads"] % c == params["d_model"] % c:
+if 0 == params["n_heads"] % c == params.get("d_embed", params["d_model"]) % c:
 params["cores_per_replica"] = c
 break
 
@@ -1079,6 +1119,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
 return old_encode(s).ids
 return encode
 tokenizer.encode = new_encode(tokenizer.encode)
+tokenizer._koboldai_header = []
 elif not hf_checkpoint:
 if not isinstance(params["tokenizer_class"], str) or not any(params["tokenizer_class"].endswith(s) for s in ("Tokenizer", "TokenizerFast")):
 raise ValueError("`tokenizer_class` must be a string ending in 'Tokenizer' or 'TokenizerFast'")
@@ -1092,13 +1133,18 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
 print("Connecting to your Colab instance's TPU", flush=True)
 spinner = multiprocessing.Process(target=show_spinner, args=())
 spinner.start()
-colab_tpu_addr = os.environ['COLAB_TPU_ADDR'].split(':')[0]
-url = f'http://{colab_tpu_addr}:8475/requestversion/{driver_version}'
+if os.environ.get('COLAB_TPU_ADDR', '') != '':
+tpu_address = os.environ['COLAB_TPU_ADDR'] # Colab
+else:
+tpu_address = os.environ['TPU_NAME'] # Kaggle
+tpu_address = tpu_address.replace("grpc://", "")
+tpu_address_without_port = tpu_address.split(':', 1)[0]
+url = f'http://{tpu_address_without_port}:8475/requestversion/{driver_version}'
+config.FLAGS.jax_xla_backend = "tpu_driver"
+config.FLAGS.jax_backend_target = "grpc://" + tpu_address
 requests.post(url)
 spinner.terminate()
 print()
-config.FLAGS.jax_xla_backend = "tpu_driver"
-config.FLAGS.jax_backend_target = "grpc://" + os.environ['COLAB_TPU_ADDR']
 
 cores_per_replica = params["cores_per_replica"]
 seq = params["seq"]
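The connection code above now works on both Colab and Kaggle: it prefers `COLAB_TPU_ADDR`, falls back to `TPU_NAME`, strips any `grpc://` prefix, and uses only the host part when requesting the TPU driver version. A hedged sketch of that address-normalisation step on its own:

```python
import os

def resolve_tpu_address() -> str:
    """Return the TPU address, preferring Colab's variable over Kaggle's TPU_NAME."""
    address = os.environ.get("COLAB_TPU_ADDR") or os.environ["TPU_NAME"]
    return address.replace("grpc://", "")

# Hypothetical usage mirroring the diff above:
# tpu_address = resolve_tpu_address()
# host = tpu_address.split(":", 1)[0]
# url = f"http://{host}:8475/requestversion/{driver_version}"
```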
@@ -1158,13 +1204,27 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
|
|||||||
import functools
|
import functools
|
||||||
|
|
||||||
def callback(model_dict, f, **_):
|
def callback(model_dict, f, **_):
|
||||||
|
if callback.nested:
|
||||||
|
return
|
||||||
|
callback.nested = True
|
||||||
with zipfile.ZipFile(f, "r") as z:
|
with zipfile.ZipFile(f, "r") as z:
|
||||||
try:
|
try:
|
||||||
last_storage_key = None
|
last_storage_key = None
|
||||||
f = None
|
f = None
|
||||||
current_offset = 0
|
current_offset = 0
|
||||||
print("\n\n\nThis model has ", f"{hk.data_structures.tree_size(network.state['params']):,d}".replace(",", " "), " parameters.\n")
|
if utils.current_shard == 0:
|
||||||
for key in tqdm(sorted(model_dict.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)), desc="Loading model tensors"):
|
print("\n\n\nThis model has ", f"{hk.data_structures.tree_size(network.state['params']):,d}".replace(",", " "), " parameters.\n")
|
||||||
|
|
||||||
|
if utils.num_shards is None or utils.current_shard == 0:
|
||||||
|
if utils.num_shards is not None:
|
||||||
|
num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
|
||||||
|
else:
|
||||||
|
num_tensors = len(model_dict)
|
||||||
|
utils.bar = tqdm(total=num_tensors, desc="Loading model tensors")
|
||||||
|
|
||||||
|
if utils.num_shards is not None:
|
||||||
|
utils.current_shard += 1
|
||||||
|
for key in sorted(model_dict.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
|
||||||
|
|
||||||
# Some model weights are used by transformers but not by MTJ.
|
# Some model weights are used by transformers but not by MTJ.
|
||||||
# We have to materialize these weights anyways because
|
# We have to materialize these weights anyways because
|
||||||
@@ -1173,6 +1233,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
                 # tensors, which don't take up any actual CPU or TPU memory.
                 if key not in model_spec:
                     model_dict[key] = torch.empty(model_dict[key].shape, dtype=model_dict[key].dtype, device="meta")
+                    utils.bar.update(1)
                     continue
 
                 storage_key = model_dict[key].key
@@ -1200,6 +1261,8 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
 
                 # MTJ requires certain mathematical operations to be performed
                 # on tensors in order for them to be in the correct format
+                if "remove_first_two_rows" in transforms:
+                    tensor = tensor[2:]
                 if "divide_by_shards" in transforms:
                     tensor /= params["cores_per_replica"]
                 if "vocab_pad" in transforms:
@@ -1223,6 +1286,11 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
                     np.empty(params["cores_per_replica"]),
                 )
 
+                utils.bar.update(1)
+
+            if utils.num_shards is not None and utils.current_shard < utils.num_shards:
+                return
+
             # Check for tensors that MTJ needs that were not provided in the
             # HF model
             for mk, mv in network.state["params"].items():
@@ -1241,8 +1309,13 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
                 print("\n\nERROR: " + error, file=sys.stderr)
                 raise RuntimeError(error)
         finally:
+            if utils.num_shards is None or utils.current_shard >= utils.num_shards:
+                utils.bar.close()
+                utils.bar = None
+            callback.nested = False
             if isinstance(f, zipfile.ZipExtFile):
                 f.close()
+callback.nested = False
 
 if os.path.isdir(vars.model.replace('/', '_')):
     import shutil
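The hunks above thread shard bookkeeping through the lazy-load callback: the progress bar is created once, advanced per tensor, and only closed after the final shard. Below is a minimal, self-contained sketch of that pattern, with a SimpleNamespace standing in for the KoboldAI utils module and plain dicts standing in for checkpoint shards; the equal-shard-size total is a simplification.

# Sketch of the shard-aware progress pattern; names mirror the new utils.py
# fields (num_shards, current_shard, bar). The "shards" here are plain dicts.
from types import SimpleNamespace
from tqdm.auto import tqdm

utils = SimpleNamespace(num_shards=None, current_shard=0, bar=None)

def load_shard(model_dict):
    # Create the bar once, on the first shard, sized for the whole model
    # (assumes shards of equal size, which is a simplification).
    if utils.num_shards is None or utils.current_shard == 0:
        total = len(model_dict) if utils.num_shards is None else len(model_dict) * utils.num_shards
        utils.bar = tqdm(total=total, desc="Loading model tensors")
    if utils.num_shards is not None:
        utils.current_shard += 1
    for key in sorted(model_dict.keys()):
        pass  # per-tensor work would happen here
        utils.bar.update(1)
    # Only close the bar after the last shard has been processed.
    if utils.num_shards is None or utils.current_shard >= utils.num_shards:
        utils.bar.close()
        utils.bar = None

utils.num_shards = 2
for shard in ({"a": 1, "b": 2}, {"c": 3, "d": 4}):
    load_shard(shard)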
@@ -1252,6 +1325,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
     if(os.path.isdir(vars.custmodpth)):
         try:
             tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+        except Exception as e:
+            pass
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
         except Exception as e:
             try:
                 tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@@ -1264,6 +1341,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
     elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
         try:
             tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
+        except Exception as e:
+            pass
+        try:
+            tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
         except Exception as e:
             try:
                 tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@@ -1276,6 +1357,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
     else:
         try:
             tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
+        except Exception as e:
+            pass
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
         except Exception as e:
             try:
                 tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
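The three tokenizer hunks above repeat one fallback chain: the fast AutoTokenizer, then the slow one, then GPT2TokenizerFast. A sketch of the same ordering as a single helper follows; the final plain "gpt2" step is an assumption, since the last resort lies outside this excerpt, and `model_path` is just a placeholder argument.

# Sketch of the tokenizer fallback ordering used above (not the literal code).
from transformers import AutoTokenizer, GPT2TokenizerFast

def load_tokenizer_with_fallback(model_path, revision=None):
    for loader in (
        lambda: AutoTokenizer.from_pretrained(model_path, revision=revision, cache_dir="cache"),
        lambda: AutoTokenizer.from_pretrained(model_path, revision=revision, cache_dir="cache", use_fast=False),
        lambda: GPT2TokenizerFast.from_pretrained(model_path, revision=revision, cache_dir="cache"),
        lambda: GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache"),  # assumed last resort
    ):
        try:
            return loader()
        except Exception:
            continue
    raise RuntimeError(f"Could not load a tokenizer for {model_path}")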
@@ -50,4 +50,4 @@ git remote add origin %origin%
 git fetch --all
 git checkout %branch% -f
 git reset --hard origin/%branch%
-cmd /k
+%windir%\system32\timeout -t 10
99 utils.py
@@ -5,9 +5,22 @@ import json
 import subprocess
 import tempfile
 import requests
+import requests.adapters
+import time
+from tqdm.auto import tqdm
 import os
+import itertools
+from typing import Optional
 
 vars = None
+num_shards: Optional[int] = None
+current_shard = 0
+from_pretrained_model_name = ""
+from_pretrained_index_filename: Optional[str] = None
+from_pretrained_kwargs = {}
+bar = None
+
+default_sampler_order = [0, 1, 2, 3, 4, 5]
 
 #==================================================================#
 # Decorator to prevent a function's actions from being run until
@@ -130,10 +143,18 @@ def encodenewlines(txt):
 def decodenewlines(txt):
     if(vars.newlinemode == "s"):
         return txt.replace("</s>", '\n')
+    if(vars.newlinemode == "ns"):
+        return txt.replace("</s>", '')
     return txt
 
 #==================================================================#
-# Downloads sharded huggingface checkpoints using aria2c if possible
+# Returns number of layers given an HF model config
+#==================================================================#
+def num_layers(config):
+    return config.num_layers if hasattr(config, "num_layers") else config.n_layer if hasattr(config, "n_layer") else config.num_hidden_layers
+
+#==================================================================#
+# Downloads huggingface checkpoints using aria2c if possible
 #==================================================================#
 def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_dir=None, proxies=None, resume_download=False, local_files_only=False, use_auth_token=None, user_agent=None, revision=None, mirror=None, **kwargs):
     import transformers
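A usage sketch for the new num_layers helper follows; it reads whichever of num_layers, n_layer, or num_hidden_layers a given Hugging Face config exposes. The model id is only illustrative.

# num_layers picks the layer-count attribute that the config actually defines.
from transformers import AutoConfig

def num_layers(config):
    return config.num_layers if hasattr(config, "num_layers") else config.n_layer if hasattr(config, "n_layer") else config.num_hidden_layers

config = AutoConfig.from_pretrained("EleutherAI/gpt-neo-1.3B", cache_dir="cache")
print(num_layers(config))  # GPT-Neo exposes num_layers; GPT-2 would expose n_layer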
@@ -191,6 +212,7 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
     if not urls:
         return
     etags = [h.get("X-Linked-Etag") or h.get("ETag") for u in urls for h in [requests.head(u, headers=headers, allow_redirects=False, proxies=proxies, timeout=10).headers]]
+    headers = [requests.head(u, headers=headers, allow_redirects=True, proxies=proxies, timeout=10).headers for u in urls]
    filenames = [transformers.file_utils.url_to_filename(u, t) for u, t in zip(urls, etags)]
    for n in filenames:
        path = os.path.join(_cache_dir, "kai-tempfile." + n + ".aria2")
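The added headers line re-requests each URL with allow_redirects=True so that Content-Length reflects the real file behind the redirect, which the progress-bar total below needs. A small illustration, with an example URL:

# The first hop on huggingface.co typically answers with a redirect and no
# usable Content-Length for LFS files; the redirect target reports the size.
import requests

url = "https://huggingface.co/gpt2/resolve/main/pytorch_model.bin"  # illustrative
hop = requests.head(url, allow_redirects=False, timeout=10)
final = requests.head(url, allow_redirects=True, timeout=10)
print(hop.status_code, hop.headers.get("Content-Length"))      # e.g. a redirect, no real size
print(final.status_code, final.headers.get("Content-Length"))  # 200 with the actual file size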
@@ -206,22 +228,75 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d
         path = os.path.join(_cache_dir, n)
         if os.path.exists(path):
             os.remove(path)
+    total_length = sum(int(h["Content-Length"]) for h in headers)
+    lengths = {}
     aria2_config = "\n".join(f"{u}\n out=kai-tempfile.{n}" for u, n in zip(urls, filenames)).encode()
-    with tempfile.NamedTemporaryFile("w+b", delete=False) as f:
-        f.write(aria2_config)
-        f.flush()
-        p = subprocess.Popen(["aria2c", "-x", "10", "-s", "10", "-j", "10", "--disable-ipv6", "--file-allocation=trunc", "--allow-overwrite", "--auto-file-renaming", "false", "-d", _cache_dir, "-i", f.name, "-U", transformers.file_utils.http_user_agent(user_agent)] + (["-c"] if not force_download else []) + ([f"--header='Authorization: Bearer {token}'"] if use_auth_token else []), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-        for line in p.stdout:
-            print(line.decode(), end="", flush=True)
-        path = f.name
-    try:
-        os.remove(path)
-    except OSError:
-        pass
+    s = requests.Session()
+    s.mount("http://", requests.adapters.HTTPAdapter(max_retries=requests.adapters.Retry(total=120, backoff_factor=1)))
+    bar = None
+    done = False
+    secret = os.urandom(17).hex()
+    try:
+        with tempfile.NamedTemporaryFile("w+b", delete=False) as f:
+            f.write(aria2_config)
+            f.flush()
+            p = subprocess.Popen(["aria2c", "-x", "10", "-s", "10", "-j", "10", "--enable-rpc=true", f"--rpc-secret={secret}", "--rpc-listen-port", str(vars.aria2_port), "--disable-ipv6", "--file-allocation=trunc", "--allow-overwrite", "--auto-file-renaming=false", "-d", _cache_dir, "-i", f.name, "-U", transformers.file_utils.http_user_agent(user_agent)] + (["-c"] if not force_download else []) + ([f"--header='Authorization: Bearer {token}'"] if use_auth_token else []), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+            while p.poll() is None:
+                r = s.post(f"http://localhost:{vars.aria2_port}/jsonrpc", json={"jsonrpc": "2.0", "id": "kai", "method": "aria2.tellActive", "params": [f"token:{secret}"]}).json()["result"]
+                if not r:
+                    s.close()
+                    if bar is not None:
+                        bar.n = bar.total
+                        bar.close()
+                    p.terminate()
+                    done = True
+                    break
+                if bar is None:
+                    bar = tqdm(total=total_length, desc=f"[aria2] Downloading model", unit="B", unit_scale=True, unit_divisor=1000)
+                visited = set()
+                for x in r:
+                    filename = x["files"][0]["path"]
+                    lengths[filename] = (int(x["completedLength"]), int(x["totalLength"]))
+                    visited.add(filename)
+                for k, v in lengths.items():
+                    if k not in visited:
+                        lengths[k] = (v[1], v[1])
+                bar.n = sum(v[0] for v in lengths.values())
+                bar.update()
+                time.sleep(0.1)
+            path = f.name
+    except Exception as e:
+        p.terminate()
+        raise e
+    finally:
+        try:
+            os.remove(path)
+        except OSError:
+            pass
     code = p.wait()
-    if code:
+    if not done and code:
         raise OSError(f"aria2 exited with exit code {code}")
     for u, t, n in zip(urls, etags, filenames):
         os.rename(os.path.join(_cache_dir, "kai-tempfile." + n), os.path.join(_cache_dir, n))
         with open(os.path.join(_cache_dir, n + ".json"), "w") as f:
             json.dump({"url": u, "etag": t}, f)
+
+#==================================================================#
+# Given the path to a pytorch_model.bin.index.json, returns how many
+# shards there are in the model
+#==================================================================#
+def get_num_shards(filename):
+    with open(filename) as f:
+        map_data = json.load(f)
+    return len(set(map_data["weight_map"].values()))
+
+#==================================================================#
+# Given the name/path of a sharded model and the path to a
+# pytorch_model.bin.index.json, returns a list of weight names in the
+# sharded model. Requires lazy loader to be enabled to work properly
+#==================================================================#
+def get_sharded_checkpoint_num_tensors(pretrained_model_name_or_path, filename, cache_dir=None, force_download=False, proxies=None, resume_download=False, local_files_only=False, use_auth_token=None, user_agent=None, revision=None, mirror=None, **kwargs):
+    import transformers.modeling_utils
+    import torch
+    shard_paths, _ = transformers.modeling_utils.get_checkpoint_shard_files(pretrained_model_name_or_path, filename, cache_dir=cache_dir, force_download=force_download, proxies=proxies, resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, user_agent=user_agent, revision=revision, mirror=mirror)
+    return list(itertools.chain(*(torch.load(p, map_location="cpu").keys() for p in shard_paths)))
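A usage sketch for get_num_shards: the index file maps every weight name to the shard file that stores it, so counting the distinct values gives the shard count. The file written below is a made-up two-shard example.

# Demonstrates the weight_map -> shard-count logic on a tiny fabricated index.
import json

def get_num_shards(filename):
    with open(filename) as f:
        map_data = json.load(f)
    return len(set(map_data["weight_map"].values()))

example_index = {"weight_map": {"wte.weight": "pytorch_model-00001-of-00002.bin",
                                "lm_head.weight": "pytorch_model-00002-of-00002.bin"}}
with open("example.index.json", "w") as f:
    json.dump(example_index, f)
print(get_num_shards("example.index.json"))  # -> 2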
29 warpers.py
@@ -148,3 +148,32 @@ class TypicalLogitsWarper(LogitsWarper):
         indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
         scores = scores.masked_fill(indices_to_remove, self.filter_value)
         return scores
+
+
+class TopALogitsWarper(LogitsWarper):
+    def __init__(self, top_a: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
+        top_a = float(top_a)
+        if top_a < 0 or top_a > 1.0:
+            raise ValueError(f"`top_a` has to be a float >= 0 and <= 1, but is {top_a}")
+        self.top_a = top_a
+        self.filter_value = filter_value
+        self.min_tokens_to_keep = min_tokens_to_keep
+
+    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
+        if self.filter_value >= 1.0:
+            return scores
+
+        sorted_logits, sorted_indices = torch.sort(scores, descending=True)
+        probs = sorted_logits.softmax(dim=-1)
+
+        # Remove tokens with probability less than top_a*(max(probs))^2 (tokens with 0 probability are kept)
+        probs_max = probs[..., 0, None]
+        sorted_indices_to_remove = probs < probs_max * probs_max * self.top_a
+
+        if self.min_tokens_to_keep > 1:
+            # Keep at least min_tokens_to_keep
+            sorted_indices_to_remove[..., : self.min_tokens_to_keep] = 0
+
+        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
+        scores = scores.masked_fill(indices_to_remove, self.filter_value)
+        return scores
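The top-a rule added above keeps a token only if its probability is at least top_a times the square of the highest token probability. A small numeric illustration, with made-up values:

# Fake logits whose softmax is [0.50, 0.30, 0.15, 0.05]; with top_a = 0.5 the
# cutoff is 0.5 * 0.5**2 = 0.125, so only the 0.05 token gets filtered out.
import torch

scores = torch.log(torch.tensor([[0.50, 0.30, 0.15, 0.05]]))
top_a = 0.5
probs = scores.softmax(dim=-1)
threshold = probs.max(dim=-1, keepdim=True).values ** 2 * top_a
filtered = scores.masked_fill(probs < threshold, -float("inf"))
print(probs, threshold, filtered)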