diff --git a/.gitignore b/.gitignore
index 9b79234b..5b024bd8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,3 +31,6 @@ Uninstall
 
 # Ignore compiled Python files.
 *.pyc
+
+# Don't ignore defaults
+!defaults/*
\ No newline at end of file
diff --git a/Uninstall.bat b/Uninstall.bat
new file mode 100644
index 00000000..ed143996
--- /dev/null
+++ b/Uninstall.bat
@@ -0,0 +1,32 @@
+@echo off
+cd /D %~dp0
+TITLE KoboldAI Uninstall Helper
+SET /P M=nul
+IF %M%==1 subst /D K: >nul
+
+IF "%1" == "FORCE" GOTO UNINSTALL
+
+IF EXIST "Uninstall\unins000.exe" (
+    start Uninstall\unins000.exe
+    exit
+) ELSE (
+    echo This will remove all KoboldAI folders that do not contain user data
+    pause
+    GOTO UNINSTALL
+)
+
+:UNINSTALL
+echo Uninstallation in progress, please wait...
+set DM=Y
+attrib -h .git >nul
+for /d %%D in (*) do if not "%%~nxD"=="stories" if not "%%~nxD"=="userscripts" if not "%%~nxD"=="settings" if not "%%~nxD"=="softprompts" if not "%%~nxD"=="models" if not "%%~nxD"=="Uninstall" rmdir /S /Q %%~nxD
+for %%i in (*) do if not "%%i"=="Uninstall.bat" del /q "%%i"
+set /P DM=Would you like to delete the models folder? (Y/n) :
+IF %DM%==Y rmdir models /s /q
+IF %DM%==y rmdir models /s /q
+set DM=N
+set /P DM=Would you like to delete all other user folders? (y/N) :
+IF %DM%==Y rmdir stories userscripts settings softprompts /s /q
+IF %DM%==y rmdir stories userscripts settings softprompts /s /q
+del Uninstall.bat
\ No newline at end of file
diff --git a/aiserver.py b/aiserver.py
index b52b7eb5..ff28db74 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python3
 #==================================================================#
 # KoboldAI
-# Version: 1.17.0
+# Version: 1.18.1
 # By: KoboldAIDev and the KoboldAI Community
 #==================================================================#
 
@@ -16,6 +16,9 @@
 os.environ['EVENTLET_THREADPOOL_SIZE'] = '1'
 os.environ['TOKENIZERS_PARALLELISM'] = 'false'
 from eventlet import tpool
+import logging
+logging.getLogger("urllib3").setLevel(logging.ERROR)
+
 from os import path, getcwd
 import time
 import re
@@ -23,6 +26,7 @@ import json
 import collections
 import zipfile
 import packaging
+import packaging.version
 import contextlib
 import traceback
 import threading
@@ -54,6 +58,27 @@
 if lupa.LUA_VERSION[:2] != (5, 4):
     print(f"Please install lupa==1.10. You have lupa {lupa.__version__}.", file=sys.stderr)
 
+# Make sure tqdm progress bars display properly in Colab
+from tqdm.auto import tqdm
+old_init = tqdm.__init__
+def new_init(self, *args, **kwargs):
+    old_init(self, *args, **kwargs)
+    if(self.ncols == 0 and kwargs.get("ncols") != 0):
+        self.ncols = 99
+tqdm.__init__ = new_init
+
+# Fix some issues with the OPT tokenizer
+from transformers import PreTrainedTokenizerBase
+old_pretrainedtokenizerbase_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__
+@classmethod
+def new_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs):
+    tokenizer = old_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs)
+    tokenizer._koboldai_header = tokenizer.encode("")
+    tokenizer.add_bos_token = False
+    tokenizer.add_prefix_space = False
+    return tokenizer
+PreTrainedTokenizerBase.from_pretrained = new_pretrainedtokenizerbase_from_pretrained
+
 #==================================================================#
 # Variables & Storage
 #==================================================================#
@@ -76,9 +101,9 @@ mainmenu = [
     ["Adventure Models", "adventurelist", ""],
     ["Novel Models", "novellist", ""],
     ["NSFW Models", "nsfwlist", ""],
-    ["Chatbot Models", "chatlist", ""],
     ["Untuned GPT-Neo/J", "gptneolist", ""],
     ["Untuned Fairseq Dense", "fsdlist", ""],
+    ["Untuned OPT", "optlist", ""],
     ["Untuned XGLM", "xglmlist", ""],
     ["Untuned GPT2", "gpt2list", ""],
     ["Online Services", "apilist", ""],
@@ -86,8 +111,10 @@ mainmenu = [
     ]
 
 adventurelist= [
+    ["Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"],
     ["Skein 6B", "KoboldAI/GPT-J-6B-Skein", "16GB"],
     ["Adventure 6B", "KoboldAI/GPT-J-6B-Adventure", "16GB"],
+    ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB"],
     ["Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "8GB"],
     ["Adventure 1.3B", "KoboldAI/GPT-Neo-1.3B-Adventure", "6GB"],
     ["Adventure 125M (Mia)", "Merry/AID-Neo-125M", "2GB"],
@@ -95,11 +122,13 @@
 
 novellist= [
+    ["Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"],
     ["Janeway FSD 13B", "KoboldAI/fairseq-dense-13B-Janeway", "32GB"],
     ["Janeway FSD 6.7B", "KoboldAI/fairseq-dense-6.7B-Janeway", "16GB"],
     ["Janeway Neo 6B", "KoboldAI/GPT-J-6B-Janeway", "16GB"],
     ["Janeway Neo 2.7B", "KoboldAI/GPT-Neo-2.7B-Janeway", "8GB"],
     ["Janeway FSD 2.7B", "KoboldAI/fairseq-dense-2.7B-Janeway", "8GB"],
+    ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB"],
     ["Horni-LN 2.7B", "KoboldAI/GPT-Neo-2.7B-Horni-LN", "8GB"],
     ["Picard 2.7B (Older Janeway)", "KoboldAI/GPT-Neo-2.7B-Picard", "8GB"],
     ["Return to Main Menu", "Return", ""],
@@ -137,6 +166,17 @@ gpt2list = [
     ["Return to Main Menu", "Return", ""],
     ]
 
+optlist = [
+    ["OPT 30B", "facebook/opt-30b", "64GB"],
+    ["OPT 13B", "facebook/opt-13b", "32GB"],
+    ["OPT 6.7B", "facebook/opt-6.7b", "16GB"],
+    ["OPT 2.7B", "facebook/opt-2.7b", "8GB"],
+    ["OPT 1.3B", "facebook/opt-1.3b", "4GB"],
+    ["OPT 350M", "facebook/opt-350m", "2GB"],
+    ["OPT 125M", "facebook/opt-125m", "1GB"],
+    ["Return to Main Menu", "Return", ""],
+    ]
+
 fsdlist = [
     ["Fairseq Dense 13B", "KoboldAI/fairseq-dense-13B", "32GB"],
     ["Fairseq Dense 6.7B", "KoboldAI/fairseq-dense-6.7B", "16GB"],
@@ -172,7 +212,7 @@ class vars:
     model_type  = ""     # Model Type (Automatically taken from the model config)
     noai        = False  # Runs the script without starting up the transformers pipeline
     aibusy      = False  # Stops submissions while the AI is working
-    max_length  = 1024   # Maximum number of tokens to submit per action
+    max_length  = 2048   # Maximum number of tokens to submit per action
     ikmax       = 3000   # Maximum number of characters to submit to InferKit
     genamt      = 80     # Amount of text for each action to generate
     ikgen       = 200    # Number of characters for InferKit to generate
@@ -182,6 +222,7 @@ class vars:
     temp        = 0.5    # Default generator temperature
     top_p       = 0.9    # Default generator top_p
     top_k       = 0      # Default generator top_k
+    top_a       = 0.0    # Default generator top-a
     tfs         = 1.0    # Default generator tfs (tail-free sampling)
     typical     = 1.0    # Default generator typical sampling threshold
     numseqs     = 1      # Number of sequences to ask the generator to create
@@ -228,6 +269,8 @@ class vars:
     # badwords  = []     # Array of str/chr values that should be removed from output
     badwordsids = [[13460], [6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting
     badwordsids_neox = [[0], [1], [44162], [9502], [12520], [31841], [36320], [49824], [34417], [6038], [34494], [24815], [26635], [24345], [3455], [28905], [44270], [17278], [32666], [46880], [7086], [43189], [37322], [17778], [20879], [49821], [3138], [14490], [4681], [21391], [26786], [43134], [9336], [683], [48074], [41256], [19181], [29650], [28532], [36487], [45114], [46275], [16445], [15104], [11337], [1168], [5647], [29], [27482], [44965], [43782], [31011], [42944], [47389], [6334], [17548], [38329], [32044], [35487], [2239], [34761], [7444], [1084], [12399], [18990], [17636], [39083], [1184], [35830], [28365], [16731], [43467], [47744], [1138], [16079], [40116], [45564], [18297], [42368], [5456], [18022], [42696], [34476], [23505], [23741], [39334], [37944], [45382], [38709], [33440], [26077], [43600], [34418], [36033], [6660], [48167], [48471], [15775], [19884], [41533], [1008], [31053], [36692], [46576], [20095], [20629], [31759], [46410], [41000], [13488], [30952], [39258], [16160], [27655], [22367], [42767], [43736], [49694], [13811], [12004], [46768], [6257], [37471], [5264], [44153], [33805], [20977], [21083], [25416], [14277], [31096], [42041], [18331], [33376], [22372], [46294], [28379], [38475], [1656], [5204], [27075], [50001], [16616], [11396], [7748], [48744], [35402], [28120], [41512], [4207], [43144], [14767], [15640], [16595], [41305], [44479], [38958], [18474], [22734], [30522], [46267], [60], [13976], [31830], [48701], [39822], [9014], [21966], [31422], [28052], [34607], [2479], [3851], [32214], [44082], [45507], [3001], [34368], [34758], [13380], [38363], [4299], [46802], [30996], [12630], [49236], [7082], [8795], [5218], [44740], [9686], [9983], [45301], [27114], [40125], [1570], [26997], [544], [5290], [49193], [23781], [14193], [40000], [2947], [43781], [9102], [48064], [42274], [18772], [49384], [9884], [45635], [43521], [31258], [32056], [47686], [21760], [13143], [10148], [26119], [44308], [31379], [36399], [23983], [46694], [36134], [8562], [12977], [35117], [28591], [49021], [47093], [28653], [29013], [46468], [8605], [7254], [25896], [5032], [8168], [36893], [38270], [20499], [27501], [34419], [29547], [28571], [36586], [20871], [30537], [26842], [21375], [31148], [27618], [33094], [3291], [31789], [28391], [870], [9793], [41361], [47916], [27468], [43856], [8850], [35237], [15707], [47552], [2730], [41449], [45488], [3073], [49806], [21938], [24430], [22747], [20924], [46145], [20481], [20197], [8239], [28231], [17987], [42804], [47269], [29972], [49884], [21382], [46295], [36676], [34616], [3921], [26991], [27720], [46265], [654], [9855], [40354], [5291], [34904], [44342], [2470], [14598], [880], [19282], [2498], [24237], [21431], [16369], [8994], [44524], [45662], [13663], [37077], [1447], [37786], [30863], [42854], [1019], [20322], [4398], [12159], [44072], [48664], [31547], [18736], [9259], [31], [16354], [21810], [4357], [37982], [5064], [2033], [32871], [47446], [62], [22158], [37387], [8743], [47007], [17981], [11049], [4622], [37916], [36786], [35138], [29925], [14157], [18095], [27829], [1181], [22226], [5709], [4725], [30189], [37014], [1254], [11380], [42989], [696], [24576], [39487], [30119], [1092], [8088], [2194], [9899], [14412], [21828], [3725], [13544], [5180], [44679], [34398], [3891], [28739], [14219], [37594], [49550], [11326], [6904], [17266], [5749], [10174], [23405], [9955], [38271], [41018], [13011], [48392], [36784], [24254], [21687], [23734], [5413], [41447], [45472], [10122], [17555], [15830], [47384], [12084], [31350], [47940], [11661], [27988], [45443], [905], [49651], [16614], [34993], [6781], [30803], [35869], [8001], [41604], [28118], [46462], [46762], [16262], [17281], [5774], [10943], [5013], [18257], [6750], [4713], [3951], [11899], [38791], [16943], [37596], [9318], [18413], [40473], [13208], [16375]]
+    badwordsids_opt = [[44717], [46613], [48513], [49923], [50185], [48755], [8488], [43303], [49659], [48601], [49817], [45405], [48742], [49925], [47720], [11227], [48937], [48784], [50017], [42248], [49310], [48082], [49895], [50025], [49092], [49007], [8061], [44226], [0], [742], [28578], [15698], [49784], [46679], [39365], [49281], [49609], [48081], [48906], [46161], [48554], [49670], [48677], [49721], [49632], [48610], [48462], [47457], [10975], [46077], [28696], [48709], [43839], [49798], [49154], [48203], [49625], [48395], [50155], [47161], [49095], [48833], [49420], [49666], [48443], [22176], [49242], [48651], [49138], [49750], [40389], [48021], [21838], [49070], [45333], [40862], [1], [49915], [33525], [49858], [50254], [44403], [48992], [48872], [46117], [49853], [47567], [50206], [41552], [50068], [48999], [49703], [49940], [49329], [47620], [49868], [49962], [2], [44082], [50236], [31274], [50260], [47052], [42645], [49177], [17523], [48691], [49900], [49069], [49358], [48794], [47529], [46479], [48457], [646], [49910], [48077], [48935], [46386], [48902], [49151], [48759], [49803], [45587], [48392], [47789], [48654], [49836], [49230], [48188], [50264], [46844], [44690], [48505], [50161], [27779], [49995], [41833], [50154], [49097], [48520], [50018], [8174], [50084], [49366], [49526], [50193], [7479], [49982], [3]]
+    fp32_model  = False  # Whether or not the most recently loaded HF model was in fp32 format
     deletewi    = None   # Temporary storage for UID to delete
     wirmvwhtsp  = False  # Whether to remove leading whitespace from WI entries
     widepth     = 3      # How many historical actions to scan for WI hits
@@ -262,7 +305,7 @@ class vars:
     recentrngm  = None   # If a new random game was recently generated without Submitting after, this is the memory used (as a string), otherwise this is None
     useprompt   = False  # Whether to send the full prompt with every submit action
     breakmodel  = False  # For GPU users, whether to use both system RAM and VRAM to conserve VRAM while offering speedup compared to CPU-only
-    bmsupported = False  # Whether the breakmodel option is supported (GPT-Neo/GPT-J/XGLM only, currently)
+    bmsupported = False  # Whether the breakmodel option is supported (GPT-Neo/GPT-J/XGLM/OPT only, currently)
     nobreakmodel = False # Something specifically requested Breakmodel to be disabled (For example a models config)
     smandelete  = False  # Whether stories can be deleted from inside the browser
     smanrename  = False  # Whether stories can be renamed from inside the browser
@@ -274,6 +317,7 @@ class vars:
     acregex_ui  = re.compile(r'^ *(>.*)$', re.MULTILINE)    # Pattern for matching actions in the HTML-escaped story so we can apply colouring, etc (make sure to encase part to format in parentheses)
     comregex_ai = re.compile(r'(?:\n<\|(?:.|\n)*?\|>(?=\n|$))|(?:<\|(?:.|\n)*?\|>\n?)')  # Pattern for matching comments to remove them before sending them to the AI
     comregex_ui = re.compile(r'(<\|(?:.|\n)*?\|>)')  # Pattern for matching comments in the editor
+    sampler_order = utils.default_sampler_order.copy()
     chatmode    = False
     chatname    = "You"
     adventure   = False
@@ -288,7 +332,7 @@ class vars:
     quiet       = False  # If set will suppress any story text from being printed to the console (will only be seen on the client web page)
     debug       = False  # If set to true, will send debug information to the client for display
     lazy_load   = True   # Whether or not to use torch_lazy_loader.py for transformers models in order to reduce CPU memory usage
-    use_colab_tpu = os.environ.get("COLAB_TPU_ADDR", "") != ""  # Whether or not we're in a Colab TPU instance and are going to use the TPU rather than the CPU
+    use_colab_tpu = os.environ.get("COLAB_TPU_ADDR", "") != "" or os.environ.get("TPU_NAME", "") != ""  # Whether or not we're in a Colab TPU instance or Kaggle TPU instance and are going to use the TPU rather than the CPU
 
 utils.vars = vars
 
@@ -379,7 +423,7 @@ def device_list(n_layers, primary=None, selected=None):
 def device_config(config):
     global breakmodel, generator
     import breakmodel
-    n_layers = config.num_layers if hasattr(config, "num_layers") else config.n_layer
+    n_layers = utils.num_layers(config)
     if(args.breakmodel_gpulayers is not None):
         try:
             breakmodel.gpu_blocks = list(map(int, args.breakmodel_gpulayers.split(',')))
@@ -452,7 +496,7 @@ def device_config(config):
     # If all layers are on the same device, use the old GPU generation mode
     while(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0):
         breakmodel.gpu_blocks.pop()
-    if(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (-1, config.num_layers if hasattr(config, "num_layers") else config.n_layer)):
+    if(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (-1, utils.num_layers(config))):
         vars.breakmodel = False
         vars.usegpu = True
         vars.gpu_device = len(breakmodel.gpu_blocks)-1
@@ -484,22 +528,33 @@ def move_model_to_devices(model):
         model.lm_head.to(breakmodel.primary_device)
         if(hasattr(model.transformer, 'wpe')):
             model.transformer.wpe.to(breakmodel.primary_device)
-    else:
+    elif(not hasattr(model.model, "decoder")):
         model.model.embed_tokens.to(breakmodel.primary_device)
         model.model.layer_norm.to(breakmodel.primary_device)
         model.lm_head.to(breakmodel.primary_device)
         model.model.embed_positions.to(breakmodel.primary_device)
+    else:
+        model.model.decoder.embed_tokens.to(breakmodel.primary_device)
+        if(model.model.decoder.project_in is not None):
+            model.model.decoder.project_in.to(breakmodel.primary_device)
+        if(model.model.decoder.project_out is not None):
+            model.model.decoder.project_out.to(breakmodel.primary_device)
+        model.model.decoder.embed_positions.to(breakmodel.primary_device)
     gc.collect()
     GPTNeoModel.forward = breakmodel.new_forward_neo
     if("GPTJModel" in globals()):
         GPTJModel.forward = breakmodel.new_forward_neo # type: ignore
     if("XGLMModel" in globals()):
         XGLMModel.forward = breakmodel.new_forward_xglm # type: ignore
+    if("OPTDecoder" in globals()):
+        OPTDecoder.forward = breakmodel.new_forward_opt # type: ignore
     generator = model.generate
     if(hasattr(model, "transformer")):
         breakmodel.move_hidden_layers(model.transformer)
-    else:
+    elif(not hasattr(model.model, "decoder")):
         breakmodel.move_hidden_layers(model.model, model.model.layers)
+    else:
+        breakmodel.move_hidden_layers(model.model.decoder, model.model.decoder.layers)
 
 #==================================================================#
 #  Allow the models to override some settings
@@ -515,13 +570,17 @@ def loadmodelsettings():
         js   = json.load(open(vars.custmodpth.replace('/', '_') + "/config.json", "r"))
     except Exception as e:
         js   = {}
-    if vars.model_type == "xglm" or vars.model_type == "opt" or js.get("compat", "j") == "fairseq_lm":
+    if vars.model_type == "xglm" or js.get("compat", "j") == "fairseq_lm":
        vars.newlinemode = "s"  # Default to newline mode if using XGLM
+    if vars.model_type == "opt":
+        vars.newlinemode = "ns"  # Handle but don't convert newlines if using Fairseq models that have newlines trained in them
    vars.modelconfig = js
    if("badwordsids" in js):
        vars.badwordsids = js["badwordsids"]
    if("nobreakmodel" in js):
        vars.nobreakmodel = js["nobreakmodel"]
+    if("sampler_order" in js):
+        vars.sampler_order = js["sampler_order"]
    if("temp" in js):
        vars.temp = js["temp"]
    if("top_p" in js):
        vars.top_p = js["top_p"]
@@ -532,6 +591,8 @@ def loadmodelsettings():
        vars.tfs = js["tfs"]
    if("typical" in js):
        vars.typical = js["typical"]
+    if("top_a" in js):
+        vars.top_a = js["top_a"]
    if("rep_pen" in js):
        vars.rep_pen = js["rep_pen"]
    if("rep_pen_slope" in js):
@@ -563,11 +624,13 @@ def savesettings():
    js = {}
    js["apikey"]      = vars.apikey
    js["andepth"]     = vars.andepth
+    js["sampler_order"] = vars.sampler_order
    js["temp"]        = vars.temp
    js["top_p"]       = vars.top_p
    js["top_k"]       = vars.top_k
    js["tfs"]         = vars.tfs
    js["typical"]     = vars.typical
+    js["top_a"]       = vars.top_a
    js["rep_pen"]     = vars.rep_pen
    js["rep_pen_slope"] = vars.rep_pen_slope
    js["rep_pen_range"] = vars.rep_pen_range
@@ -615,88 +678,102 @@ def settingschanged():
 #==================================================================#
 #  Read settings from client file JSON and send to vars
 #==================================================================#
+
 def loadsettings():
+    if(path.exists("defaults/" + getmodelname().replace('/', '_') + ".settings")):
+        # Read file contents into JSON object
+        file = open("defaults/" + getmodelname().replace('/', '_') + ".settings", "r")
+        js   = json.load(file)
+
+        processsettings(js)
+        file.close()
     if(path.exists("settings/" + getmodelname().replace('/', '_') + ".settings")):
         # Read file contents into JSON object
         file = open("settings/" + getmodelname().replace('/', '_') + ".settings", "r")
         js   = json.load(file)
-        # Copy file contents to vars
-        if("apikey" in js):
-            vars.apikey = js["apikey"]
-        if("andepth" in js):
-            vars.andepth = js["andepth"]
-        if("temp" in js):
-            vars.temp = js["temp"]
-        if("top_p" in js):
-            vars.top_p = js["top_p"]
-        if("top_k" in js):
-            vars.top_k = js["top_k"]
-        if("tfs" in js):
-            vars.tfs = js["tfs"]
-        if("typical" in js):
-            vars.typical = js["typical"]
-        if("rep_pen" in js):
-            vars.rep_pen = js["rep_pen"]
-        if("rep_pen_slope" in js):
-            vars.rep_pen_slope = js["rep_pen_slope"]
-        if("rep_pen_range" in js):
-            vars.rep_pen_range = js["rep_pen_range"]
-        if("genamt" in js):
-            vars.genamt = js["genamt"]
-        if("max_length" in js):
-            vars.max_length = js["max_length"]
-        if("ikgen" in js):
-            vars.ikgen = js["ikgen"]
-        if("formatoptns" in js):
-            vars.formatoptns = js["formatoptns"]
-        if("numseqs" in js):
-            vars.numseqs = js["numseqs"]
-        if("widepth" in js):
-            vars.widepth = js["widepth"]
-        if("useprompt" in js):
-            vars.useprompt = js["useprompt"]
-        if("adventure" in js):
-            vars.adventure = js["adventure"]
-        if("chatmode" in js):
-            vars.chatmode = js["chatmode"]
-        if("chatname" in js):
-            vars.chatname = js["chatname"]
-        if("dynamicscan" in js):
-            vars.dynamicscan = js["dynamicscan"]
-        if("nopromptgen" in js):
-            vars.nopromptgen = js["nopromptgen"]
-        if("rngpersist" in js):
-            vars.rngpersist = js["rngpersist"]
-        if("nogenmod" in js):
-            vars.nogenmod = js["nogenmod"]
-        if("autosave" in js):
-            vars.autosave = js["autosave"]
-        if("newlinemode" in js):
-            vars.newlinemode = js["newlinemode"]
-        if("welcome" in js):
-            vars.welcome = js["welcome"]
-
-        if("antemplate" in js):
-            vars.setauthornotetemplate = js["antemplate"]
-            if(not vars.gamestarted):
-                vars.authornotetemplate = vars.setauthornotetemplate
-
-        if("userscripts" in js):
-            vars.userscripts = []
-            for userscript in js["userscripts"]:
-                if type(userscript) is not str:
-                    continue
-                userscript = userscript.strip()
-                if len(userscript) != 0 and all(q not in userscript for q in ("..", ":")) and all(userscript[0] not in q for q in ("/", "\\")) and os.path.exists(fileops.uspath(userscript)):
-                    vars.userscripts.append(userscript)
-
-        if("corescript" in js and type(js["corescript"]) is str and all(q not in js["corescript"] for q in ("..", ":")) and all(js["corescript"][0] not in q for q in ("/", "\\"))):
-            vars.corescript = js["corescript"]
-        else:
-            vars.corescript = "default.lua"
-
+        processsettings(js)
         file.close()
+
+def processsettings(js):
+# Copy file contents to vars
+    if("apikey" in js):
+        vars.apikey = js["apikey"]
+    if("andepth" in js):
+        vars.andepth = js["andepth"]
+    if("sampler_order" in js):
+        vars.sampler_order = js["sampler_order"]
+    if("temp" in js):
+        vars.temp = js["temp"]
+    if("top_p" in js):
+        vars.top_p = js["top_p"]
+    if("top_k" in js):
+        vars.top_k = js["top_k"]
+    if("tfs" in js):
+        vars.tfs = js["tfs"]
+    if("typical" in js):
+        vars.typical = js["typical"]
+    if("top_a" in js):
+        vars.top_a = js["top_a"]
+    if("rep_pen" in js):
+        vars.rep_pen = js["rep_pen"]
+    if("rep_pen_slope" in js):
+        vars.rep_pen_slope = js["rep_pen_slope"]
+    if("rep_pen_range" in js):
+        vars.rep_pen_range = js["rep_pen_range"]
+    if("genamt" in js):
+        vars.genamt = js["genamt"]
+    if("max_length" in js):
+        vars.max_length = js["max_length"]
+    if("ikgen" in js):
+        vars.ikgen = js["ikgen"]
+    if("formatoptns" in js):
+        vars.formatoptns = js["formatoptns"]
+    if("numseqs" in js):
+        vars.numseqs = js["numseqs"]
+    if("widepth" in js):
+        vars.widepth = js["widepth"]
+    if("useprompt" in js):
+        vars.useprompt = js["useprompt"]
+    if("adventure" in js):
+        vars.adventure = js["adventure"]
+    if("chatmode" in js):
+        vars.chatmode = js["chatmode"]
+    if("chatname" in js):
+        vars.chatname = js["chatname"]
+    if("dynamicscan" in js):
+        vars.dynamicscan = js["dynamicscan"]
+    if("nopromptgen" in js):
+        vars.nopromptgen = js["nopromptgen"]
+    if("rngpersist" in js):
+        vars.rngpersist = js["rngpersist"]
+    if("nogenmod" in js):
+        vars.nogenmod = js["nogenmod"]
+    if("autosave" in js):
+        vars.autosave = js["autosave"]
+    if("newlinemode" in js):
+        vars.newlinemode = js["newlinemode"]
+    if("welcome" in js):
+        vars.welcome = js["welcome"]
+
+    if("antemplate" in js):
+        vars.setauthornotetemplate = js["antemplate"]
+        if(not vars.gamestarted):
+            vars.authornotetemplate = vars.setauthornotetemplate
+
+    if("userscripts" in js):
+        vars.userscripts = []
+        for userscript in js["userscripts"]:
+            if type(userscript) is not str:
+                continue
+            userscript = userscript.strip()
+            if len(userscript) != 0 and all(q not in userscript for q in ("..", ":")) and all(userscript[0] not in q for q in ("/", "\\")) and os.path.exists(fileops.uspath(userscript)):
+                vars.userscripts.append(userscript)
+
+    if("corescript" in js and type(js["corescript"]) is str and all(q not in js["corescript"] for q in ("..", ":")) and all(js["corescript"][0] not in q for q in ("/", "\\"))):
+        vars.corescript = js["corescript"]
+    else:
+        vars.corescript = "default.lua"
 
 #==================================================================#
 #  Load a soft prompt from a file
@@ -760,7 +837,7 @@ def spRequest(filename):
         tensor = tensor.reshape(
             tpu_mtj_backend.params["cores_per_replica"],
             -1,
-            tpu_mtj_backend.params["d_model"],
+            tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]),
         )
         vars.sp = tpu_mtj_backend.shard_xmap(np.float32(tensor))
     else:
@@ -782,6 +859,7 @@ parser.add_argument("--ngrok", action='store_true', help="Optimizes KoboldAI for
 parser.add_argument("--localtunnel", action='store_true', help="Optimizes KoboldAI for Remote Play using Localtunnel")
 parser.add_argument("--host", action='store_true', help="Optimizes KoboldAI for Remote Play without using a proxy service")
 parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable")
+parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)")
 parser.add_argument("--model", help="Specify the Model Type to skip the Menu")
 parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)")
 parser.add_argument("--revision", help="Specify the model revision for huggingface models (can be a git branch/tag name or a git commit hash)")
@@ -841,6 +919,8 @@ if args.cpu:
 vars.smandelete = vars.host == args.override_delete
 vars.smanrename = vars.host == args.override_rename
 
+vars.aria2_port = args.aria2_port or 6799
+
 # Select a model to run
 if args.model:
     print("Welcome to KoboldAI!\nYou have selected the following Model:", vars.model)
@@ -894,12 +974,15 @@ if(vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMe
         print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)")
         vars.model_type = "gpt_neo"
 
+    if(vars.model_type == "opt"):
+        vars.badwordsids = vars.badwordsids_opt
+
 if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
     loadmodelsettings()
     loadsettings()
     print("{0}Looking for GPU support...{1}".format(colors.PURPLE, colors.END), end="")
     vars.hascuda = torch.cuda.is_available()
-    vars.bmsupported = vars.model_type in ("gpt_neo", "gptj", "xglm") and not vars.nobreakmodel
+    vars.bmsupported = vars.model_type in ("gpt_neo", "gptj", "xglm", "opt") and not vars.nobreakmodel
     if(args.breakmodel is not None and args.breakmodel):
         print("WARNING: --breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).", file=sys.stderr)
     if(args.breakmodel_layers is not None):
@@ -1111,17 +1194,36 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
             globals()[m] = getattr(__import__("transformers"), m)
         except:
             pass
+    try:
+        from transformers.models.opt.modeling_opt import OPTDecoder
+    except:
+        pass
     import transformers.generation_utils
     from transformers import __version__ as transformers_version
 
     from transformers import PreTrainedModel
+    from transformers import modeling_utils
     old_from_pretrained = PreTrainedModel.from_pretrained.__func__
     @classmethod
     def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
+        vars.fp32_model = False
+        utils.num_shards = None
+        utils.current_shard = 0
+        utils.from_pretrained_model_name = pretrained_model_name_or_path
+        utils.from_pretrained_index_filename = None
+        utils.from_pretrained_kwargs = kwargs
+        utils.bar = None
         if not args.no_aria2:
             utils.aria2_hook(pretrained_model_name_or_path, **kwargs)
         return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
     PreTrainedModel.from_pretrained = new_from_pretrained
+    if(hasattr(modeling_utils, "get_checkpoint_shard_files")):
+        old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files
+        def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs):
+            utils.num_shards = utils.get_num_shards(index_filename)
+            utils.from_pretrained_index_filename = index_filename
+            return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs)
+        modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files
 
     # Lazy loader
     import torch_lazy_loader
@@ -1139,6 +1241,10 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
         ram_blocks = gpu_blocks = cumulative_gpu_blocks = None
 
         def lazy_load_callback(model_dict, f, **_):
+            if lazy_load_callback.nested:
+                return
+            lazy_load_callback.nested = True
+
             device_map = {}
 
             for _key, spec in lazy_load_spec.get("layer_weights", {}).items():
@@ -1153,12 +1259,22 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                 if isinstance(value, torch_lazy_loader.LazyTensor) and key not in device_map:
                     device_map[key] = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu"
 
+            if utils.num_shards is None or utils.current_shard == 0:
+                if utils.num_shards is not None:
+                    num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
+                else:
+                    num_tensors = len(device_map)
+                print(flush=True)
+                utils.bar = tqdm(total=num_tensors, desc="Loading model tensors")
+
             with zipfile.ZipFile(f, "r") as z:
                 try:
                     last_storage_key = None
                     f = None
                     current_offset = 0
-                    for key in tqdm(sorted(device_map.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)), desc="Loading model tensors"):
+                    if utils.num_shards is not None:
+                        utils.current_shard += 1
+                    for key in sorted(device_map.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
                         storage_key = model_dict[key].key
                         if storage_key != last_storage_key or model_dict[key].seek_offset < current_offset:
                             last_storage_key = storage_key
@@ -1175,6 +1291,8 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                         nbytes = size if dtype is torch.bool else size * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3)
                         #print(f"Transferring <{key}> to {'(CPU)' if device == 'cpu' else '[device ' + str(device) + ']'} ... ", end="", flush=True)
                         model_dict[key] = model_dict[key].materialize(f, map_location="cpu")
+                        if model_dict[key].dtype is torch.float32:
+                            vars.fp32_model = True
                         if convert_to_float16 and vars.hascuda and (vars.breakmodel or vars.usegpu) and model_dict[key].dtype is torch.float32:
                             model_dict[key] = model_dict[key].to(torch.float16)
                         if not vars.usegpu and not vars.breakmodel and model_dict[key].dtype is torch.float16:
@@ -1182,10 +1300,16 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                             model_dict[key] = model_dict[key].to(device)
                         #print("OK", flush=True)
                         current_offset += nbytes
+                        utils.bar.update(1)
                 finally:
+                    if utils.num_shards is None or utils.current_shard >= utils.num_shards:
+                        utils.bar.close()
+                        utils.bar = None
+                    lazy_load_callback.nested = False
                     if isinstance(f, zipfile.ZipExtFile):
                         f.close()
 
+        lazy_load_callback.nested = False
         return lazy_load_callback
 
     lazy_load_config_path = os.path.join("maps", vars.model_type + ".json")
@@ -1231,8 +1355,10 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
             input_ids.clamp_(max=self.config.vocab_size-1)
             if(hasattr(self, "transformer")):
                 inputs_embeds = self.transformer.wte(input_ids)
-            else:
+            elif(not hasattr(self.model, "decoder")):
                 inputs_embeds = self.model.embed_tokens(input_ids)
+            else:
+                inputs_embeds = self.model.decoder.embed_tokens(input_ids)
             if(vars.sp is not None):
                 vars.sp = vars.sp.to(inputs_embeds.dtype).to(inputs_embeds.device)
                 inputs_embeds = torch.where(
@@ -1240,23 +1366,42 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                     vars.sp[shifted_input_ids.clamp(min=0)],
                     inputs_embeds,
                 )
-            if(not hasattr(self, "transformer")):
+            if(hasattr(self, "model") and hasattr(self.model, "embed_scale")):
                 inputs_embeds *= self.model.embed_scale
             kwargs['inputs_embeds'] = inputs_embeds
             return old_forward(self, *args, **kwargs)
         cls.forward = new_causallm_forward
     for cls in (GPT2LMHeadModel, GPTNeoForCausalLM):
         patch_causallm(cls)
-    for c in ("GPTJForCausalLM", "XGLMForCausalLM"):
+    for c in ("GPTJForCausalLM", "XGLMForCausalLM", "OPTForCausalLM"):
         try:
             patch_causallm(getattr(__import__("transformers"), c))
         except:
             pass
 
+    # Fix a bug in OPTForCausalLM where self.lm_head is the wrong size
+    if(packaging.version.parse("4.19.0.dev0") <= packaging.version.parse(transformers_version) < packaging.version.parse("4.20.0")):
+        try:
+            from transformers import OPTForCausalLM, OPTModel
+        except ImportError:
+            pass
+        else:
+            # This is the same as the original __init__ but with
+            # config.hidden_size
+            # replaced with
+            # config.word_embed_proj_dim
+            def new_init(self, config):
+                super(OPTForCausalLM, self).__init__(config)
+                self.model = OPTModel(config)
+                self.lm_head = torch.nn.Linear(config.word_embed_proj_dim, config.vocab_size, bias=False)
+                self.post_init()
+            OPTForCausalLM.__init__ = new_init
+
     # Patch transformers to use our custom logit warpers
     from transformers import LogitsProcessorList, LogitsWarper, LogitsProcessor, TopKLogitsWarper, TopPLogitsWarper, TemperatureLogitsWarper, RepetitionPenaltyLogitsProcessor
-    from warpers import AdvancedRepetitionPenaltyLogitsProcessor, TailFreeLogitsWarper, TypicalLogitsWarper
+    from warpers import AdvancedRepetitionPenaltyLogitsProcessor, TailFreeLogitsWarper, TypicalLogitsWarper, TopALogitsWarper
 
     def dynamic_processor_wrap(cls, field_name, var_name, cond=None):
         old_call = cls.__call__
@@ -1276,6 +1421,7 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
         cls.__call__ = new_call
     dynamic_processor_wrap(AdvancedRepetitionPenaltyLogitsProcessor, ("penalty", "penalty_slope", "penalty_range"), ("rep_pen", "rep_pen_slope", "rep_pen_range"), cond=lambda x: x[0] != 1.0)
     dynamic_processor_wrap(TopKLogitsWarper, "top_k", "top_k", cond=lambda x: x > 0)
+    dynamic_processor_wrap(TopALogitsWarper, "top_a", "top_a", cond=lambda x: x > 0.0)
     dynamic_processor_wrap(TopPLogitsWarper, "top_p", "top_p", cond=lambda x: x < 1.0)
     dynamic_processor_wrap(TailFreeLogitsWarper, "tfs", "tfs", cond=lambda x: x < 1.0)
     dynamic_processor_wrap(TypicalLogitsWarper, "typical", "typical", cond=lambda x: x < 1.0)
@@ -1319,21 +1465,30 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
     new_get_logits_processor.old_get_logits_processor = transformers.generation_utils.GenerationMixin._get_logits_processor
     transformers.generation_utils.GenerationMixin._get_logits_processor = new_get_logits_processor
 
+    class KoboldLogitsWarperList(LogitsProcessorList):
+        def __init__(self, beams: int = 1, **kwargs):
+            self.__warper_list: List[LogitsWarper] = []
+            self.__warper_list.append(TopKLogitsWarper(top_k=1, min_tokens_to_keep=1 + (beams > 1)))
+            self.__warper_list.append(TopALogitsWarper(top_a=0.5, min_tokens_to_keep=1 + (beams > 1)))
+            self.__warper_list.append(TopPLogitsWarper(top_p=0.5, min_tokens_to_keep=1 + (beams > 1)))
+            self.__warper_list.append(TailFreeLogitsWarper(tfs=0.5, min_tokens_to_keep=1 + (beams > 1)))
+            self.__warper_list.append(TypicalLogitsWarper(typical=0.5, min_tokens_to_keep=1 + (beams > 1)))
+            self.__warper_list.append(TemperatureLogitsWarper(temperature=0.5))
+
+        def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, *args, **kwargs):
+            for k in vars.sampler_order:
+                scores = self.__warper_list[k](input_ids, scores, *args, **kwargs)
+            return scores
+
     def new_get_logits_warper(beams: int = 1,) -> LogitsProcessorList:
-        warper_list = LogitsProcessorList()
-        warper_list.append(TopKLogitsWarper(top_k=1, min_tokens_to_keep=1 + (beams > 1)))
-        warper_list.append(TopPLogitsWarper(top_p=0.5, min_tokens_to_keep=1 + (beams > 1)))
-        warper_list.append(TailFreeLogitsWarper(tfs=0.5, min_tokens_to_keep=1 + (beams > 1)))
-        warper_list.append(TypicalLogitsWarper(typical=0.5, min_tokens_to_keep=1 + (beams > 1)))
-        warper_list.append(TemperatureLogitsWarper(temperature=0.5))
-        return warper_list
+        return KoboldLogitsWarperList(beams=beams)
 
     def new_sample(self, *args, **kwargs):
         assert kwargs.pop("logits_warper", None) is not None
         kwargs["logits_warper"] = new_get_logits_warper(
             beams=1,
         )
-        if(vars.newlinemode == "s"):
+        if(vars.newlinemode == "s") or (vars.newlinemode == "ns"):
             kwargs["eos_token_id"] = -1
             kwargs.setdefault("pad_token_id", 2)
         return new_sample.old_sample(self, *args, **kwargs)
@@ -1408,12 +1563,18 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
 
     def get_hidden_size_from_model(model):
         try:
-            return int(model.transformer.hidden_size)
+            return int(model.model.decoder.project_in.in_features)
         except:
             try:
-                return int(model.transformer.embed_dim)
+                return int(model.model.decoder.embed_tokens.out_features)
             except:
-                return int(model.lm_head.in_features)
+                try:
+                    return int(model.transformer.hidden_size)
+                except:
+                    try:
+                        return int(model.transformer.embed_dim)
+                    except:
+                        return int(model.lm_head.in_features)
 
     def maybe_low_cpu_mem_usage() -> Dict[str, Any]:
         if(packaging.version.parse(transformers_version) < packaging.version.parse("4.11.0")):
@@ -1468,12 +1629,16 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                 import shutil
                 shutil.move(vars.model.replace('/', '_'), "models/{}".format(vars.model.replace('/', '_')))
             print("\n", flush=True)
-            with maybe_use_float16(), torch_lazy_loader.use_lazy_torch_load(enable=vars.lazy_load, callback=get_lazy_load_callback(model_config.num_layers if hasattr(model_config, "num_layers") else model_config.n_layer) if vars.lazy_load else None, dematerialized_modules=True):
+            with maybe_use_float16(), torch_lazy_loader.use_lazy_torch_load(enable=vars.lazy_load, callback=get_lazy_load_callback(utils.num_layers(model_config)) if vars.lazy_load else None, dematerialized_modules=True):
                 if(vars.lazy_load):  # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time
                     lowmem = {}
                 if(os.path.isdir(vars.custmodpth)):
                     try:
                         tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+                    except Exception as e:
+                        pass
+                    try:
+                        tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
                     except Exception as e:
                         try:
                             tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@@ -1486,6 +1651,10 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                 elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
                     try:
                         tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
+                    except Exception as e:
+                        pass
+                    try:
+                        tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
                     except Exception as e:
                         try:
                             tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@@ -1496,8 +1665,25 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                     except Exception as e:
                         model     = GPTNeoForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", **lowmem)
                 else:
+                    old_rebuild_tensor = torch._utils._rebuild_tensor
+                    def new_rebuild_tensor(storage: Union[torch_lazy_loader.LazyTensor, torch.Storage], storage_offset, shape, stride):
+                        if(not isinstance(storage, torch_lazy_loader.LazyTensor)):
+                            dtype = storage.dtype
+                        else:
+                            dtype = storage.storage_type.dtype
+                            if(not isinstance(dtype, torch.dtype)):
+                                dtype = storage.storage_type(0).dtype
+                        if(dtype is torch.float32 and len(shape) >= 2):
+                            vars.fp32_model = True
+                        return old_rebuild_tensor(storage, storage_offset, shape, stride)
+                    torch._utils._rebuild_tensor = new_rebuild_tensor
+
                     try:
                         tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
+                    except Exception as e:
+                        pass
+                    try:
+                        tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
                     except Exception as e:
                         try:
                             tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
@@ -1508,11 +1694,32 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
                     except Exception as e:
                         model     = GPTNeoForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", **lowmem)
 
+                    torch._utils._rebuild_tensor = old_rebuild_tensor
+
                     if not args.colab or args.savemodel:
                         import shutil
-                        model = model.half()
-                        model.save_pretrained("models/{}".format(vars.model.replace('/', '_')), max_shard_size="500MiB")
                         tokenizer.save_pretrained("models/{}".format(vars.model.replace('/', '_')))
+                        if(vars.fp32_model):  # Use save_pretrained to convert fp32 models to fp16
+                            model = model.half()
+                            model.save_pretrained("models/{}".format(vars.model.replace('/', '_')), max_shard_size="500MiB")
+                        else:  # For fp16 models, we can just copy the model files directly
+                            import transformers.configuration_utils
+                            import transformers.modeling_utils
+                            import transformers.file_utils
+                            # Save the config.json
+                            shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, transformers.configuration_utils.CONFIG_NAME, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.configuration_utils.CONFIG_NAME))
+                            if(utils.num_shards is None):
+                                # Save the pytorch_model.bin of an unsharded model
+                                shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, transformers.modeling_utils.WEIGHTS_NAME, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_NAME))
+                            else:
+                                with open(utils.from_pretrained_index_filename) as f:
+                                    map_data = json.load(f)
+                                filenames = set(map_data["weight_map"].values())
+                                # Save the pytorch_model.bin.index.json of a sharded model
+                                shutil.move(utils.from_pretrained_index_filename, os.path.join("models/{}".format(vars.model.replace('/', '_')), transformers.modeling_utils.WEIGHTS_INDEX_NAME))
+                                # Then save the pytorch_model-#####-of-#####.bin files
+                                for filename in filenames:
+                                    shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, filename, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), filename))
                         shutil.rmtree("cache/")
 
     if(vars.hascuda):
@@ -1548,13 +1755,28 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
             tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
 else:
     from transformers import PreTrainedModel
+    from transformers import modeling_utils
     old_from_pretrained = PreTrainedModel.from_pretrained.__func__
     @classmethod
     def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
+        vars.fp32_model = False
+        utils.num_shards = None
+        utils.current_shard = 0
+        utils.from_pretrained_model_name = pretrained_model_name_or_path
+        utils.from_pretrained_index_filename = None
+        utils.from_pretrained_kwargs = kwargs
+        utils.bar = None
        if not args.no_aria2:
            utils.aria2_hook(pretrained_model_name_or_path, **kwargs)
        return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    PreTrainedModel.from_pretrained = new_from_pretrained
+    if(hasattr(modeling_utils, "get_checkpoint_shard_files")):
+        old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files
+        def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs):
+            utils.num_shards = utils.get_num_shards(index_filename)
+            utils.from_pretrained_index_filename = index_filename
+            return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs)
+        modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files
 
     def tpumtjgetsofttokens():
         soft_tokens = None
@@ -1562,14 +1784,14 @@ else:
         global np
         if 'np' not in globals():
             import numpy as np
-        tensor = np.zeros((1, tpu_mtj_backend.params["d_model"]), dtype=np.float32)
+        tensor = np.zeros((1, tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"])), dtype=np.float32)
         rows = tensor.shape[0]
         padding_amount = tpu_mtj_backend.params["seq"] - (tpu_mtj_backend.params["seq"] % -tpu_mtj_backend.params["cores_per_replica"]) - rows
         tensor = np.pad(tensor, ((0, padding_amount), (0, 0)))
         tensor = tensor.reshape(
             tpu_mtj_backend.params["cores_per_replica"],
             -1,
-            tpu_mtj_backend.params["d_model"],
+            tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]),
         )
         vars.sp = tpu_mtj_backend.shard_xmap(tensor)
         soft_tokens = np.arange(
@@ -1631,11 +1853,13 @@ else:
 
     def tpumtjgenerate_settings_callback() -> dict:
         return {
+            "sampler_order": vars.sampler_order,
             "top_p": float(vars.top_p),
             "temp": float(vars.temp),
             "top_k": int(vars.top_k),
             "tfs": float(vars.tfs),
             "typical": float(vars.typical),
+            "top_a": float(vars.top_a),
             "repetition_penalty": float(vars.rep_pen),
             "rpslope": float(vars.rep_pen_slope),
             "rprange": int(vars.rep_pen_range),
@@ -1658,7 +1882,7 @@ else:
         if vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and (not vars.custmodpth or not os.path.isdir(vars.custmodpth)):
             raise FileNotFoundError(f"The specified model path {repr(vars.custmodpth)} is not the path to a valid folder")
         import tpu_mtj_backend
-        if(vars.model == "TPUMeshTransformerGPTNeoX"):
+        if(vars.model == "TPUMeshTransformerGPTNeoX" or vars.model_type == "opt"):
             tpu_mtj_backend.pad_token_id = 1
         tpu_mtj_backend.vars = vars
         tpu_mtj_backend.warper_callback = tpumtjgenerate_warper_callback
@@ -1670,7 +1894,7 @@ else:
         loadmodelsettings()
         loadsettings()
         tpu_mtj_backend.load_model(vars.custmodpth, hf_checkpoint=vars.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and vars.use_colab_tpu, **vars.modelconfig)
-        vars.modeldim = int(tpu_mtj_backend.params["d_model"])
+        vars.modeldim = int(tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]))
         tokenizer = tpu_mtj_backend.tokenizer
     else:
         loadsettings()
@@ -1998,6 +2222,7 @@ def lua_has_setting(setting):
         "settopk",
         "settfs",
         "settypical",
+        "settopa",
         "setreppen",
         "setreppenslope",
         "setreppenrange",
@@ -2017,6 +2242,7 @@ def lua_has_setting(setting):
         "top_k",
         "tfs",
         "typical",
+        "topa",
         "reppen",
         "reppenslope",
         "reppenrange",
@@ -2051,6 +2277,7 @@ def lua_get_setting(setting):
     if(setting in ("settopk", "topk", "top_k")): return vars.top_k
     if(setting in ("settfs", "tfs")): return vars.tfs
     if(setting in ("settypical", "typical")): return vars.typical
+    if(setting in ("settopa", "topa")): return vars.top_a
     if(setting in ("setreppen", "reppen")): return vars.rep_pen
     if(setting in ("setreppenslope", "reppenslope")): return vars.rep_pen_slope
     if(setting in ("setreppenrange", "reppenrange")): return vars.rep_pen_range
@@ -2086,6 +2313,7 @@ def lua_set_setting(setting, v):
     if(setting in ("settopk", "topk")): vars.top_k = v
     if(setting in ("settfs", "tfs")): vars.tfs = v
     if(setting in ("settypical", "typical")): vars.typical = v
+    if(setting in ("settopa", "topa")): vars.top_a = v
     if(setting in ("setreppen", "reppen")): vars.rep_pen = v
     if(setting in ("setreppenslope", "reppenslope")): vars.rep_pen_slope = v
     if(setting in ("setreppenrange", "reppenrange")): vars.rep_pen_range = v
@@ -2510,6 +2738,11 @@ def get_message(msg):
         emit('from_server', {'cmd': 'setlabeltypical', 'data': msg['data']}, broadcast=True)
         settingschanged()
         refresh_settings()
+    elif(msg['cmd'] == 'settopa'):
+        vars.top_a = float(msg['data'])
+        emit('from_server', {'cmd': 'setlabeltopa', 'data': msg['data']}, broadcast=True)
+        settingschanged()
+        refresh_settings()
     elif(msg['cmd'] == 'setreppen'):
         vars.rep_pen = float(msg['data'])
         emit('from_server', {'cmd': 'setlabelreppen', 'data': msg['data']}, broadcast=True)
@@ -2663,6 +2896,8 @@ def get_message(msg):
     elif(msg['cmd'] == 'uslistrequest'):
         unloaded, loaded = getuslist()
         emit('from_server', {'cmd': 'buildus', 'data': {"unloaded": unloaded, "loaded": loaded}})
+    elif(msg['cmd'] == 'samplerlistrequest'):
+        emit('from_server', {'cmd': 'buildsamplers', 'data': vars.sampler_order})
     elif(msg['cmd'] == 'usloaded'):
         vars.userscripts = []
         for userscript in msg['data']:
@@ -2676,6 +2911,16 @@ def get_message(msg):
         load_lua_scripts()
         unloaded, loaded = getuslist()
         sendUSStatItems()
+    elif(msg['cmd'] == 'samplers'):
+        sampler_order = msg["data"]
+        if(not isinstance(sampler_order, list)):
+            raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
+        if(len(sampler_order) != len(vars.sampler_order)):
+            raise ValueError(f"Sampler order must be a list of length {len(vars.sampler_order)}, but got a list of length {len(sampler_order)}")
+        if(not all(isinstance(e, int) for e in sampler_order)):
+            raise ValueError(f"Sampler order must be a list of ints, but got a list with at least one non-int element")
+        vars.sampler_order = sampler_order
+        settingschanged()
     elif(msg['cmd'] == 'loadselect'):
         vars.loadselect = msg["data"]
     elif(msg['cmd'] == 'spselect'):
@@ -3104,24 +3349,26 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
             global tokenizer
             tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
 
+    lnheader = len(tokenizer._koboldai_header)
+
     # Calculate token budget
     prompttkns = tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', vars.prompt)), max_length=int(2e9), truncation=True)
     lnprompt   = len(prompttkns)
 
     memtokens = tokenizer.encode(utils.encodenewlines(mem), max_length=int(2e9), truncation=True)
     lnmem     = len(memtokens)
-    if(lnmem > vars.max_length - lnsp - vars.genamt - budget_deduction):
+    if(lnmem > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
         raise OverflowError("The memory in your story is too long. Please either write a shorter memory text or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
 
     witokens  = tokenizer.encode(utils.encodenewlines(winfo), max_length=int(2e9), truncation=True)
     lnwi      = len(witokens)
-    if(lnmem + lnwi > vars.max_length - lnsp - vars.genamt - budget_deduction):
+    if(lnmem + lnwi > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
         raise OverflowError("The current active world info keys take up too many tokens. Please either write shorter world info, decrease World Info Depth or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
 
     if(anotetxt != ""):
         anotetkns = tokenizer.encode(utils.encodenewlines(anotetxt), max_length=int(2e9), truncation=True)
         lnanote   = len(anotetkns)
-        if(lnmem + lnwi + lnanote > vars.max_length - lnsp - vars.genamt - budget_deduction):
+        if(lnmem + lnwi + lnanote > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
             raise OverflowError("The author's note in your story is too long. Please either write a shorter author's note or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
 
     if(vars.useprompt):
@@ -3132,14 +3379,14 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
     lnsubmission = len(tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', submission)), max_length=int(2e9), truncation=True)) if submission is not None else 0
     maybe_lnprompt = lnprompt if vars.useprompt and actionlen > 0 else 0
 
-    if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnsp - vars.genamt - budget_deduction):
+    if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
         raise OverflowError("Your submission is too long. Please either write a shorter submission or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt. If you are using the Always Add Prompt setting, turning it off may help.")
 
     assert budget >= 0
 
     if(actionlen == 0):
         # First/Prompt action
-        tokens = memtokens + witokens + anotetkns + prompttkns
+        tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns
         assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
         ln = len(tokens) + lnsp
         return tokens, ln+1, ln+vars.genamt
@@ -3187,12 +3434,12 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
         # Did we get to add the A.N.? If not, do it here
         if(anotetxt != ""):
             if((not anoteadded) or forceanote):
-                tokens = memtokens + witokens + anotetkns + prompttkns + tokens
+                tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns + tokens
             else:
-                tokens = memtokens + witokens + prompttkns + tokens
+                tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens
         else:
             # Prepend Memory, WI, and Prompt before action tokens
-            tokens = memtokens + witokens + prompttkns + tokens
+            tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens
 
         # Send completed bundle to generator
         assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
@@ -3570,6 +3817,7 @@ def sendtocolab(txt, min, max):
         'top_k': vars.top_k,
         'tfs': vars.tfs,
         'typical': vars.typical,
+        'topa': vars.top_a,
         'numseqs': vars.numseqs,
         'retfultxt': False
     }
@@ -3707,12 +3955,14 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None):
                 top_k=vars.top_k,
                 tfs=vars.tfs,
                 typical=vars.typical,
+                top_a=vars.top_a,
                 numseqs=vars.numseqs,
                 repetition_penalty=vars.rep_pen,
                 rpslope=vars.rep_pen_slope,
                 rprange=vars.rep_pen_range,
                 soft_embeddings=vars.sp,
                 soft_tokens=soft_tokens,
+                sampler_order=vars.sampler_order,
             )
         past = genout
         for i in range(vars.numseqs):
@@ -3893,6 +4143,7 @@ def refresh_settings():
         emit('from_server', {'cmd': 'updatetopk', 'data': vars.top_k}, broadcast=True)
         emit('from_server', {'cmd': 'updatetfs', 'data': vars.tfs}, broadcast=True)
         emit('from_server', {'cmd': 'updatetypical', 'data': vars.typical}, broadcast=True)
+        emit('from_server', {'cmd': 'updatetopa', 'data': vars.top_a}, broadcast=True)
         emit('from_server', {'cmd': 'updatereppen', 'data': vars.rep_pen}, broadcast=True)
         emit('from_server', {'cmd': 'updatereppenslope', 'data': vars.rep_pen_slope}, broadcast=True)
         emit('from_server', {'cmd': 'updatereppenrange', 'data': vars.rep_pen_range}, broadcast=True)
@@ -4469,6 +4720,7 @@ def oairequest(txt, min, max):
             'prompt': txt,
             'max_tokens': vars.genamt,
             'temperature': vars.temp,
+            'top_a': vars.top_a,
             'top_p': vars.top_p,
             'top_k': vars.top_k,
             'tfs': vars.tfs,
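That concludes the aiserver.py changes. The new `KoboldLogitsWarperList` above applies its six warpers in whatever order `vars.sampler_order` dictates, each element indexing the fixed list built in `__init__` (0 = top-k, 1 = top-a, 2 = top-p, 3 = tail-free, 4 = typical, 5 = temperature). A toy sketch of that dispatch follows, with string stand-ins rather than the real warper classes, and assuming `utils.default_sampler_order` is `[0, 1, 2, 3, 4, 5]`:

```python
# Toy illustration of sampler_order dispatch: each element indexes a fixed
# warper list (0=top_k, 1=top_a, 2=top_p, 3=tfs, 4=typical, 5=temperature).
# Strings stand in for the real LogitsWarper instances.
WARPER_NAMES = ["top_k", "top_a", "top_p", "tfs", "typical", "temperature"]

def apply_in_order(scores: str, sampler_order: list) -> str:
    for k in sampler_order:
        scores = f"{WARPER_NAMES[k]}({scores})"  # real code calls the warper
    return scores

# Assumed default order: top-k is applied first, temperature last.
print(apply_in_order("logits", [0, 1, 2, 3, 4, 5]))
# temperature(typical(tfs(top_p(top_a(top_k(logits))))))
```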
If not, do it here if(anotetxt != ""): if((not anoteadded) or forceanote): - tokens = memtokens + witokens + anotetkns + prompttkns + tokens + tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns + tokens else: - tokens = memtokens + witokens + prompttkns + tokens + tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens else: # Prepend Memory, WI, and Prompt before action tokens - tokens = memtokens + witokens + prompttkns + tokens + tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens # Send completed bundle to generator assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction @@ -3570,6 +3817,7 @@ def sendtocolab(txt, min, max): 'top_k': vars.top_k, 'tfs': vars.tfs, 'typical': vars.typical, + 'topa': vars.top_a, 'numseqs': vars.numseqs, 'retfultxt': False } @@ -3707,12 +3955,14 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None): top_k=vars.top_k, tfs=vars.tfs, typical=vars.typical, + top_a=vars.top_a, numseqs=vars.numseqs, repetition_penalty=vars.rep_pen, rpslope=vars.rep_pen_slope, rprange=vars.rep_pen_range, soft_embeddings=vars.sp, soft_tokens=soft_tokens, + sampler_order=vars.sampler_order, ) past = genout for i in range(vars.numseqs): @@ -3893,6 +4143,7 @@ def refresh_settings(): emit('from_server', {'cmd': 'updatetopk', 'data': vars.top_k}, broadcast=True) emit('from_server', {'cmd': 'updatetfs', 'data': vars.tfs}, broadcast=True) emit('from_server', {'cmd': 'updatetypical', 'data': vars.typical}, broadcast=True) + emit('from_server', {'cmd': 'updatetopa', 'data': vars.top_a}, broadcast=True) emit('from_server', {'cmd': 'updatereppen', 'data': vars.rep_pen}, broadcast=True) emit('from_server', {'cmd': 'updatereppenslope', 'data': vars.rep_pen_slope}, broadcast=True) emit('from_server', {'cmd': 'updatereppenrange', 'data': vars.rep_pen_range}, broadcast=True) @@ -4469,6 +4720,7 @@ def oairequest(txt, min, max): 'prompt': txt, 'max_tokens': vars.genamt, 'temperature': vars.temp, + 'top_a': vars.top_a, 'top_p': vars.top_p, 'top_k': vars.top_k, 'tfs': vars.tfs, diff --git a/breakmodel.py b/breakmodel.py index 9818e6d9..eb49e669 100644 --- a/breakmodel.py +++ b/breakmodel.py @@ -633,11 +633,11 @@ def new_forward_xglm( layer_outputs = decoder_layer( hidden_states.to(device) if breakmodel and hidden_states is not None else hidden_states, attention_mask=attention_mask.to(device) if breakmodel and attention_mask is not None else attention_mask, - encoder_hidden_states=encoder_hidden_states.to(device) if encoder_hidden_states is not None else None, - encoder_attention_mask=encoder_attention_mask.to(device) if encoder_attention_mask is not None else None, - layer_head_mask=((head_mask[idx].to(device) if head_mask[idx] is not None else None) if head_mask is not None else None), + encoder_hidden_states=encoder_hidden_states.to(device) if breakmodel and encoder_hidden_states is not None else encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask.to(device) if breakmodel and encoder_attention_mask is not None else encoder_attention_mask, + layer_head_mask=((head_mask[idx].to(device) if breakmodel and head_mask[idx] is not None else head_mask[idx]) if head_mask is not None else None), cross_attn_layer_head_mask=( - (cross_attn_head_mask[idx].to(device) if cross_attn_head_mask[idx] is not None else None) if cross_attn_head_mask is not None else None + (cross_attn_head_mask[idx].to(device) if breakmodel and cross_attn_head_mask[idx] is not None else 
cross_attn_head_mask[idx]) if cross_attn_head_mask is not None else None ), past_key_value=tuple(v.to(device) for v in past_key_value if v is not None) if breakmodel and past_key_value is not None and i >= ram_blocks and len(past_key_value) and past_key_value[0].device.index != device else past_key_value, output_attentions=output_attentions, @@ -686,3 +686,177 @@ def new_forward_xglm( attentions=all_self_attns, cross_attentions=all_cross_attentions, ) + + +def new_forward_opt( + self, + input_ids=None, + attention_mask=None, + head_mask=None, + past_key_values=None, + inputs_embeds=None, + use_cache=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, +): + assert len(gpu_blocks) <= torch.cuda.device_count() + assert sum(gpu_blocks) <= len(self.layers) + ram_blocks = len(self.layers) - sum(gpu_blocks) + cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks)) + + + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + use_cache = use_cache if use_cache is not None else self.config.use_cache + + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + # retrieve input_ids and inputs_embeds + if input_ids is not None and inputs_embeds is not None: + raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time") + elif input_ids is not None: + input_shape = input_ids.size() + input_ids = input_ids.view(-1, input_shape[-1]) + elif inputs_embeds is not None: + input_shape = inputs_embeds.size()[:-1] + else: + raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds") + + past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0 + + if inputs_embeds is None: + if breakmodel: + input_ids = input_ids.to(primary_device) + inputs_embeds = self.embed_tokens(input_ids) + + # embed positions + if breakmodel: + inputs_embeds = inputs_embeds.to(primary_device) + if attention_mask is None: + attention_mask = torch.ones(inputs_embeds.shape[:2], dtype=torch.bool, device=inputs_embeds.device) + + positions = self.embed_positions(attention_mask)[:, past_key_values_length:, :] + if breakmodel: + positions = positions.to(primary_device) + + attention_mask = self._prepare_decoder_attention_mask( + attention_mask, input_shape, inputs_embeds, past_key_values_length + ) + + if self.project_in is not None: + inputs_embeds = self.project_in(inputs_embeds) + + hidden_states = inputs_embeds + positions + + hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) + + # decoder layers + all_hidden_states = () if output_hidden_states else None + all_self_attns = () if output_attentions else None + next_decoder_cache = () if use_cache else None + + if breakmodel and ram_blocks: + copystream = torch.cuda.Stream(device=primary_device, priority=-1) + + # check if head_mask has a correct number of layers specified if desired + for attn_mask, mask_name in zip([head_mask], ["head_mask"]): + if attn_mask is not None: + if attn_mask.size()[0] != (len(self.layers)): + raise ValueError( + f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" + f" {head_mask.size()[0]}." 
+ ) + + for idx, decoder_layer in enumerate(self.layers): + i = idx + if breakmodel: + if i in range(ram_blocks): + index1 = (i+1)%ram_blocks + for param1,param2 in zip(self.layers[index1].parameters(),self.layers[(i-1)%ram_blocks].parameters()): + param1.data = param2.data + for param1,param2 in zip(self.layers[index1].parameters(),self.extrastorage[index1].parameters()): + with torch.cuda.stream(copystream): + torch.cuda.comm.broadcast(param2.data,out = [param1.data]) + + # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) + if output_hidden_states: + all_hidden_states += (hidden_states,) + dropout_probability = random.uniform(0, 1) + if self.training and (dropout_probability < self.layerdrop): + continue + + past_key_value = past_key_values[idx] if past_key_values is not None else None + + if self.gradient_checkpointing and self.training: + + if use_cache: + logger.warning( + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." + ) + use_cache = False + + def create_custom_forward(module): + def custom_forward(*inputs): + # None for past_key_value + return module(*inputs, output_attentions, None) + + return custom_forward + + layer_outputs = torch.utils.checkpoint.checkpoint( + create_custom_forward(decoder_layer), + hidden_states, + attention_mask, + head_mask[idx] if head_mask is not None else None, + None, + ) + else: + if breakmodel: + device = primary_device if i < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, i - ram_blocks) + layer_outputs = decoder_layer( + hidden_states.to(device) if breakmodel and hidden_states is not None else hidden_states, + attention_mask=attention_mask.to(device) if breakmodel and attention_mask is not None else attention_mask, + layer_head_mask=((head_mask[idx].to(device) if breakmodel and head_mask[idx] is not None else head_mask[idx]) if head_mask is not None else None), + past_key_value=tuple(v.to(device) for v in past_key_value if v is not None) if breakmodel and past_key_value is not None and i >= ram_blocks and len(past_key_value) and past_key_value[0].device.index != device else past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + ) + + hidden_states = layer_outputs[0] + + if use_cache: + next_decoder_cache += (layer_outputs[2 if output_attentions else 1],) + + if output_attentions: + all_self_attns += (layer_outputs[1],) + + if breakmodel: + if i in range(ram_blocks): + torch.cuda.synchronize() + torch.cuda.empty_cache() + + if breakmodel: + if ram_blocks: + del copystream + torch.cuda.empty_cache() + hidden_states = hidden_states.to(primary_device) + if self.project_out is not None: + hidden_states = self.project_out(hidden_states) + if breakmodel: + hidden_states = hidden_states.to(primary_device) + + # add hidden states from the last decoder layer + if output_hidden_states: + all_hidden_states += (hidden_states,) + + next_cache = next_decoder_cache if use_cache else None + if not return_dict: + return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None) + return BaseModelOutputWithPast( + last_hidden_state=hidden_states, + past_key_values=next_cache, + hidden_states=all_hidden_states, + attentions=all_self_attns, + ) diff --git a/bridge.lua b/bridge.lua index ed0941c6..fc6c8823 100644 --- a/bridge.lua +++ b/bridge.lua @@ -867,6 +867,7 @@ return function(_python, _bridged) ---@field settopk integer ---@field settfs number ---@field settypical number + ---@field settopa number ---@field setreppen number 
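-- Editor's note (illustrative, not part of the bridge API docs above): the new
-- settopa/topa fields expose Top-a sampling to userscripts, matching the
-- 'topa'/'top_a' parameters threaded through sendtocolab(), tpumtjgenerate()
-- and oairequest() earlier in this patch. Under the usual Top-a definition, a
-- candidate token is kept only while its probability is at least
-- top_a * pmax^2, where pmax is the highest token probability, so the filter
-- bites hardest when the model is already confident. A rough Lua sketch under
-- that assumption:
--
--   local function top_a_keeps(p, pmax, top_a)
--     return p >= top_a * pmax * pmax
--   end
--
-- With top_a = 0.2, a peaked distribution (pmax = 0.9) drops everything below
-- 0.162, while a flat one (pmax = 0.3) only drops tokens below 0.018; setting
-- top_a to 0 disables the filter, as the new slider tooltip in gensettings.py
-- notes.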
---@field setreppenslope number ---@field setreppenrange number @@ -884,6 +885,7 @@ return function(_python, _bridged) ---@field top_k integer ---@field tfs number ---@field typical number + ---@field topa number ---@field reppen number ---@field reppenslope number ---@field reppenrange number diff --git a/colab/GPU.ipynb b/colab/GPU.ipynb index 47c31f27..0e86c4a5 100644 --- a/colab/GPU.ipynb +++ b/colab/GPU.ipynb @@ -7,7 +7,7 @@ "private_outputs": true, "provenance": [], "collapsed_sections": [], - "authorship_tag": "ABX9TyOKIa/NDLlYI5j63GXPtkXv", + "authorship_tag": "ABX9TyPbwW79K9/RkYH9i9rkYFyj", "include_colab_link": true }, "kernelspec": { @@ -68,14 +68,20 @@ "#@title <-- Click this to start KoboldAI\n", "#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n", "\n", - "Model = \"KoboldAI/GPT-Neo-2.7B-Janeway\" #@param [\"KoboldAI/GPT-Neo-2.7B-Janeway\", \"KoboldAI/GPT-Neo-2.7B-AID\", \"KoboldAI/GPT-Neo-2.7B-Picard\", \"KoboldAI/GPT-Neo-2.7B-Horni-LN\", \"KoboldAI/GPT-Neo-2.7B-Horni\", \"KoboldAI/GPT-Neo-2.7B-Shinen\", \"EleutherAI/gpt-neo-2.7B\"] {allow-input: true}\n", + "Model = \"KoboldAI/fairseq-dense-2.7B-Nerys\" #@param [\"KoboldAI/fairseq-dense-2.7B-Nerys\", \"KoboldAI/GPT-Neo-2.7B-Janeway\", \"KoboldAI/GPT-Neo-2.7B-AID\", \"KoboldAI/GPT-Neo-2.7B-Picard\", \"KoboldAI/GPT-Neo-2.7B-Horni-LN\", \"KoboldAI/GPT-Neo-2.7B-Horni\", \"KoboldAI/GPT-Neo-2.7B-Shinen\", \"EleutherAI/gpt-neo-2.7B\"] {allow-input: true}\n", "Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n", + "Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n", "\n", "!nvidia-smi\n", "from google.colab import drive\n", "drive.mount('/content/drive/')\n", "\n", - "!wget https://henk.tech/ckds -O - | bash /dev/stdin -m $Model -g $Version" + "if Provider == \"Localtunnel\":\n", + " tunnel = \"--localtunnel yes\"\n", + "else:\n", + " tunnel = \"\"\n", + "\n", + "!wget https://henk.tech/ckds -O - | bash /dev/stdin -m $Model -g $Version $tunnel" ], "execution_count": null, "outputs": [] @@ -84,27 +90,32 @@ "cell_type": "markdown", "source": [ "# GPU Edition Model Descriptions\n", - "| Model | Size | Style | Description |\n", - "| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ |\n", - "| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B GPU | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", - "| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n", - "| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. 
|\n", - "| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n", - "| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n", - "| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", - "| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n", + "| Model | Size | Style | Description |\n", + "| --- | --- | --- | --- |\n", + "| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", + "| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", + "| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n", + "| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. 
|\n", + "| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n", + "| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n", + "| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", + "| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n", "\n", "# [TPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)\n", "\n", - "| Model | Size | Style | Drive Space | Description |\n", - "| ------------------------------ | ------ | --------- | ----------- | ------------------------------------------------------------ |\n", - "| Skein 6B by VE_FORBDRYDERNE | 6B TPU | Hybrid | 0 GB | Skein is our flagship 6B model, it is a hybrid between a Adventure model and a Novel model. Best used with either Adventure mode or the You Bias userscript enabled. Skein has been trained on high quality Novels along with CYOA adventure stories and is not as wackey as the Adventure model. It also has tagging support. |\n", - "| Janeway 6B by Mr Seeker | 6B TPU | Novel | 0 GB | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", - "| Adventure 6B by VE_FORBRYDERNE | 6B TPU | Adventure | 0 GB | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n", - "| Lit 6B by Haru | 6B TPU | NSFW | 8 GB / 12 GB | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n", - "| Shinen 6B by Mr Seeker | 6B TPU | NSFW | 0 GB | Shinen is an alternative to the Lit model designed to be more explicit. If Lit is to tame for you Shinen might produce better results. 
While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", - "| Generic 6B by EleutherAI | 6B TPU | Generic | 10 GB / 12 GB | GPT-J-6B is what all other models are based on, if you need something that has no specific bias towards any particular subject this is the model for you. Best used when the other models are not suitable for what you wish to do. Such as homework assistance, blog writing, coding and more. It needs more hand holding than other models and is more prone to undesirable formatting changes. |\n", - "| C1 6B by Haru | 6B TPU | Chatbot | 8 GB / 12 GB | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n", + "| Model | Size | Style | Description |\n", + "| --- | --- | --- | --- |\n", + "| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", + "| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", + "| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n", + "| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n", + "| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n", + "| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. 
|\n", + "| [Convo](https://huggingface.co/hitomi-team/convo-6B) by Hitomi Team | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). |\n", + "| [C1](https://huggingface.co/hakurei/c1-6B) by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n", + "| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n", + "| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n", + "| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n", "\n", "\n", "| Style | Description |\n", @@ -113,7 +124,6 @@ "| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n", "| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |\n", "| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n", - "| Hybrid | Hybrid models are a blend between different styles, for example they are trained on both Novel stories and Adventure stories. These models are great variety models that you can use for multiple different playstyles and modes, but depending on your usage you may need to enable Adventure Mode or the You bias (in userscripts). 
|\n", "| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n", "\n", "# How to start KoboldAI in 7 simple steps\n", diff --git a/colab/TPU.ipynb b/colab/TPU.ipynb index 0ff1fcc1..89f53af2 100644 --- a/colab/TPU.ipynb +++ b/colab/TPU.ipynb @@ -7,7 +7,7 @@ "colab_type": "text" }, "source": [ - "\"Open" + "\"Open" ] }, { @@ -65,8 +65,8 @@ "#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n", "\n", "#@title <-- Click this to start KoboldAI\n", - "Model = \"Janeway 13B\" #@param [\"Janeway 13B\", \"Shinen 13B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit 6B\", \"Convo 6B\", \"C1 6B\", \"NeoX 20B\", \"KoboldAI/fairseq-dense-13B\", \"EleutherAI/gpt-j-6B\"] {allow-input: true}\n", - "Version = \"United\" #@param [\"Official\", \"United\"] {allow-input: true}\n", + "Model = \"Nerys 13B\" #@param [\"Nerys 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit 6B\", \"NeoX 20B\", \"facebook/opt-13b\", \"KoboldAI/fairseq-dense-13B\", \"EleutherAI/gpt-j-6B\"] {allow-input: true}\n", + "Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n", "Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n", "\n", "import os\n", @@ -84,6 +84,10 @@ " Model = \"KoboldAI/fairseq-dense-13B-Janeway\"\n", " path = \"\"\n", " download = \"\"\n", + "elif Model == \"Nerys 13B\":\n", + " Model = \"KoboldAI/fairseq-dense-13B-Nerys\"\n", + " path = \"\"\n", + " download = \"\"\n", "elif Model == \"Shinen 13B\":\n", " Model = \"KoboldAI/fairseq-dense-13B-Shinen\"\n", " path = \"\"\n", @@ -97,41 +101,25 @@ " Drive = \"Unextracted (Less Space)\"\n", " ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-neox-20b-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.03,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-neox-20b-jax.settings\n", "elif Model == \"Skein 6B\":\n", - " Model = \"TPUMeshTransformerGPTJ\"\n", - " path = \" -p gpt-j-6b-skein-jax\"\n", - " location = \"colab\"\n", - " download = \" -a https://storage.henk.tech/KoboldAI/skein-jax.txt\"\n", - " extract = \"\"\n", - " Drive = \"Unextracted (Less Space)\"\n", - " ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-skein-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n 
\\\"adventure\\\": false\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-skein-jax.settings\n", + " Model = \"KoboldAI/GPT-J-6B-Skein\"\n", + " path = \"\"\n", + " download = \"\"\n", "elif Model == \"Janeway 6B\":\n", - " Model = \"TPUMeshTransformerGPTJ\"\n", - " path = \" -p gpt-j-6b-janeway-jax\"\n", - " location = \"colab\"\n", - " download = \" -a https://storage.henk.tech/KoboldAI/janeway-jax.txt\"\n", - " extract = \"\"\n", - " Drive = \"Unextracted (Less Space)\"\n", - " ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-janeway-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"rep_pen_slope\\\": 0.7,\\n \\\"rep_pen_range\\\": 1024.0,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false,\\n \\\"singleline\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false,\\n \\\"chatmode\\\": false,\\n \\\"chatname\\\": \\\"You\\\",\\n \\\"dynamicscan\\\": false,\\n \\\"nopromptgen\\\": false,\\n \\\"rngpersist\\\": false,\\n \\\"nogenmod\\\": false,\\n \\\"autosave\\\": false,\\n \\\"welcome\\\": false,\\n \\\"newlinemode\\\": \\\"n\\\",\\n \\\"antemplate\\\": \\\"[Genre: <|>]\\\",\\n \\\"userscripts\\\": [],\\n \\\"corescript\\\": \\\"default.lua\\\",\\n \\\"softprompt\\\": \\\"\\\"\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-janeway-jax.settings\n", + " Model = \"KoboldAI/GPT-J-6B-Janeway\"\n", + " path = \"\"\n", + " download = \"\"\n", "elif Model == \"Adventure 6B\":\n", - " Model = \"TPUMeshTransformerGPTJ\"\n", - " path = \" -p gpt-j-6b-adventure-jax\"\n", - " location = \"colab\"\n", - " download = \" -a https://api.wandb.ai/files/ve-forbryderne/adventure/carol-data/models/gpt-j-6b-adventure-jax/aria2.txt\"\n", - " extract = \"\"\n", - " Drive = \"Unextracted (Less Space)\"\n", - " ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-adventure-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 1.1,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": true\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-adventure-jax.settings\n", + " Model = \"KoboldAI/GPT-J-6B-Adventure\"\n", + " path = \"\"\n", + " download = \"\"\n", "elif Model == \"Lit 6B\":\n", " Model = \"hakurei/lit-6B\"\n", " path = \"\"\n", " download = \"\"\n", "elif Model == \"Shinen 6B\":\n", - " Model = \"TPUMeshTransformerGPTJ\"\n", - " path = \" -p gpt-j-6b-shinen-jax\"\n", - " location = \"colab\"\n", - " download = \" -a https://storage.henk.tech/KoboldAI/shinen-jax.txt\"\n", - " extract = \"\"\n", - " Drive = \"Unextracted (Less Space)\"\n", - " ![[ -f /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-shinen-jax.settings ]] || echo -e \"{\\n \\\"apikey\\\": \\\"\\\",\\n \\\"andepth\\\": 3,\\n \\\"temp\\\": 0.5,\\n \\\"top_p\\\": 0.9,\\n \\\"top_k\\\": 0,\\n \\\"tfs\\\": 1.0,\\n \\\"rep_pen\\\": 
1.1,\\n \\\"rep_pen_slope\\\": 0.7,\\n \\\"rep_pen_range\\\": 1024.0,\\n \\\"genamt\\\": 80,\\n \\\"max_length\\\": 2048,\\n \\\"ikgen\\\": 200,\\n \\\"formatoptns\\\": {\\n \\\"frmttriminc\\\": true,\\n \\\"frmtrmblln\\\": false,\\n \\\"frmtrmspch\\\": false,\\n \\\"frmtadsnsp\\\": false,\\n \\\"singleline\\\": false\\n },\\n \\\"numseqs\\\": 1,\\n \\\"widepth\\\": 3,\\n \\\"useprompt\\\": true,\\n \\\"adventure\\\": false,\\n \\\"chatmode\\\": false,\\n \\\"chatname\\\": \\\"You\\\",\\n \\\"dynamicscan\\\": false,\\n \\\"nopromptgen\\\": false,\\n \\\"rngpersist\\\": false,\\n \\\"nogenmod\\\": false,\\n \\\"autosave\\\": false,\\n \\\"welcome\\\": false,\\n \\\"newlinemode\\\": \\\"n\\\",\\n \\\"antemplate\\\": \\\"[Genre: <|>]\\\",\\n \\\"userscripts\\\": [],\\n \\\"corescript\\\": \\\"default.lua\\\",\\n \\\"softprompt\\\": \\\"\\\"\\n}\" > /content/drive/MyDrive/KoboldAI/settings/gpt-j-6b-shinen-jax.settings\n", + " Model = \"KoboldAI/GPT-J-6B-Shinen\"\n", + " path = \"\"\n", + " download = \"\"\n", "elif Model == \"Convo 6B\":\n", " Model = \"hitomi-team/convo-6B\"\n", " path = \"\"\n", @@ -159,37 +147,37 @@ "\n", "| Model | Size | Style | Description |\n", "| --- | --- | --- | --- |\n", - "| Janeway by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", - "| Shinen by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n", - "| Skein by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n", - "| Adventure by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n", - "| Lit by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n", - "| Convo | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). 
|\n", - "| C1 by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |\n", + "| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", + "| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", + "| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n", + "| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n", + "| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n", + "| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n", "| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n", - "| Fairseq Dense | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. 
This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n", - "| GPT-J-6B by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n", + "| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n", + "| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n", "\n", "\n", "# [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)\n", "\n", - "| Model | Size | Style | Description |\n", - "| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ |\n", - "| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B GPU | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", - "| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n", - "| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n", - "| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. 
|\n", - "| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n", - "| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", - "| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n", + "| Model | Size | Style | Description |\n", + "| --- | --- | --- | --- |\n", + "| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", + "| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", + "| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n", + "| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n", + "| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. 
|\n", + "| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n", + "| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", + "| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n", "\n", - "| Style | Description |\n", - "| --------- | ------------------------------------------------------------ |\n", - "| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |\n", - "| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n", + "| Style | Description |\n", + "| --- | --- |\n", + "| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |\n", + "| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |\n", "| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |\n", - "| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n", - "| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |\n", + "| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |\n", + "| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. 
|\n", "\n", "---\n", "## Tips to get the most out of Google Colab\n", diff --git a/environments/huggingface.yml b/environments/huggingface.yml index e518be3b..205d5e31 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -20,4 +20,5 @@ dependencies: - flask-cloudflared - flask-ngrok - lupa==1.10 - - transformers>=4.17 \ No newline at end of file + - transformers>=4.20.1 + - accelerate \ No newline at end of file diff --git a/environments/rocm.yml b/environments/rocm.yml index 2ca9c670..8ade341f 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -20,4 +20,5 @@ dependencies: - flask-cloudflared - flask-ngrok - lupa==1.10 - - transformers>=4.17 + - transformers>=4.20.1 + - accelerate diff --git a/gensettings.py b/gensettings.py index e8d4e566..b3007c91 100644 --- a/gensettings.py +++ b/gensettings.py @@ -64,6 +64,17 @@ gensettingstf = [ "step": 0.05, "default": 1.0, "tooltip": "Alternative sampling method described in the paper \"Typical Decoding for Natural Language Generation\" (10.48550/ARXIV.2202.00666). The paper suggests 0.2 as a good value for this setting. Set this setting to 1 to disable its effect." + }, + { + "uitype": "slider", + "unit": "float", + "label": "Top a Sampling", + "id": "settopa", + "min": 0.0, + "max": 1.0, + "step": 0.01, + "default": 0.0, + "tooltip": "Alternative sampling method that reduces the randomness of the AI whenever the probability of one token is much higher than all the others. Higher values have a stronger effect. Set this setting to 0 to disable its effect." }, { "uitype": "slider", diff --git a/koboldai.ico b/koboldai.ico new file mode 100644 index 00000000..4e39d0de Binary files /dev/null and b/koboldai.ico differ diff --git a/koboldaiblue.ico b/koboldaiblue.ico new file mode 100644 index 00000000..944d7539 Binary files /dev/null and b/koboldaiblue.ico differ diff --git a/koboldaigreen.ico b/koboldaigreen.ico new file mode 100644 index 00000000..63622231 Binary files /dev/null and b/koboldaigreen.ico differ diff --git a/maps/opt.json b/maps/opt.json new file mode 100644 index 00000000..59d41cd5 --- /dev/null +++ b/maps/opt.json @@ -0,0 +1,37 @@ +{ + "mtj_compat": "opt", + "mtj_pe": "fixed", + "mtj_config_map": { + "do_layer_norm_before": ["do_layer_norm_before", true], + "d_embed": "word_embed_proj_dim", + "d_model": "hidden_size", + "n_heads": "num_attention_heads", + "layers": "num_hidden_layers" + }, + "static_weights": { + "decoder.embed_tokens.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}}, + "decoder.project_in.weight": {"mtj": {"module": "embedding_shard", "param": "project_in"}}, + "decoder.embed_positions.weight": {"mtj": {"module": "embedding_shard", "param": "pos_embs", "transforms": ["no_transpose", "remove_first_two_rows"]}}, + "decoder.final_layer_norm.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}}, + "decoder.final_layer_norm.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}}, + "decoder.project_out.weight": {"mtj": {"module": "projection_shard", "param": "project_out"}} + }, + "layer_weights": { + "decoder.layers.{layer}.self_attn.q_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear", "param": "w"}}, + "decoder.layers.{layer}.self_attn.q_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear", "param": "b"}}, + "decoder.layers.{layer}.self_attn.v_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "w"}}, + 
"decoder.layers.{layer}.self_attn.v_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "b"}}, + "decoder.layers.{layer}.self_attn.k_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "w"}}, + "decoder.layers.{layer}.self_attn.k_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "b"}}, + "decoder.layers.{layer}.self_attn.out_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "w"}}, + "decoder.layers.{layer}.self_attn.out_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "b", "transforms": ["divide_by_shards"]}}, + "decoder.layers.{layer}.fc1.weight": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "w"}}, + "decoder.layers.{layer}.fc1.bias": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "b"}}, + "decoder.layers.{layer}.fc2.weight": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "w"}}, + "decoder.layers.{layer}.fc2.bias": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "b", "transforms": ["divide_by_shards"]}}, + "decoder.layers.{layer}.self_attn_layer_norm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "scale"}}, + "decoder.layers.{layer}.self_attn_layer_norm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "offset"}}, + "decoder.layers.{layer}.final_layer_norm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "scale"}}, + "decoder.layers.{layer}.final_layer_norm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "offset"}} + } +} diff --git a/readme.md b/readme.md index 2dd4c57c..a136c856 100644 --- a/readme.md +++ b/readme.md @@ -50,49 +50,50 @@ Each edition features different models and requires different hardware to run, t ### [Click here for the TPU Edition Colab](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb) +| Model | Size | Style | Description | +| --- | --- | --- | --- | +| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. | +| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. | +| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. 
| +| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. | +| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). | +| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. | +| [Convo](https://huggingface.co/hitomi-team/convo-6B) by Hitomi Team | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). | +| [C1](https://huggingface.co/hakurei/c1-6B) by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. | +| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. | +| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. | +| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. 
| + +## [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb) | Model | Size | Style | Description | | --- | --- | --- | --- | -| Janeway by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. | -| Shinen by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. | -| Skein by VE\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. | -| Adventure by VE\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). | -| Lit by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. | -| Convo | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). | -| C1 by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. | -| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. | -| Fairseq Dense | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. 
| -| GPT-J-6B by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. | +| [Fairseq-Dense-2.7B-Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (a newer Janeway); on top of the Pike dataset you also get some light novels, Adventure mode support and a little bit of Shinen thrown into the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model too. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing is best done from the first or third person. | +| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focused on SFW, romantic scenes might involve a degree of nudity. | +| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model, this model is designed for Novels of a variety of genres. It is meant to be used in KoboldAI's regular mode. | +| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also known as Adventure 2.7B, this is a clone of the AI Dungeon Classic model and is best known for the epic wacky adventures that AI Dungeon Classic players love. | +| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between an SFW Novel model and an NSFW model, this model should be a good choice. | +| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. It can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. | +| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is too tame for you, Shinen might produce better results. While it is a Novel model, it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. | +| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models; it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming.
It can also be a good basis for experiencing some of the softprompts if your softprompt is not about a subject the other models cover. | - -# [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb) - -| Model | Size | Style | Description | -| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ | -| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B GPU | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. | -| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. | -| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. | -| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. | -| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. | -| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. | -| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. | - -| Style | Description | -| --------- | ------------------------------------------------------------ | -| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. | -| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature.
| +| Style | Description | +| --- | --- | +| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. | +| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. | | Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. | -| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. | -| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. | +| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. | +| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. | ---- ## Tips to get the most out of Google Colab -- Google will occationally show a Captcha, typically after it has been open for 30 minutes but it can be more frequent if you often use Colab. Make sure to do these properly, or you risk getting your instance shut down and getting a lower priority towards the TPU's. -- KoboldAI uses Google Drive to store your files and settings, if you wish to upload a softprompt or userscript this can be done directly on the Google Drive website. You can also use this to download backups of your KoboldAI related files or upload models of your own. -- Don't want to save your stories on Google Drive for privacy reasons? Do not use KoboldAI's save function and instead click Download as .json, this will automatically download the story to your own computer without ever touching Google's harddrives. You can load this back trough the Load from file option. -- Google shut your instance down unexpectedly? You can still make use of the Download as .json button to recover your story as long as you did not close the KoboldAI window. You can then load this back up in your next session. -- Done with KoboldAI? Go to the Runtime menu, click on Manage Sessions and terminate your open sessions that you no longer need. This trick can help you maintain higher priority towards getting a TPU. -- Models stored on Google Drive typically load faster than models we need to download from the internet. + +* Google will occasionally show a Captcha, typically after it has been open for 30 minutes, but it can be more frequent if you often use Colab. Make sure to complete these properly, or you risk getting your instance shut down and getting a lower priority towards the TPUs.
+* KoboldAI uses Google Drive to store your files and settings; if you wish to upload a softprompt or userscript, this can be done directly on the Google Drive website. You can also use this to download backups of your KoboldAI-related files or upload models of your own. +* Don't want to save your stories on Google Drive for privacy reasons? Do not use KoboldAI's save function and instead click Download as .json; this will automatically download the story to your own computer without ever touching Google's hard drives. You can load this back through the Load from file option. +* Google shut your instance down unexpectedly? You can still make use of the Download as .json button to recover your story as long as you did not close the KoboldAI window. You can then load this back up in your next session. +* Done with KoboldAI? Go to the Runtime menu, click on Manage Sessions and terminate your open sessions that you no longer need. This trick can help you maintain higher priority towards getting a TPU. +* Models stored on Google Drive typically load faster than models we need to download from the internet. + ### [Click here for the GPU Edition Colab](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb) | Model | Size | Type | Description | diff --git a/requirements.txt b/requirements.txt index 897f9e8e..13fdd7cd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -transformers>=4.17 +transformers>=4.20.1 Flask Flask-SocketIO requests @@ -11,3 +11,4 @@ markdown bleach==4.1.0 sentencepiece protobuf +accelerate \ No newline at end of file diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 416a06a4..0f723a49 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -5,7 +5,7 @@ requests optax >= 0.0.5, <= 0.0.9 dm-haiku == 0.0.5 jax == 0.2.21 -transformers >= 4.17 +transformers >= 4.19 progressbar2 git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck flask diff --git a/static/application.js b/static/application.js index 63f804a3..3cddea87 100644 --- a/static/application.js +++ b/static/application.js @@ -20,6 +20,7 @@ var button_settings; var button_format; var button_softprompt; var button_userscripts; +var button_samplers; var button_mode; var button_mode_label; var button_send; @@ -106,6 +107,12 @@ var using_webkit_patch = true; var shift_down = false; var do_clear_ent = false; +// Whether or not an entry in the Userscripts menu is being dragged +var us_dragging = false; + +// Whether or not an entry in the Samplers menu is being dragged +var samplers_dragging = false; + // Display vars var allowtoggle = false; var formatcount = 0; @@ -173,20 +180,36 @@ function addSetting(ob) { window["setting_"+ob.id] = refin; // Is this still needed? window["label_"+ob.id] = reflb; // Is this still needed? // Add event function to input
+ var updateLabelColor = function () { + var value = (ob.unit === "float" ? parseFloat : parseInt)(reflb.val()); + if(value > ob.max || value < ob.min) { + reflb.addClass("setting-value-warning"); + } else { + reflb.removeClass("setting-value-warning"); + } + } var send = function () { sliders_throttle(ob.id, function () { - socket.send({'cmd': $(refin).attr('id'), 'data': $(refin).val()}); + socket.send({'cmd': $(refin).attr('id'), 'data': $(reflb).val()}); }); - reflb.val($(refin).val()); } - refin.on("input", send); + refin.on("input", function (event) { + reflb.val(refin.val()); + updateLabelColor(); + send(); + }).on("change", updateLabelColor); reflb.on("change", function (event) { var value = (ob.unit === "float" ? parseFloat : parseInt)(event.target.value); - if(Number.isNaN(value) || value > ob.max || value < ob.min) { + if(Number.isNaN(value) || (ob.min >= 0 && value < 0)) { event.target.value = refin.val(); return; } + if (ob.unit === "float") { + value = parseFloat(value.toFixed(3)); // Round to 3 decimal places to help avoid the number being too long to fit in the box + } refin.val(value); + reflb.val(value); + updateLabelColor(); send(); }); } else if(ob.uitype == "toggle"){ @@ -957,6 +980,16 @@ function hideUSPopup() { spcontent.html(""); } +function showSamplersPopup() { + samplerspopup.removeClass("hidden"); + samplerspopup.addClass("flex"); +} + +function hideSamplersPopup() { + samplerspopup.removeClass("flex"); + samplerspopup.addClass("hidden"); +} + function buildLoadList(ar) { disableButtons([load_accept]); loadcontent.html(""); @@ -1090,6 +1123,29 @@ } } +function buildSamplerList(samplers) { + samplerslist.html(""); + showSamplersPopup(); + var i; + var samplers_lookup_table = [ + "Top-k Sampling", + "Top-a Sampling", + "Top-p Sampling", + "Tail-free Sampling", + "Typical Sampling", + "Temperature", + ]
+	for(i=0; i<samplers.length; i++) {
+		samplerslist.append("<div class=\"flex\">\
+			<div class=\"samplerslistitem flex-row-container\" sid=\""+samplers[i]+"\">\
+				<div class=\"flex-row\">\
+					<div>"+samplers_lookup_table[samplers[i]]+"</div>\
+				</div>\
+			</div>\
+		</div>\
+		");
+	}
+}
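The list built above reduces each sampler to an integer ID (0 = Top-k through 5 = Temperature, per `samplers_lookup_table`), and the accept handler further down submits those IDs in their on-screen order via the `samplers` command. A self-contained sketch of the contract this implies for the backend, with placeholder warpers rather than KoboldAI's actual implementation:

```python
import numpy as np

# IDs mirror samplers_lookup_table in static/application.js.
TOP_K, TOP_A, TOP_P, TFS, TYPICAL, TEMPERATURE = range(6)

def warp_top_k(logits, k=40):
    # Mask every logit below the k-th largest value to -inf.
    if k <= 0 or k >= logits.size:
        return logits
    cutoff = np.sort(logits)[-k]
    return np.where(logits < cutoff, -np.inf, logits)

def warp_temperature(logits, temperature=0.7):
    return logits / temperature

def apply_samplers(logits, order, settings):
    # `order` is the list the UI submits, e.g. {'cmd': 'samplers', 'data': [5, 0]}
    # means: apply Temperature first, then Top-k. Only two warpers are
    # implemented here to keep the sketch short; a real backend needs all six.
    warpers = {
        TOP_K: lambda l: warp_top_k(l, settings.get("top_k", 40)),
        TEMPERATURE: lambda l: warp_temperature(l, settings.get("temp", 0.7)),
    }
    for sid in order:
        if sid in warpers:
            logits = warpers[sid](logits)
    return logits
```

The ordering is not cosmetic: running Temperature before a truncation sampler such as Top-p changes which tokens survive the cut, which is presumably why the menu is drag-sortable rather than fixed.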
$("#settknmax").val(parseInt(msg.data)); $("#settknmaxcur").val(msg.data); + $("#settknmax").val(parseInt(msg.data)).trigger("change"); } else if(msg.cmd == "updateikgen") { // Send current max tokens value to input - $("#setikgen").val(parseInt(msg.data)); $("#setikgencur").val(msg.data); + $("#setikgen").val(parseInt(msg.data)).trigger("change"); } else if(msg.cmd == "setlabeltemp") { // Update setting label with value from server $("#settempcur").val(msg.data); @@ -2116,6 +2181,9 @@ $(document).ready(function(){ } else if(msg.cmd == "setlabeltypical") { // Update setting label with value from server $("#settypicalcur").val(msg.data); + } else if(msg.cmd == "setlabeltypical") { + // Update setting label with value from server + $("#settopa").val(msg.data); } else if(msg.cmd == "setlabelreppen") { // Update setting label with value from server $("#setreppencur").val(msg.data); @@ -2284,6 +2352,8 @@ $(document).ready(function(){ buildSPList(msg.data); } else if(msg.cmd == "buildus") { buildUSList(msg.data.unloaded, msg.data.loaded); + } else if(msg.cmd == "buildsamplers") { + buildSamplerList(msg.data); } else if(msg.cmd == "askforoverwrite") { // Show overwrite warning show([$(".saveasoverwrite")]); @@ -2304,15 +2374,15 @@ $(document).ready(function(){ $("#setnumseqcur").html(msg.data); } else if(msg.cmd == "updatenumseq") { // Send current max tokens value to input - $("#setnumseq").val(parseInt(msg.data)); $("#setnumseqcur").html(msg.data); + $("#setnumseq").val(parseInt(msg.data)).trigger("change"); } else if(msg.cmd == "setlabelwidepth") { // Update setting label with value from server $("#setwidepthcur").html(msg.data); } else if(msg.cmd == "updatewidepth") { // Send current max tokens value to input - $("#setwidepth").val(parseInt(msg.data)); $("#setwidepthcur").html(msg.data); + $("#setwidepth").val(parseInt(msg.data)).trigger("change"); } else if(msg.cmd == "updateuseprompt") { // Update toggle state $("#setuseprompt").prop('checked', msg.data).change(); @@ -2396,9 +2466,39 @@ $(document).ready(function(){ }, 2); }); + var us_click_handler = function(ev) { + setTimeout(function() { + if (us_dragging) { + return; + } + var target = $(ev.target).closest(".uslistitem")[0]; + if ($.contains(document.getElementById("uslistunloaded"), target)) { + document.getElementById("uslistloaded").appendChild(target); + } else { + document.getElementById("uslistunloaded").appendChild(target); + } + }, 10); + } + + var samplers_click_handler = function(ev) { + setTimeout(function() { + if (samplers_dragging) { + return; + } + var target = $(ev.target).closest(".samplerslistitem"); + var next = target.parent().next().find(".samplerslistitem"); + if (!next.length) { + return; + } + next.parent().after(target.parent()); + }, 10); + } + // Make the userscripts menu sortable var us_sortable_settings = { placeholder: "ussortable-placeholder", + start: function() { us_dragging = true; }, + stop: function() { us_dragging = false; }, delay: 2, cursor: "move", tolerance: "pointer", @@ -2407,12 +2507,28 @@ $(document).ready(function(){ scrollSensitivity: 64, scrollSpeed: 10, } - $(usunloaded).sortable($.extend({ + usunloaded.sortable($.extend({ connectWith: "#uslistloaded", - }, us_sortable_settings)); - $(usloaded).sortable($.extend({ + }, us_sortable_settings)).on("click", ".uslistitem", us_click_handler); + usloaded.sortable($.extend({ connectWith: "#uslistunloaded", - }, us_sortable_settings)); + }, us_sortable_settings)).on("click", ".uslistitem", us_click_handler); + + // Make the samplers menu sortable 
+ var samplers_sortable_settings = { + placeholder: "samplerssortable-placeholder", + start: function() { samplers_dragging = true; }, + stop: function() { samplers_dragging = false; }, + delay: 2, + cursor: "move", + tolerance: "pointer", + opacity: 0.21, + revert: 173, + scrollSensitivity: 64, + scrollSpeed: 10, + } + samplerslist.sortable($.extend({ + }, samplers_sortable_settings)).on("click", ".samplerslistitem", samplers_click_handler); // Bind actions to UI buttons button_send.on("click", function(ev) { @@ -2548,6 +2664,10 @@ $(document).ready(function(){ button_userscripts.on("click", function(ev) { socket.send({'cmd': 'uslistrequest', 'data': ''}); }); + + button_samplers.on("click", function(ev) { + socket.send({'cmd': 'samplerlistrequest', 'data': ''}); + }); load_close.on("click", function(ev) { hideLoadPopup(); @@ -2581,6 +2701,16 @@ $(document).ready(function(){ socket.send({'cmd': 'usload', 'data': ''}); hideUSPopup(); }); + + samplers_close.on("click", function(ev) { + hideSamplersPopup(); + }); + + samplers_accept.on("click", function(ev) { + hideMessage(); + socket.send({'cmd': 'samplers', 'data': samplerslist.find(".samplerslistitem").map(function() { return parseInt($(this).attr("sid")); }).toArray()}); + hideSamplersPopup(); + }); button_newgame.on("click", function(ev) { if(connected) { diff --git a/static/custom.css b/static/custom.css index 677fc028..640cb8db 100644 --- a/static/custom.css +++ b/static/custom.css @@ -22,6 +22,14 @@ chunk.editing, chunk.editing * { font-style: normal !important; } +.setting-value-warning { + color: #ff7777; +} + +.setting-value-warning:focus { + color: #ffaaaa !important; +} + .settinglabel input { width: 5ch; background-color: inherit; @@ -449,6 +457,26 @@ body.connected #popupfooter, #popupfooter.always-available { overflow-wrap: anywhere; } +#samplerspopup { + width: 300px; + background-color: #262626; + margin-top: 100px; +} + +@media (max-width: 768px) { + #samplerspopup { + width: 100%; + background-color: #262626; + margin-top: 100px; + } +} + +#samplerslist { + height: 300px; + overflow-y: scroll; + overflow-wrap: anywhere; +} + #nspopup { width: 350px; background-color: #262626; @@ -742,7 +770,7 @@ body.connected .dropdown-item:hover, .dropdown-item.always-available:hover { background-color: #3bf723; } -.ussortable-placeholder { +.ussortable-placeholder, .samplerssortable-placeholder { height: 4px; background-color: #3bf723; } @@ -1332,7 +1360,7 @@ body.connected .popupfooter, .popupfooter.always-available { background-color: #688f1f; } -.uslistitem { +.uslistitem, .samplerslistitem { padding: 12px 10px 12px 10px; display: flex; flex-grow: 1; @@ -1344,11 +1372,11 @@ body.connected .popupfooter, .popupfooter.always-available { transition: background-color 0.25s ease-in; } -.uslistitemsub { +.uslistitemsub, .samplerslistitemsub { color: #ba9; } -.uslistitem:hover { +.uslistitem:hover, .samplerslistitem:hover { cursor: move; background-color: #688f1f; } diff --git a/templates/index.html b/templates/index.html index 92b70753..36264e7a 100644 --- a/templates/index.html +++ b/templates/index.html @@ -9,7 +9,7 @@ - + @@ -17,7 +17,7 @@ - + @@ -71,6 +71,9 @@ + @@ -299,6 +302,19 @@ +