diff --git a/aiserver.py b/aiserver.py index 98fd6246..6635b39d 100644 --- a/aiserver.py +++ b/aiserver.py @@ -11,6 +11,7 @@ eventlet.monkey_patch(all=True, thread=False) import os os.system("") os.environ['EVENTLET_THREADPOOL_SIZE'] = '1' +os.environ['TOKENIZERS_PARALLELISM'] = 'false' from eventlet import tpool from os import path, getcwd @@ -23,6 +24,8 @@ import packaging import contextlib import traceback import threading +import markdown +import bleach from collections.abc import Iterable from typing import Any, Callable, TypeVar, Tuple, Union, Dict, Set, List @@ -65,22 +68,22 @@ class colors: modellist = [ ["Load a model from its directory", "NeoCustom", ""], ["Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom", ""], - ["Skein 6B (Hybrid)", "KoboldAI/GPT-J-6B-Skein", "12GB"], - ["Adventure 6B", "KoboldAI/GPT-J-6B-Adventure", "12GB"], - ["Lit 6B (NSFW)", "hakurei/lit-6B", "12GB"], - ["C1 6B (Chatbot)", "hakurei/c1-6B", "12GB"], - ["Picard 2.7B (Novel)", "KoboldAI/GPT-Neo-2.7B-Picard", "6GB"], - ["Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "6GB"], - ["Horni 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Horni", "6GB"], - ["Horni-LN 2.7B (Novel)", "KoboldAI/GPT-Neo-2.7B-Horni-LN", "6GB"], - ["Shinen 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Shinen", "6GB"], - ["GPT-J 6B", "EleutherAI/gpt-j-6B", "12GB"], - ["GPT-Neo 2.7B", "EleutherAI/gpt-neo-2.7B", "6GB"], - ["GPT-Neo 1.3B", "EleutherAI/gpt-neo-1.3B", "3GB"], - ["GPT-2 XL", "gpt2-xl", "8GB"], + ["Skein 6B (Hybrid)", "KoboldAI/GPT-J-6B-Skein", "16GB"], + ["Adventure 6B", "KoboldAI/GPT-J-6B-Adventure", "16GB"], + ["Lit 6B (NSFW)", "hakurei/lit-6B", "16GB"], + ["C1 6B (Chatbot)", "hakurei/c1-6B", "16GB"], + ["Picard 2.7B (Novel)", "KoboldAI/GPT-Neo-2.7B-Picard", "8GB"], + ["Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "8GB"], + ["Horni 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Horni", "8GB"], + ["Horni-LN 2.7B (Novel)", "KoboldAI/GPT-Neo-2.7B-Horni-LN", "8GB"], + ["Shinen 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Shinen", 
"8GB"], + ["GPT-J 6B", "EleutherAI/gpt-j-6B", "16GB"], + ["GPT-Neo 2.7B", "EleutherAI/gpt-neo-2.7B", "8GB"], + ["GPT-Neo 1.3B", "EleutherAI/gpt-neo-1.3B", "6GB"], + ["GPT-2 XL", "gpt2-xl", "6GB"], ["GPT-2 Large", "gpt2-large", "4GB"], ["GPT-2 Med", "gpt2-medium", "2GB"], - ["GPT-2", "gpt2", "1GB"], + ["GPT-2", "gpt2", "2GB"], ["OpenAI API (requires API key)", "OAI", ""], ["InferKit API (requires API key)", "InferKit", ""], ["KoboldAI Server API (Old Google Colab)", "Colab", ""], @@ -183,6 +186,7 @@ class vars: useprompt = False # Whether to send the full prompt with every submit action breakmodel = False # For GPU users, whether to use both system RAM and VRAM to conserve VRAM while offering speedup compared to CPU-only bmsupported = False # Whether the breakmodel option is supported (GPT-Neo/GPT-J only, currently) + nobreakmodel = False # Something specifically requested Breakmodel to be disabled (For example a models config) smandelete = False # Whether stories can be deleted from inside the browser smanrename = False # Whether stories can be renamed from inside the browser allowsp = False # Whether we are allowed to use soft prompts (by default enabled if we're using GPT-2, GPT-Neo or GPT-J) @@ -202,6 +206,8 @@ class vars: nopromptgen = False rngpersist = False nogenmod = False + welcome = False # Custom Welcome Text (False is default) + newlinemode = "n" quiet = False # If set will suppress any story text from being printed to the console (will only be seen on the client web page) debug = False # If set to true, will send debug information to the client for display @@ -209,7 +215,7 @@ class vars: # Function to get model selection at startup #==================================================================# def getModelSelection(): - print(" # Model V/RAM\n =========================================") + print(" # Model VRAM\n =========================================") i = 1 for m in modellist: print(" {0} - {1}\t\t{2}".format("{:<2}".format(i), m[0].ljust(15), 
m[2])) @@ -301,7 +307,7 @@ def device_config(model): assert sum(breakmodel.gpu_blocks) <= n_layers n_layers -= sum(breakmodel.gpu_blocks) except: - print("WARNING: --layers is malformatted. Please use the --help option to see correct usage of --layers. Defaulting to all layers on device 0.", file=sys.stderr) + print("WARNING: --breakmodel_gpulayers is malformatted. Please use the --help option to see correct usage of --breakmodel_gpulayers. Defaulting to all layers on device 0.", file=sys.stderr) breakmodel.gpu_blocks = [n_layers] n_layers = 0 elif(args.breakmodel_layers is not None): @@ -389,6 +395,54 @@ def device_config(model): generator = model.generate breakmodel.move_hidden_layers(model.transformer) +#==================================================================# +# Allow the models to override some settings +#==================================================================# +def loadmodelsettings(): + try: + model_js_config = str(model_config).partition(' ')[2] + js = json.loads(model_js_config) + except Exception as e: + try: + model_js_config = open(vars.custmodpth + "/config.json", "r") + except Exception as e: + model_js_config = open(vars.custmodpth.replace('/', '_') + "/config.json", "r") + js = json.load(model_js_config) + if("badwordsids" in js): + vars.badwordsids = js["badwordsids"] + if("nobreakmodel" in js): + vars.nobreakmodel = js["nobreakmodel"] + if("temp" in js): + vars.temp = js["temp"] + if("top_p" in js): + vars.top_p = js["top_p"] + if("top_k" in js): + vars.top_k = js["top_k"] + if("tfs" in js): + vars.tfs = js["tfs"] + if("rep_pen" in js): + vars.rep_pen = js["rep_pen"] + if("rep_pen_slope" in js): + vars.rep_pen_slope = js["rep_pen_slope"] + if("rep_pen_range" in js): + vars.rep_pen_range = js["rep_pen_range"] + if("adventure" in js): + vars.adventure = js["adventure"] + if("chatmode" in js): + vars.chatmode = js["chatmode"] + if("dynamicscan" in js): + vars.dynamicscan = js["dynamicscan"] + if("formatoptns" in js): + 
vars.formatoptns = js["formatoptns"] + if("welcome" in js): + vars.welcome = js["welcome"] + if("newlinemode" in js): + vars.newlinemode = js["newlinemode"] + if("antemplate" in js): + vars.setauthornotetemplate = js["antemplate"] + if(not vars.gamestarted): + vars.authornotetemplate = vars.setauthornotetemplate + #==================================================================# # Startup #==================================================================# @@ -402,11 +456,12 @@ parser.add_argument("--path", help="Specify the Path for local models (For model parser.add_argument("--cpu", action='store_true', help="By default unattended launches are on the GPU use this option to force CPU usage.") parser.add_argument("--breakmodel", action='store_true', help=argparse.SUPPRESS) parser.add_argument("--breakmodel_layers", type=int, help=argparse.SUPPRESS) -parser.add_argument("--breakmodel_gpulayers", type=str, help="If using a model that supports hybrid generation, this is a comma-separated list that specifies how many layers to put on each GPU device. For example to put 8 layers on device 0, 9 layers on device 1 and 11 layers on device 2, use --layers 8,9,11") +parser.add_argument("--breakmodel_gpulayers", type=str, help="If using a model that supports hybrid generation, this is a comma-separated list that specifies how many layers to put on each GPU device. For example to put 8 layers on device 0, 9 layers on device 1 and 11 layers on device 2, use --beakmodel_gpulayers 8,9,11") parser.add_argument("--override_delete", action='store_true', help="Deleting stories from inside the browser is disabled if you are using --remote and enabled otherwise. Using this option will instead allow deleting stories if using --remote and prevent deleting stories otherwise.") parser.add_argument("--override_rename", action='store_true', help="Renaming stories from inside the browser is disabled if you are using --remote and enabled otherwise. 
Using this option will instead allow renaming stories if using --remote and prevent renaming stories otherwise.") parser.add_argument("--configname", help="Force a fixed configuration name to aid with config management.") parser.add_argument("--colab", action='store_true', help="Optimize for Google Colab.") +parser.add_argument("--nobreakmodel", action='store_true', help="Disables Breakmodel support completely.") parser.add_argument("--share", action='store_true', default=False, help="If present will launch KoboldAI available to all computers rather than local only") parser.add_argument("--quiet", action='store_true', default=False, help="If present will suppress any story related text from showing on the console") @@ -426,7 +481,11 @@ if args.colab: args.remote = True; args.override_rename = True; args.override_delete = True; - + args.nobreakmodel = True; + +if args.nobreakmodel: + vars.nobreakmodel = True; + if args.remote: vars.remote = True; @@ -445,7 +504,7 @@ if args.model: vars.colaburl = args.path + "/request"; # Lets just use the same parameter to keep it simple else: - print("{0}Welcome to the KoboldAI Server!\nSelect an AI model to continue:{1}\n".format(colors.CYAN, colors.END)) + print("{0}Welcome to the KoboldAI Server!\nListed RAM is the optimal VRAM and CPU ram can be up to twice the amount.\nMost models can run at less VRAM with reduced max tokens or less layers on the GPU.\nSelect an AI model to continue:{1}\n".format(colors.CYAN, colors.END)) getModelSelection() # If transformers model was selected & GPU available, ask to use CPU or GPU @@ -482,15 +541,19 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme elif(vars.model_type == "not_found"): print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)") vars.model_type = "gpt_neo" + loadmodelsettings() print("{0}Looking for GPU support...{1}".format(colors.PURPLE, colors.END), end="") vars.hascuda 
= torch.cuda.is_available() - vars.bmsupported = vars.model_type in ("gpt_neo", "gptj") and not args.colab + vars.bmsupported = vars.model_type in ("gpt_neo", "gptj") and not vars.nobreakmodel if(args.breakmodel is not None and args.breakmodel): - print("WARNING: --breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --layers is used (see --help for details).", file=sys.stderr) + print("WARNING: --breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).", file=sys.stderr) if(args.breakmodel_layers is not None): - print("WARNING: --breakmodel_layers is deprecated. Use --layers instead (see --help for details).", file=sys.stderr) + print("WARNING: --breakmodel_layers is deprecated. Use --breakmodel_gpulayers instead (see --help for details).", file=sys.stderr) + if(args.model and vars.bmsupported and not args.breakmodel_gpulayers and not args.breakmodel_layers): + print("WARNING: Model launched without the --breakmodel_gpulayers argument, defaulting to GPU only mode.", file=sys.stderr) + vars.bmsupported = False if(not vars.bmsupported and (args.breakmodel_gpulayers is not None or args.breakmodel_layers is not None)): - print("WARNING: This model does not support hybrid generation. --layers will be ignored.", file=sys.stderr) + print("WARNING: This model does not support hybrid generation. 
--breakmodel_gpulayers will be ignored.", file=sys.stderr) if(vars.hascuda): print("{0}FOUND!{1}".format(colors.GREEN, colors.END)) else: @@ -917,7 +980,7 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme model = AutoModelForCausalLM.from_pretrained(vars.custmodpth, cache_dir="cache/", **lowmem) except ValueError as e: model = GPTNeoForCausalLM.from_pretrained(vars.custmodpth, cache_dir="cache/", **lowmem) - elif(os.path.isdir(format(vars.model.replace('/', '_')))): + elif(os.path.isdir(vars.model.replace('/', '_'))): with(maybe_use_float16()): try: tokenizer = AutoTokenizer.from_pretrained(vars.model.replace('/', '_'), cache_dir="cache/") @@ -1071,6 +1134,7 @@ else: print("{0}Initializing Mesh Transformer JAX, please wait...{1}".format(colors.PURPLE, colors.END)) assert vars.model == "TPUMeshTransformerGPTJ" and vars.custmodpth and os.path.isdir(vars.custmodpth) import tpu_mtj_backend + tpu_mtj_backend.vars = vars tpu_mtj_backend.warper_callback = tpumtjgenerate_warper_callback tpu_mtj_backend.stopping_callback = tpumtjgenerate_stopping_callback tpu_mtj_backend.compiling_callback = tpumtjgenerate_compiling_callback @@ -2129,14 +2193,25 @@ def sendUSStatItems(): emit('from_server', {'cmd': 'usstatitems', 'data': loaded, 'flash': last_userscripts != vars.last_userscripts}, broadcast=True) vars.last_userscripts = last_userscripts +#==================================================================# +# KoboldAI Markup Formatting (Mixture of Markdown and sanitized html) +#==================================================================# +def kml(txt): + txt = txt.replace('>', '>') + txt = bleach.clean(markdown.markdown(txt), tags = ['p', 'em', 'strong', 'code', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'ul', 'b', 'i', 'a', 'span', 'button'], styles = ['color', 'font-weight'], attributes=['id', 'class', 'style', 'href']) + return txt + #==================================================================# # Send start message and tell 
Javascript to set UI state #==================================================================# def setStartState(): - txt = "Welcome to KoboldAI! You are running "+getmodelname()+".
" - if(not vars.noai): - txt = txt + "Please load a game or enter a prompt below to begin!
" + if(vars.welcome): + txt = kml(vars.welcome) + "
" else: + txt = "Welcome to KoboldAI! You are running "+getmodelname()+".
" + if(not vars.noai and not vars.welcome): + txt = txt + "Please load a game or enter a prompt below to begin!
" + if(vars.noai): txt = txt + "Please load or import a story to read. There is no AI in this mode." emit('from_server', {'cmd': 'updatescreen', 'gamestarted': vars.gamestarted, 'data': txt}, broadcast=True) emit('from_server', {'cmd': 'setgamestate', 'data': 'start'}, broadcast=True) @@ -2190,6 +2265,8 @@ def savesettings(): js["rngpersist"] = vars.rngpersist js["nogenmod"] = vars.nogenmod js["autosave"] = vars.autosave + js["welcome"] = vars.welcome + js["newlinemode"] = vars.newlinemode js["antemplate"] = vars.setauthornotetemplate @@ -2264,6 +2341,10 @@ def loadsettings(): vars.nogenmod = js["nogenmod"] if("autosave" in js): vars.autosave = js["autosave"] + if("newlinemode" in js): + vars.newlinemode = js["newlinemode"] + if("welcome" in js): + vars.welcome = js["welcome"] if("antemplate" in js): vars.setauthornotetemplate = js["antemplate"] @@ -2291,47 +2372,6 @@ def loadsettings(): file.close() -#==================================================================# -# Allow the models to override some settings -#==================================================================# -def loadmodelsettings(): - try: - model_js_config = str(model_config).partition(' ')[2] - js = json.loads(model_js_config) - except Exception as e: - try: - model_js_config = open(vars.custmodpth + "/config.json", "r") - except Exception as e: - model_js_config = open(vars.custmodpth.replace('/', '_') + "/config.json", "r") - js = json.load(model_js_config) - if("badwordsids" in js): - vars.badwordsids = js["badwordsids"] - if("temp" in js): - vars.temp = js["temp"] - if("top_p" in js): - vars.top_p = js["top_p"] - if("top_k" in js): - vars.top_k = js["top_k"] - if("tfs" in js): - vars.tfs = js["tfs"] - if("rep_pen" in js): - vars.rep_pen = js["rep_pen"] - if("rep_pen_slope" in js): - vars.rep_pen_slope = js["rep_pen_slope"] - if("rep_pen_range" in js): - vars.rep_pen_range = js["rep_pen_range"] - if("adventure" in js): - vars.adventure = js["adventure"] - if("chatmode" in js): - 
vars.chatmode = js["chatmode"] - if("dynamicscan" in js): - vars.dynamicscan = js["dynamicscan"] - if("formatoptns" in js): - vars.formatoptns = js["formatoptns"] - if("antemplate" in js): - vars.setauthornotetemplate = js["antemplate"] - if(not vars.gamestarted): - vars.authornotetemplate = vars.setauthornotetemplate #==================================================================# # Don't save settings unless 2 seconds have passed without modification @@ -2369,7 +2409,7 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, # Ignore new submissions if the AI is currently busy if(vars.aibusy): return - + while(True): set_aibusy(1) @@ -2392,7 +2432,11 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, data = re.sub(r'\n+', ' ', data) if(len(data)): data = f"\n{vars.chatname} : {data}\n" - + + # mode + if(vars.newlinemode == "s"): + data = data.replace('\n', "") + # If we're not continuing, store a copy of the raw input if(data != ""): vars.lastact = data @@ -2744,7 +2788,7 @@ def calcsubmit(txt): actionlen = len(vars.actions) winfo, mem, anotetxt, found_entries = calcsubmitbudgetheader(txt) - + # For all transformers models if(vars.model != "InferKit"): subtxt, min, max = calcsubmitbudget(actionlen, winfo, mem, anotetxt, vars.actions, submission=txt) @@ -3296,7 +3340,7 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None): # Replaces returns and newlines with HTML breaks #==================================================================# def formatforhtml(txt): - return txt.replace("\\r\\n", "
").replace("\\r", "
").replace("\\n", "
").replace("\r\n", "
").replace('\n', '
').replace('\r', '
') + return txt.replace("\\r\\n", "
").replace("\\r", "
").replace("\\n", "
").replace("\r\n", "
").replace('\n', '
').replace('\r', '
').replace('</s>', '
') #==================================================================# # Strips submitted text from the text returned by the AI @@ -3323,13 +3367,16 @@ def applyinputformatting(txt): # Add sentence spacing if(vars.formatoptns["frmtadsnsp"]): txt = utils.addsentencespacing(txt, vars) - + return txt #==================================================================# # Applies chosen formatting options to text returned from AI #==================================================================# def applyoutputformatting(txt): + # Revert S mode on output to maintain compatibility + txt = txt.replace('', "\n") + # Use standard quotes and apostrophes txt = utils.fixquotes(txt) @@ -4781,8 +4828,6 @@ def randomGameRequest(topic, memory=""): emit('from_server', {'cmd': 'setmemory', 'data': vars.memory}, broadcast=True) # Load desired settings from both the model and the users config file -if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransformerGPTJ"]): - loadmodelsettings() loadsettings() # Prevent tokenizer from taking extra time the first time it's used diff --git a/environments/finetuneanon.yml b/environments/finetuneanon.yml index d3e2e8ae..b012beb2 100644 --- a/environments/finetuneanon.yml +++ b/environments/finetuneanon.yml @@ -11,6 +11,8 @@ dependencies: - tensorflow-gpu - python=3.8.* - eventlet + - markdown + - bleach - pip - git - pip: diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 462a9e22..7d9d8e69 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -1,6 +1,5 @@ name: koboldai channels: - - huggingface - pytorch - conda-forge - defaults @@ -12,6 +11,8 @@ dependencies: - cudatoolkit=11.1 - transformers - eventlet + - markdown + - bleach - pip - git - pip: diff --git a/environments/rocm-finetune.yml b/environments/rocm-finetune.yml index 53dc26c2..50d5d520 100644 --- a/environments/rocm-finetune.yml +++ b/environments/rocm-finetune.yml @@ -7,6 +7,8 @@ dependencies: - 
flask-socketio - python=3.8.* - eventlet + - markdown + - bleach - pip - git - pip: diff --git a/environments/rocm.yml b/environments/rocm.yml index cae0c152..8690ec57 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -1,6 +1,5 @@ name: koboldai channels: - - huggingface - conda-forge - defaults dependencies: @@ -9,6 +8,8 @@ dependencies: - flask-socketio - python=3.8.* - eventlet + - markdown + - bleach - pip - git - pip: diff --git a/readme.md b/readme.md index 8e949828..a6050f59 100644 --- a/readme.md +++ b/readme.md @@ -4,13 +4,13 @@ This is a browser-based front-end for AI-assisted writing with multiple local & ## Multiple ways to play -Stories can be played like a Novel, or played like a text adventure game with an easy toggle to change between the two gameplay styles. This makes KoboldAI both a writing assistant and a game. The way you play and how good the AI will be depends on the model or service you decide to use. No matter if you want to use the free, fast power of Google Colab, your own high end graphics card, an online service you have an API key for (Like OpenAI or Inferkit) or if you rather just run it slower on your CPU you will be able to find a way to use KoboldAI that works for you. +Stories can be played like a Novel, a text adventure game or used as a chatbot with easy toggles to change between the multiple gameplay styles. This makes KoboldAI both a writing assistant, a game and a platform for so much more. The way you play and how good the AI will be depends on the model or service you decide to use. No matter if you want to use the free, fast power of Google Colab, your own high end graphics card, an online service you have an API key for (Like OpenAI or Inferkit) or if you rather just run it slower on your CPU you will be able to find a way to use KoboldAI that works for you.
### Adventure mode By default KoboldAI will run in a generic mode optimized for writing, but with the right model you can play this like AI Dungeon without any issues. You can enable this in the settings and bring your own prompt, try generating a random prompt or download one of the prompts available at [prompts.aidg.club](https://prompts.aidg.club) . -The gameplay will be slightly different than the gameplay in AI Dungeon because we adopted the style of the Unleashed fork, giving you full control over all the characters because we do not automatically adapt your sentences behind the scenes. This means you can more reliably control characters that are not you. +The gameplay will be slightly different than the gameplay in AI Dungeon because we adopted the style of the Unleashed fork, giving you full control over all the characters because we do not automatically adapt your sentences behind the scenes. This means you can more reliably control characters that are not you. As a result of this what you need to type is slightly different, in AI Dungeon you would type ***take the sword*** while in KoboldAI you would type it like a sentence such as ***You take the sword*** and this is best done with the word You instead of I. @@ -23,22 +23,64 @@ If you want to do this with your friends we advice using the main character as Y If you want to use KoboldAI as a writing assistant this is best done in the regular mode with a model optimized for Novels. These models do not make the assumption that there is a You character and focus on Novel like writing. For writing these will often give you better results than Adventure or Generic models. That said, if you give it a good introduction to the story large generic models like 6B can be used if a more specific model is not available for what you wish to write. You can also try to use models that are not specific to what you wish to do, for example a NSFW Novel model for a SFW story if a SFW model is unavailable.
This will mean you will have to correct the model more often because of its bias, but can still produce good enough results if it is familiar enough with your topic. +### Chatbot Mode + +In chatbot mode you can use a suitable model as a chatbot, this mode automatically adds your name to the beginning of the sentences and prevents the AI from talking as you. To use it properly you must write your story opening as both characters in the following format (You can use your own text) : + +``` ChatBot Opening Example +Bot : Hey! +You : Hey Boyname, how have you been? +Bot : Been good! How about you? +You : Been great too, excited to try out KoboldAI +Bot : KoboldAI is really fun! +You : For sure! What is your favorite game? +``` + +It's recommended to have your own input be the last input, especially in the beginning it's possible that the AI mixes up the names. In that case either retry or manually correct the name. This behavior improves as the chat progresses. Some models may swap names if they are more familiar with a different name that is similar to the name you defined for the bot. In that case you can either do the occasional manual correction or choose a name for your chatbot that the AI likes better. + +This mode works the best on either a Generic model or a chatbot model specifically designed for it, some models like the AvrilAI model are instead designed to be used in Adventure mode and do not conform to the format above. These models typically ship with adventure mode enabled by default and should not be switched over to chatbot mode. + +Novel or Adventure models are not recommended for this feature but might still work but can derail away from the conversation format quickly. + + + ## Play KoboldAI online for free on Google Colab (The easiest way to play) -We provide multiple ready made versions to get you going, click on the name for a link to the specific version.
These run entirely on Google's Servers and will automatically upload saves to your Google Drive if you choose to manually save a story. Each version has slightly different instructions on how to use them (Many need some space on your google drive to run, others may need some manual steps) that are listed on the page. +If you would like to play KoboldAI online for free on a powerful computer you can use Google Colaboratory. We provide two editions, a TPU and a GPU edition with a variety of models available. These run entirely on Google's Servers and will automatically upload saves to your Google Drive if you choose to save a story (Alternatively, you can choose to download your save instead so that it never gets stored on Google Drive). Detailed instructions on how to use them are at the bottom of the Colabs. -TPU editions work on any configuration of TPU Google gives out at the time of writing. GPU editions are subject to a GPU lottery and may crash on launch if you are unlucky (Especially if a lot of users are using up the good GPU's or you have been using Colab often). +Each edition features different models and requires different hardware to run, this means that if you are unable to obtain a TPU or a GPU you might still be able to use the other version. The models you can use are listed underneath the edition. To open a Colab click the big link featuring the edition's name. -[Click here to open the Recommended version](https://henk.tech/colabkobold) +### [TPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb) + +| Model | Size | Type | Drive Space | Description | +| ------------------------------ | ------ | --------- | ----------- | ------------------------------------------------------------ | +| Skein 6B by VE_FORBRYDERNE | 6B TPU | Hybrid | 0 GB | Skein is our flagship 6B model, it is a hybrid between an Adventure model and a Novel model.
Best used with either Adventure mode or the You Bias userscript enabled. Skein has been trained on high quality Novels along with CYOA adventure stories and is not as wacky as the Adventure model. It also has tagging support. | +| Adventure 6B by VE_FORBRYDERNE | 6B TPU | Adventure | 0 GB | Adventure is a 6B model designed to mimic the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wacky adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). | +| Lit 6B by Haru | 6B TPU | NSFW | 8 GB / 12 GB | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. | +| Generic 6B by EleutherAI | 6B TPU | Generic | 10 GB / 12 GB | GPT-J-6B is what all other models are based on, if you need something that has no specific bias towards any particular subject this is the model for you. Best used when the other models are not suitable for what you wish to do. Such as homework assistance, blog writing, coding and more. It needs more hand holding than other models and is more prone to undesirable formatting changes. | +| C1 6B by Haru | 6B TPU | Chatbot | 8 GB / 12 GB | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode.
| + +### [GPU Edition Colab](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb) + +| Model | Size | Type | Description | +| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ | +| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genres. It is meant to be used in KoboldAI's regular mode. | +| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also known as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wacky adventures that AI Dungeon Classic players love. | +| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. | +| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. | +| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is too tame for you Shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias.
Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. | +| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. | + +### Model Types +| Type | Description | +| --------- | ------------------------------------------------------------ | +| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. | +| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. | +| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. | +| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. | +| Hybrid | Hybrid models are a blend between different Types, for example they are trained on both Novel stories and Adventure stories. These models are great variety models that you can use for multiple different playstyles and modes, but depending on your usage you may need to enable Adventure Mode or the You bias (in userscripts).
| +| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. | -| Version | Model | Size | Style | Description | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | --------------- | ------------------------------------------------------------ | -| [Adventure 6B](https://colab.research.google.com/drive/1vdAsD0xCc_YsAXqBUxb_QAwPOXkFJtxm?usp=sharing#sandboxMode=true) | [gpt-j-6b-adventure-jax](https://wandb.ai/ve-forbryderne/adventure/runs/carol-data/files/models) by ve_forbryderne (Download the -hf version if you plan to run this locally) | 6B TPU | Adventure | This is the Recommended version for AI Dungeon players, this is effectively a Free Griffin but with more control. This Colab edition provides better memory than Griffin would have given you, allowing for a more coherent experience. And while it will still generate characters like The Great Litch Lord that AI Dungeon players are familiar with it was trained on stories beyond AI Dungeon and is more balanced in its approaches. This is a TPU edition so it can fit a lot in memory | -| [Skein](https://colab.research.google.com/drive/1ZAKgkSyyfiZN87npKYaRM8vL4OF2Btfg?usp=sharing#sandboxMode=true) | gpt-j-6b-skein-jax by ve_forbryderne (Download the -hf version if you plan to run this locally) | 6B TPU | Novel/Adventure | Skein is a hybrid between a Novel model and the Adventure model. Because of this it needs a bit more context about the writing style (Needing a few retries in the random story generator if you use this). 
It was trained on both Light Novels and choose your own adventure stories along side extra information to help it understand story themes better. It is recommended to play this with Adventure mode enabled to prevent it from doing "Actions" even if you wish to use it for Novel writing. If you wish to use it for Novel writing you can do this by toggling the input to Story. | -| [Generic 6B TPU](https://colab.research.google.com/drive/1pG9Gz9PrqklNBESPNaXvfctMVnvwf_Q8#forceEdit=true&sandboxMode=true&scrollTo=jcxnaOk5Th4x) | [Original GPT-6-JAX Slim](https://the-eye.eu/public/AI/GPT-J-6B/step_383500_slim.tar.gz) (Requires a TPU and does not work local) | 6B TPU | Novel | The recommended model if you want a generic experience. This model is not optimized for anything in particular and works best when you give it a longer introduction. Make sure to include examples for the AI to learn from and write the first part of the story entirely yourself. Then it should be able to learn from your style and continue from there. Very sensitive to a high temp because it knows webpages and code, so when configured incorrectly it will easily end a story with 'Rate my blogpost, follow me on twitter' and the likes. | -| [Horni](https://colab.research.google.com/drive/1QwjkK_JeK9aYEkyM_6nrJXQARFMnBDmG?usp=sharing#sandboxMode=true) (Formerly Novel/NSFW) | [GPT-Neo-2.7B-Horni](https://storage.henk.tech/KoboldAI/gpt-neo-2.7B-horni.7z) by finetune | 2.7B GPU | Novel | One of the oldest models in our collection, tuned on Literotica to produce a Novel style model optimized towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. Because this is an older 2.7B model it is only compatible as a GPU instance. Most GPU's in Colab are powerful enough to run this well but it will crash if you get something weak like a Nvidia P7. 
| -| [Picard](https://colab.research.google.com/drive/1VNVKtbPaTcmkQzy8bEQkd9SUiUJBdbEL?usp=sharing#sandboxMode=true) | [Picard](https://storage.henk.tech/KoboldAI/gpt-neo-2.7B-picard.7z) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. Most GPU's in Colab are powerful enough to run this well but it will crash if you get something weak like a Nvidia P7. | -| [Shinen](https://colab.research.google.com/drive/1-7Lkj-np2DaSnmq1OdPYkel6W2rh4E-0?usp=sharing#sandboxMode=true) | [Shinen](https://storage.henk.tech/KoboldAI/gpt-neo-2.7B-shinen.7z) by Mr Seeker | 2.7B GPU | Novel | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. Most GPU's in Colab are powerful enough to run this well but it will crash if you get something weak like a Nvidia P7. | ## Install KoboldAI on your own computer @@ -51,11 +93,11 @@ KoboldAI is a rolling release on our github, the code you see is also the game. ### Installing KoboldAI on Windows 10 or higher using the KoboldAI Runtime Installer 1. Extract the .zip to a location you wish to install KoboldAI, you will need roughly 20GB of free space for the installation (this does not include the models). -2. Open install_requirements.bat as administrator. -3. Choose either the Finetuneanon or the Regular version of transformers (Finetuneanon works better for GPU players but breaks CPU mode, only use this version if you have a modern Nvidia GPU with enough VRAM for the model you wish to run). -4. 
You will now be asked to choose the installation mode, we **strongly** recommend the Temporary K: drive option for anyone who does not already have a K: drive on their computer. This option eliminates most installation issues and also makes KoboldAI portable. The K: drive will be gone after a reboot and will automatically be recreated each time you play KoboldAI. +2. Open install_requirements.bat as **administrator**. +3. Choose the regular version of Transformers (Option 1), finetuneanon is deprecated and no longer recommended. +4. You will now be asked to choose the installation mode, we **strongly** recommend the Temporary B: drive option. This option eliminates most installation issues and also makes KoboldAI portable. The B: drive will be gone after a reboot and will automatically be recreated each time you play KoboldAI. 5. The installation will now automatically install its requirements, some stages may appear to freeze do not close the installer until it asks you to press a key. Before pressing a key to exit the installer please check if errors occurred. Most problems with the game crashing are related to installation/download errors. Disabling your antivirus can help if you get errors. -6. Use play.bat to play the game. +6. Use play.bat to start KoboldAI. ### Manual installation / Linux / Mac @@ -63,20 +105,13 @@ We can not provide a step by step guide for manual installation due to the vast If you would like to manually install KoboldAI you will need some python/conda package management knowledge to manually do one of the following steps : -1. Use our bundled environments files to install your own conda environment, this should also automatically install CUDA. -2. If you do not want to use conda install the requirements listed in requirements.txt and make sure that CUDA is properly installed. -3. Adapt and use our bundled docker files to create your own KoboldAI docker instance. +1. 
Use our bundled environments files to install your own conda environment, this should also automatically install CUDA (Recommended, you can get Miniconda from https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links). The recommended configuration is huggingface.yml for CUDA users and rocm.yml for ROCm users. +2. If you have a working copy of Docker for either CUDA or ROCm try play-cuda.sh or play-rocm.sh to launch the docker versions. In this case the installation is mostly automatic. +3. If conda is proving difficult you could also look inside requirements.txt for the required dependencies and try to install them yourself. This will likely be a mixture of pip and your native package manager, just installing our requirements.txt is not recommended since to speed things up we do not force any version changes. For local installations definitely prioritize conda as that is a better way for us to enforce you have the latest compatible versions. -### Using an AMD GPU on Linux +### AMD GPU's -AMD GPU's have terrible compute support, this will currently not work on Windows and will only work for a select few Linux GPU's. [You can find a list of the compatible GPU's here](https://github.com/RadeonOpenCompute/ROCm#Hardware-and-Software-Support). Any GPU that is not listed is guaranteed not to work with KoboldAI and we will not be able to provide proper support on GPU's that are not compatible with the versions of ROCm we require. This guide requires that you already followed the appropriate steps to configure both [ROCm](https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html) and [Docker]([Install Docker Engine | Docker Documentation](https://docs.docker.com/engine/install/)) and is for advanced users only. - -1. 
Make sure you have installed both the latest version of [Docker](https://docs.docker.com/engine/install/), docker-compose and [ROCm](https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html) on your system and have configured your user to have access to the Docker group (Sudo can interfere with the dialogues). -2. Assign our play-rocm.sh file execute permissions (chmod +x play-rocm.sh). -3. Run our play-rocm.sh file, it should now automatically install and create a suitable runtime for KoboldAI with AMD support and directly run the game afterwards. For X11 forwarding support you will need to run this as sudo at least once at the local machine. Otherwise use the command line options to load KoboldAI if you are playing this remotely. -4. Currently models automatically downloaded by the game are discarded on exit in the Docker version, it is strongly recommended that you manually download a model and load this using the custom model features to prevent unnecessary downloads. - -If you hit strange errors with the ROCm version where it fails on the installation be sure you are running the latest version of Docker and Docker-compose. Some versions will fail on the root elevation or lack the appropriate formats. +AMD GPU's have terrible compute support, this will currently not work on Windows and will only work for a select few Linux GPU's. [You can find a list of the compatible GPU's here](https://github.com/RadeonOpenCompute/ROCm#Hardware-and-Software-Support). Any GPU that is not listed is guaranteed not to work with KoboldAI and we will not be able to provide proper support on GPU's that are not compatible with the versions of ROCm we require. ### Troubleshooting @@ -100,50 +135,74 @@ Like with Python version conflicts we recommend uninstalling CUDA from your syst If you do not have a suitable Nvidia GPU that can run on CUDA10 or Higher and that supports Compute Capabilities 5.0 or higher we can not help you get the game detected on the GPU. 
Unless you are following our ROCm guide with a compatible AMD GPU. -#### "LayerNormKernelImpl" not implemented for 'Half' - -This error only occurs when you are trying to run a model on the CPU mode while Finetuneanon's version of Transformers is installed. If you want/need to use the CPU mode use the install_requirements.bat file with the Official Transformers option and choose to delete all existing files. - #### vocab.json / config.json is not found error If you get these errors you either did not select the correct folder for your custom model or the model you have downloaded is not (yet) compatible with KoboldAI. There exist a few models out there that are compatible and provide a pytorch_model.bin file but do not ship all the required files. In this case try downloading a compatible model of the same kind (For example another GPT-Neo if you downloaded a GPT-Neo model) and replace the pytorch_model.bin file with the one you are trying to run. Chances are this will work fine. ## KoboldAI Compatible Models -The models listed in the KoboldAI menu are generic models meant to easily get you going based on the Huggingface service. For higher quality models and fully offline use you will need to manually download a suitable model for your style. These are some of the models the community has available for you all tested to be compatible with KoboldAI and will be the brain of the AI. +Most of the high quality models have been integrated in the menu, these models have their download link removed since the easiest way to obtain them is to run them directly from the menu. KoboldAI will automatically download and convert the models to a offline format for later use. + +If you have old 6B versions which end in -hf they will no longer be compatible with the newer versions of transformers and will no longer behave correctly. 
It is highly recommended that you install the official version of transformers (offline installers for KoboldAI contain this version by default) and redownload these models from the menu to get compatible versions. If you have very limited internet we will for a limited time also offer finetuneanon's fork in the install_requirements.bat file, when using that option you will not be able to use the 6B models in our main menu so definitely upgrade when your internet allows. + +The VRAM requirements amounts are the recommended amounts for fast smooth play, playing with lower VRAM is possible but then you may need to either lower the amount of tokens in the settings, or you may need to put less layers on your GPU causing a significant performance loss. + +**For CPU players and during the loading regular RAM usage is double of what we list here.** | **Model** | Type | **(V)RAM** | Repetition Penalty | Description | | ------------------------------------------------------------ | --------------------------------- | ---------- | ------------------ | ------------------------------------------------------------ | -| [gpt-j-6b-adventure-jax-hf](https://api.wandb.ai/files/ve-forbryderne/adventure/carol-data/models/gpt-j-6b-adventure-hf.7z) | Adventure / 6B / Neo Custom | 16GB | 1.2 | This model has been trained on the AI Dungeon set with additional stories thrown in. It is the most well rounded AI Dungeon like model and can be seen as an improved Griffin. If you wish to play KoboldAI like AI Dungeon this is the one to pick. It works great with the random story generator if your temp is 0.5 . | -| [gpt-j-6b-skein-jax-hf](https://api.wandb.ai/files/ve-forbryderne/skein/files/gpt-j-6b-skein-hf.7z) | Adventure Novel / 6B / Neo Custom | 16GB | 1.1 | A hybrid of a few different datasets aimed to create a balanced story driven experience. If the adventure model is to focused on its own adventures and you want something a bit more generic this is the one for you. 
This model understands tags and adventure mode but can also be used as a writing assistant for your Novel. Its a good middle ground between a finetuned model and a generic model. It needs more guidance than some of the other models do making it less suitable for random story generation, but still focusses on writing rather than websites or code. If you want to use a model for existing story idea's this is a great choice. | -| [gpt-neo-2.7B-aid](https://storage.henk.tech/KoboldAI/gpt-neo-2.7B-aid.7z) | Adventure / 2.7B / Neo Custom | 8GB | 2.0 | This is one of the closest replications of the original AI Dungeon Classic model. Tuned on the same data that got uploaded alongside AI Dungeon. In KoboldAI we noticed this model performs better than the conversions of the original AI Dungeon model. It has all the traits you expect of AI Dungeon Classic while not having as many artifacts as this model was trained specifically for KoboldAI. Must be played with Adventure mode enabled to prevent it from doing actions on your behalf. | -| [gpt-neo-2.7B-horni](https://storage.henk.tech/KoboldAI/gpt-neo-2.7B-horni.7z) | Novel / 2.7B / Neo Custom | 8GB | 2.0 | One of the best novel models available for 2.7B focused on NSFW content. This model trains the AI to write in a story like fashion using a very large collection of Literotica stories. It is one of the original finetuned models for 2.7B. | -| [gpt-neo-2.7B-horni-ln](https://storage.henk.tech/KoboldAI/gpt-neo-2.7B-horni-ln.7z) | Novel / 2.7B / Neo Custom | 8GB | 2.0 | This model is much like the one above, but has been additionally trained on regular light novels. More likely to go SFW and is more focused towards themes found in these light novels over general cultural references. This is a good model for Novel writing especially if you want to add erotica to the mix. 
| -| [gpt-neo-2.7B-picard](https://storage.henk.tech/KoboldAI/gpt-neo-2.7B-picard.7z) | Novel / 2.7B / Neo Custom | 8GB | 2.0 | Picard is another Novel model, this time exclusively focused on SFW content of various genres. Unlike the name suggests this goes far beyond Star Trek stories and is not exclusively sci-fi. | -| [gpt-neo-2.7B-shinen](https://storage.henk.tech/KoboldAI/gpt-neo-2.7B-shinen.7z) | Novel / 2.7B / Neo Custom | 8GB | 2.0 | The most NSFW of them all, Shinen WILL make things sexual. This model will assume that whatever you are doing is meant to be a sex story and will sexualize constantly. It is designed for people who find Horni to tame. It was trained on SexStories instead of Literotica and was trained on tags making it easier to guide the AI to the right context. | -| [GPT-J-6B (Converted)](https://storage.henk.tech/KoboldAI/gpt-j-6b.7z) | Generic / 6B / Neo Custom | 16GB | 1.1 | This is the basis for all the other GPT-J-6B models, it has been trained on The Pile and is an open alternative for GPT Curie. Because it is a generic model it is not particularly good at anything and needs a long introduction to understand what you want to do. It is however the most flexible because it has no bias. If you want to do something that has no specific model available, such as writing a webpage article or coding this can be a good one to try. This specific version was converted by our community to be able to run as a GPT-Neo model on your GPU. | +| Skein 6B by VE_FORBRYDERNE | Adventure Novel / 6B / Neo Custom | 16GB | 1.1 | Skein is our flagship 6B model, it is a hybrid between an Adventure model and a Novel model. Best used with either Adventure mode or the You Bias userscript enabled. Skein has been trained on high quality Novels along with CYOA adventure stories and is not as wacky as the Adventure model. It also has tagging support.
| +| Adventure 6B by VE_FORBRYDERNE | Adventure / 6B / Neo Custom | 16GB | 1.2 | Adventure is a 6B model designed to mimic the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wacky adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). | +| Adventure 2.7B by melastacho | Adventure / 2.7B / Neo Custom | 8GB | 2.0 | This is one of the closest replications of the original AI Dungeon Classic model. Tuned on the same data that got uploaded alongside AI Dungeon. In KoboldAI we noticed this model performs better than the conversions of the original AI Dungeon model. It has all the traits you expect of AI Dungeon Classic while not having as many artifacts as this model was trained specifically for KoboldAI. Must be played with Adventure mode enabled to prevent it from doing actions on your behalf. | +| Horni 2.7B by finetuneanon | Novel / 2.7B / Neo Custom | 8GB | 2.0 | One of the best novel models available for 2.7B focused on NSFW content. This model trains the AI to write in a story like fashion using a very large collection of Literotica stories. It is one of the original finetuned models for 2.7B. | +| Horni-LN 2.7B by finetuneanon | Novel / 2.7B / Neo Custom | 8GB | 2.0 | This model is much like the one above, but has been additionally trained on regular light novels. More likely to go SFW and is more focused towards themes found in these light novels over general cultural references. This is a good model for Novel writing especially if you want to add erotica to the mix. | +| Picard 2.7B by Mr Seeker | Novel / 2.7B / Neo Custom | 8GB | 2.0 | Picard is another Novel model, this time exclusively focused on SFW content of various genres. Unlike the name suggests this goes far beyond Star Trek stories and is not exclusively sci-fi.
| +| Shinen 2.7B by Mr Seeker | Novel / 2.7B / Neo Custom | 8GB | 2.0 | The most NSFW of them all, Shinen WILL make things sexual. This model will assume that whatever you are doing is meant to be a sex story and will sexualize constantly. It is designed for people who find Horni to tame. It was trained on SexStories instead of Literotica and was trained on tags making it easier to guide the AI to the right context. | | [AID-16Bit](https://storage.henk.tech/KoboldAI/aid-16bit.zip) | Adventure / 1.5B / GPT-2 Custom | 4GB | 2.0 | The original AI Dungeon Classic model converted to Pytorch and then converted to a 16-bit Model making it half the size. | | [model_v5_pytorch](https://storage.henk.tech/KoboldAI/model_v5_pytorch.zip) (AI Dungeon's Original Model) | Adventure / 1.5B / GPT-2 Custom | 8GB | 2.0 | This is the original AI Dungeon Classic model converted to the Pytorch format compatible with AI Dungeon Clover and KoboldAI. We consider this model inferior to the GPT-Neo version because it has more artifacting due to its conversion. This is however the most authentic you can get to AI Dungeon Classic. | | [Novel 774M](https://storage.henk.tech/KoboldAI/Novel%20model%20774M.rar) | Novel / 774M / GPT-2 Custom | 4GB | 2.0 | Novel 774M is made by the AI Dungeon Clover community, because of its small size and novel bias it is more suitable for CPU players that want to play with speed over substance or players who want to test a GPU with a low amount of VRAM. These performance savings are at the cost of story quality and you should not expect the kind of in depth story capabilities that the larger models offer. It was trained for SFW stories. | | [Smut 774M](https://storage.henk.tech/KoboldAI/Smut%20model%20774M%2030K.rar) | Novel / 774M / GPT-2 Custom | 4GB | 2.0 | The NSFW version of the above, its a smaller GPT-2 based model made by the AI Dungeon Clover community. Gives decent speed on a CPU at the cost of story quality like the other 774M models. 
| -| [Mia](https://storage.henk.tech/KoboldAI/Mia.7z) | Adventure / 125M / Neo Custom | 1GB | 2.0 | Mia is the smallest Adventure model, it runs at very fast speeds on the CPU which makes it a good testing model for developers who do not have GPU access. Because of its small size it will constantly attempt to do actions on behalf of the player and it will not produce high quality stories. If you just need a small model for a quick test, or if you want to take the challenge of trying to run KoboldAI entirely on your phone this would be an easy model to use due to its small RAM requirements and fast (loading) speeds. | +| [Mia (GPT-Neo-125M-AID)](https://huggingface.co/KoboldAI/GPT-Neo-125M-AID) by Henk717 | Adventure / 125M / Neo Custom | 1GB | 2.0 | Mia is the smallest Adventure model, it runs at very fast speeds on the CPU which makes it a good testing model for developers who do not have GPU access. Because of its small size it will constantly attempt to do actions on behalf of the player and it will not produce high quality stories. If you just need a small model for a quick test, or if you want to take the challenge of trying to run KoboldAI entirely on your phone this would be an easy model to use due to its small RAM requirements and fast (loading) speeds. | +## Softprompts +Softprompts (also known as Modules in other products) are addons that can change the output of existing models. For example you may load a softprompt that biases the AI towards a certain subject and style like transcripts from your favorite TV show. + +Since these softprompts are often based on existing franchises we currently do not bundle any of them with KoboldAI due to copyright concerns (We do not want to put the entire project at risk). Instead look at community resources like #softprompts on the [KoboldAI Discord](https://discord.gg/XuQWadgU9k) or the [community hosted mirror](https://storage.henk.tech/KoboldAI/softprompts/) . 
+ +That way we are better protected from any DMCA claims as things can be taken down easier than directly on Github. If you have a copyright free softprompt that you made from scratch and is not based on existing IP that you would like to see officially bundled with KoboldAI issue a pull request with your softprompt. + +Training softprompts can be done for free with the [mtj-softtuner colab](https://colab.research.google.com/github/VE-FORBRYDERNE/mtj-softtuner/blob/main/mtj-softtuner.ipynb) , in that case you can leave most of the settings default. Your source data needs to be a folder with text files that are UTF-8 formatted and contain Unix line endings. + +## Userscripts + +Userscripts are scripts that can automate tasks in KoboldAI, or modify the AI behavior / input / output. +Scripting is done in Lua 5.4 (Lua does not need to be separately installed as long as you got all the python requirements) and has sandboxing to help protect you from malicious behavior. Even with these measures in place we strongly advise you only run userscripts from places you trust and/or understand, otherwise consult the community for advice on how safe the script might be. + +Inside the userscripts folder you will find our kaipreset scripts, these are default scripts that we think will be useful for our users. These scripts are automatically overwritten when you update KoboldAI, if you wish to modify these scripts make sure to first rename them to something else that does not contain kaipreset so your changes are not lost. These scripts range from a You Bias filter that prevents the AI from addressing characters as you, to ways of preventing the AI from using certain words, word replacements and more. + +Along with our preset scripts we also ship examples in the examples folder that merely serve as a demonstration and do not enhance your usage of KoboldAI. To use these scripts make sure to move them out of the examples folder before either using or modifying the script.
+Lastly, all the features of our userscript API are documented inside the API Documentation files inside the userscripts folder. + +For our TPU versions keep in mind that scripts modifying AI behavior rely on a different way of processing that is slower than if you leave these userscripts disabled even if your script only sporadically uses this modifier. If you want to partially use a script at its full speed then you can enable "No Gen Modifiers" to ensure that the parts that would make the TPU slow are not active. ## Contributors This project contains work from the following contributors : - The Gantian - Creator of KoboldAI, has created most features such as the interface, the different AI model / API integrations and in general the largest part of the project. -- VE FORBRYDERNE - Contributed many features such as the Editing overhaul, Adventure Mode, expansions to the world info section, breakmodel integration and much more. -- Henk717 - Contributed the installation scripts, this readme, random story generator, the docker scripts, the foundation for the commandline interface and other smaller changes as well as integrating multiple parts of the code of different forks to unite it all. Not all code Github attributes to Henk717 is by Henk717 as some of it has been integrations of other people's work. We try to clarify this in the contributors list as much as we can. -- Frogging101 - top_k / tfs support +- VE FORBRYDERNE - Contributed many features such as the Editing overhaul, Adventure Mode, expansions to the world info section, breakmodel integration, scripting support, softprompts and much more. As well as vastly improving the TPU compatibility and integrating external code into KoboldAI so we could use official versions of Transformers with virtually no downsides.
+- Henk717 - Contributed the installation scripts, this readme, random story generator, the docker scripts, the foundation for the commandline interface and other smaller changes as well as integrating multiple parts of the code of different forks to unite it all. He also optimized the model loading so that downloaded models get converted to efficient offline models and that in future models are more likely to work out of the box. Not all code Github attributes to Henk717 is by Henk717 as some of it has been integrations of other people's work. We try to clarify this in the contributors list as much as we can. +- Ebolam - Automatic Saving +- Frogging101 - top_k / tfs support (Part of this support was later redone by VE to integrate what was originally inside of finetuneanon's transformers) - UWUplus (Ralf) - Contributed storage systems for community colabs, as well as cleaning up and integrating the website dependencies/code better. He is also the maintainer of flask-cloudflared which we use to generate the cloudflare links. - Javalar - Initial Performance increases on the story_refresh - LexSong - Initial environment file adaptation for conda that served as a basis for the install_requirements.bat overhaul. - Arrmansa - Breakmodel support for other projects that served as a basis for VE FORBRYDERNE's integration. +- Jojorne - Small improvements to the response selection for gens per action. As well as various Model creators who will be listed near their models, and all the testers who helped make this possible! 
diff --git a/requirements.txt b/requirements.txt index 47c3267a..f077a04e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,6 @@ torch flask-cloudflared flask-ngrok eventlet -lupa==1.10 \ No newline at end of file +lupa==1.10 +markdown +bleach \ No newline at end of file diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 1cdd1d45..d9b403eb 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -13,4 +13,6 @@ Flask-SocketIO flask-cloudflared >= 0.0.5 flask-ngrok eventlet -lupa==1.10 \ No newline at end of file +lupa==1.10 +markdown +bleach \ No newline at end of file diff --git a/static/custom.css b/static/custom.css index aa7e7b36..fd6ebff7 100644 --- a/static/custom.css +++ b/static/custom.css @@ -447,7 +447,7 @@ body.connected #popupfooter, #popupfooter.always-available { #rspopup { width: 800px; background-color: #262626; - margin-top: 150px; + margin-top: 50px; } /*================= Classes =================*/ diff --git a/templates/index.html b/templates/index.html index e0e01bfa..19e96636 100644 --- a/templates/index.html +++ b/templates/index.html @@ -359,9 +359,9 @@

Story quality and topic depends on the model and your settings/suggestion (Around 0.5 temp is recommended).
- This feature works best with finetuned models like GPT-Neo-AID or GPT-Neo-Horni but is limited to what the AI knows.
+ This feature works best with heavily themed models like the Adventure model and can also be influenced with softprompts .
If you get random spam then your model is not capable of using this feature and if you get unrelated stories it does not understand the topic.
- Generated results are unfiltered and can be offensive or unsuitable for children.

+ Generated results are unfiltered and can be offensive or unsuitable for children, the AI can make connections the model/softprompt creator did not intend.

Unsaved data will be lost.

Below you can input a genre suggestion for the AI to loosely base the story on (For example Horror or Cowboy).
diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index 653f8cf1..40059425 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -450,7 +450,7 @@ class PenalizingCausalTransformer(CausalTransformer): compiling_callback() numseqs = numseqs_aux.shape[0] # These are the tokens that we don't want the AI to ever write - self.badwords = jnp.array([6880, 50256, 42496, 4613, 17414, 22039, 16410, 27, 29, 38430, 37922, 15913, 24618, 28725, 58, 47175, 36937, 26700, 12878, 16471, 37981, 5218, 29795, 13412, 45160, 3693, 49778, 4211, 20598, 36475, 33409, 44167, 32406, 29847, 29342, 42669, 685, 25787, 7359, 3784, 5320, 33994, 33490, 34516, 43734, 17635, 24293, 9959, 23785, 21737, 28401, 18161, 26358, 32509, 1279, 38155, 18189, 26894, 6927, 14610, 23834, 11037, 14631, 26933, 46904, 22330, 25915, 47934, 38214, 1875, 14692, 41832, 13163, 25970, 29565, 44926, 19841, 37250, 49029, 9609, 44438, 16791, 17816, 30109, 41888, 47527, 42924, 23984, 49074, 33717, 31161, 49082, 30138, 31175, 12240, 14804, 7131, 26076, 33250, 3556, 38381, 36338, 32756, 46581, 17912, 49146]) + self.badwords = jnp.array(vars.badwordsids).squeeze() @hk.transform def generate_sample(context, ctx_length): # Give the initial context to the transformer @@ -827,7 +827,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", **kwargs) global badwords # These are the tokens that we don't want the AI to ever write - badwords = jnp.array([6880, 50256, 42496, 4613, 17414, 22039, 16410, 27, 29, 38430, 37922, 15913, 24618, 28725, 58, 47175, 36937, 26700, 12878, 16471, 37981, 5218, 29795, 13412, 45160, 3693, 49778, 4211, 20598, 36475, 33409, 44167, 32406, 29847, 29342, 42669, 685, 25787, 7359, 3784, 5320, 33994, 33490, 34516, 43734, 17635, 24293, 9959, 23785, 21737, 28401, 18161, 26358, 32509, 1279, 38155, 18189, 26894, 6927, 14610, 23834, 11037, 14631, 26933, 46904, 22330, 25915, 47934, 38214, 1875, 14692, 41832, 13163, 25970, 29565, 44926, 19841, 37250, 49029, 9609, 44438, 16791, 17816, 30109, 
41888, 47527, 42924, 23984, 49074, 33717, 31161, 49082, 30138, 31175, 12240, 14804, 7131, 26076, 33250, 3556, 38381, 36338, 32756, 46581, 17912, 49146]) + badwords = jnp.array(vars.badwordsids).squeeze() if not path.endswith("/"): path += "/"