Merge branch 'united' into neox

Commit 39b770d6ef by Gnome Ann, 2022-05-12 17:29:34 -04:00
12 changed files with 296 additions and 184 deletions

.gitignore

@@ -24,6 +24,7 @@ softprompts
models
!models/models go here.txt
Uninstall
+.ipynb_checkpoints
# Ignore PyCharm project files.
.idea


@@ -7,23 +7,23 @@ IF %M%==2 GOTO subfolder
IF %M%==3 GOTO drivemap_B
:subfolder
-umamba.exe install --no-shortcuts -r miniconda3 -n base -c conda-forge jupyter
+umamba.exe install --no-shortcuts -r miniconda3 -n base -c conda-forge jupyterlab jupyterlab-git
call miniconda3\condabin\activate
-jupyter notebook
+jupyter-lab
cmd /k
:drivemap
subst K: miniconda3 >nul
-umamba.exe install --no-shortcuts -r K:\python\ -n base -c conda-forge jupyter
+umamba.exe install --no-shortcuts -r K:\python\ -n base -c conda-forge jupyterlab jupyterlab-git
call K:\python\condabin\activate
-jupyter notebook
+jupyter-lab
subst K: /D
cmd /k
:drivemap_B
subst B: miniconda3 >nul
-umamba.exe install --no-shortcuts -r B:\python\ -n base -c conda-forge jupyter
+umamba.exe install --no-shortcuts -r B:\python\ -n base -c conda-forge jupyterlab jupyterlab-git
call B:\python\condabin\activate
-jupyter notebook
+jupyter-lab
subst B: /D
cmd /k


@@ -90,7 +90,7 @@ adventurelist= [
["Adventure 6B", "KoboldAI/GPT-J-6B-Adventure", "16GB"],
["Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "8GB"],
["Adventure 1.3B", "KoboldAI/GPT-Neo-1.3B-Adventure", "6GB"],
-["Adventure 125M (Mia)", "KoboldAI/GPT-Neo-125M-AID", "2GB"],
+["Adventure 125M (Mia)", "Merry/AID-Neo-125M", "2GB"],
["Return to Main Menu", "Return", ""],
]
@@ -118,6 +118,7 @@ nsfwlist= [
chatlist= [
["Convo 6B (Chatbot)", "hitomi-team/convo-6B", "16GB"],
["C1 6B (Chatbot)", "hakurei/c1-6B", "16GB"],
+["C1 1.3B (Chatbot)", "iokru/c1-1.3B", "6GB"],
["Return to Main Menu", "Return", ""],
]
gptneolist = [
@@ -514,7 +515,7 @@ def loadmodelsettings():
js = json.load(open(vars.custmodpth.replace('/', '_') + "/config.json", "r"))
except Exception as e:
js = {}
-if vars.model_type == "xglm" or js.get("compat", "j") == "fairseq_lm":
+if vars.model_type == "xglm" or vars.model_type == "opt" or js.get("compat", "j") == "fairseq_lm":
vars.newlinemode = "s" # Default to </s> newline mode if using XGLM
vars.modelconfig = js
if("badwordsids" in js):
@@ -783,6 +784,7 @@ parser.add_argument("--host", action='store_true', help="Optimizes KoboldAI for
parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable")
parser.add_argument("--model", help="Specify the Model Type to skip the Menu")
parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)")
+parser.add_argument("--revision", help="Specify the model revision for huggingface models (can be a git branch/tag name or a git commit hash)")
parser.add_argument("--cpu", action='store_true', help="By default unattended launches are on the GPU use this option to force CPU usage.")
parser.add_argument("--breakmodel", action='store_true', help=argparse.SUPPRESS)
parser.add_argument("--breakmodel_layers", type=int, help=argparse.SUPPRESS)
@@ -794,6 +796,7 @@ parser.add_argument("--colab", action='store_true', help="Optimize for Google Co
parser.add_argument("--nobreakmodel", action='store_true', help="Disables Breakmodel support completely.")
parser.add_argument("--unblock", action='store_true', default=False, help="Unblocks the KoboldAI port to be accessible from other machines without optimizing for remote play (It is recommended to use --host instead)")
parser.add_argument("--quiet", action='store_true', default=False, help="If present will suppress any story related text from showing on the console")
+parser.add_argument("--no_aria2", action='store_true', default=False, help="Prevents KoboldAI from using aria2 to download huggingface models more efficiently, in case aria2 is causing you issues")
parser.add_argument("--lowmem", action='store_true', help="Extra Low Memory loading for the GPU, slower but memory does not peak to twice the usage")
parser.add_argument("--savemodel", action='store_true', help="Saves the model to the models folder even if --colab is used (Allows you to save models to Google Drive)")
args: argparse.Namespace = None
@@ -804,6 +807,7 @@ else:
args = parser.parse_args()
vars.model = args.model;
+vars.revision = args.revision
if args.colab:
args.remote = True;
@@ -866,19 +870,19 @@ if(vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMe
from transformers import AutoConfig
if(os.path.isdir(vars.custmodpth.replace('/', '_'))):
try:
-model_config = AutoConfig.from_pretrained(vars.custmodpth.replace('/', '_'), cache_dir="cache/")
+model_config = AutoConfig.from_pretrained(vars.custmodpth.replace('/', '_'), revision=vars.revision, cache_dir="cache")
vars.model_type = model_config.model_type
except ValueError as e:
vars.model_type = "not_found"
elif(os.path.isdir("models/{}".format(vars.custmodpth.replace('/', '_')))):
try:
-model_config = AutoConfig.from_pretrained("models/{}".format(vars.custmodpth.replace('/', '_')), cache_dir="cache/")
+model_config = AutoConfig.from_pretrained("models/{}".format(vars.custmodpth.replace('/', '_')), revision=vars.revision, cache_dir="cache")
vars.model_type = model_config.model_type
except ValueError as e:
vars.model_type = "not_found"
else:
try:
-model_config = AutoConfig.from_pretrained(vars.custmodpth, cache_dir="cache/")
+model_config = AutoConfig.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
vars.model_type = model_config.model_type
except ValueError as e:
vars.model_type = "not_found"
@@ -1110,6 +1114,15 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
import transformers.generation_utils
from transformers import __version__ as transformers_version
+from transformers import PreTrainedModel
+old_from_pretrained = PreTrainedModel.from_pretrained.__func__
+@classmethod
+def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
+    if not args.no_aria2:
+        utils.aria2_hook(pretrained_model_name_or_path, **kwargs)
+    return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
+PreTrainedModel.from_pretrained = new_from_pretrained
# Lazy loader
import torch_lazy_loader
def get_lazy_load_callback(n_layers, convert_to_float16=True):
@@ -1424,8 +1437,8 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
model_config = open(vars.custmodpth + "/config.json", "r")
js = json.load(model_config)
with(maybe_use_float16()):
-model = GPT2LMHeadModel.from_pretrained(vars.custmodpth, cache_dir="cache/")
-tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, cache_dir="cache/")
+model = GPT2LMHeadModel.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
vars.modeldim = get_hidden_size_from_model(model)
# Is CUDA available? If so, use GPU, otherwise fall back to CPU
if(vars.hascuda and vars.usegpu):
@@ -1460,45 +1473,45 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
lowmem = {}
if(os.path.isdir(vars.custmodpth)):
try:
-tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, cache_dir="cache")
+tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
except Exception as e:
try:
-tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, cache_dir="cache")
+tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
except Exception as e:
-tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache")
+tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
try:
-model = AutoModelForCausalLM.from_pretrained(vars.custmodpth, cache_dir="cache", **lowmem)
+model = AutoModelForCausalLM.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", **lowmem)
except Exception as e:
-model = GPTNeoForCausalLM.from_pretrained(vars.custmodpth, cache_dir="cache", **lowmem)
+model = GPTNeoForCausalLM.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", **lowmem)
elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
try:
-tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), cache_dir="cache")
+tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
except Exception as e:
try:
-tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), cache_dir="cache")
+tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
except Exception as e:
-tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache")
+tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
try:
-model = AutoModelForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), cache_dir="cache", **lowmem)
+model = AutoModelForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", **lowmem)
except Exception as e:
-model = GPTNeoForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), cache_dir="cache", **lowmem)
+model = GPTNeoForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", **lowmem)
else:
try:
-tokenizer = AutoTokenizer.from_pretrained(vars.model, cache_dir="cache")
+tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
except Exception as e:
try:
-tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, cache_dir="cache")
+tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
except Exception as e:
-tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache")
+tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
try:
-model = AutoModelForCausalLM.from_pretrained(vars.model, cache_dir="cache", **lowmem)
+model = AutoModelForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", **lowmem)
except Exception as e:
-model = GPTNeoForCausalLM.from_pretrained(vars.model, cache_dir="cache", **lowmem)
+model = GPTNeoForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", **lowmem)
if not args.colab or args.savemodel:
import shutil
model = model.half()
-model.save_pretrained("models/{}".format(vars.model.replace('/', '_')))
+model.save_pretrained("models/{}".format(vars.model.replace('/', '_')), max_shard_size="500MiB")
tokenizer.save_pretrained("models/{}".format(vars.model.replace('/', '_')))
shutil.rmtree("cache/")
@@ -1532,8 +1545,17 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
else:
from transformers import GPT2TokenizerFast
-tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache/")
+tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
else:
+from transformers import PreTrainedModel
+old_from_pretrained = PreTrainedModel.from_pretrained.__func__
+@classmethod
+def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
+    if not args.no_aria2:
+        utils.aria2_hook(pretrained_model_name_or_path, **kwargs)
+    return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
+PreTrainedModel.from_pretrained = new_from_pretrained
def tpumtjgetsofttokens():
soft_tokens = None
if(vars.sp is None):
@@ -1622,11 +1644,11 @@ else:
# If we're running Colab or OAI, we still need a tokenizer.
if(vars.model == "Colab"):
from transformers import GPT2TokenizerFast
-tokenizer = GPT2TokenizerFast.from_pretrained("EleutherAI/gpt-neo-2.7B", cache_dir="cache/")
+tokenizer = GPT2TokenizerFast.from_pretrained("EleutherAI/gpt-neo-2.7B", revision=vars.revision, cache_dir="cache")
loadsettings()
elif(vars.model == "OAI"):
from transformers import GPT2TokenizerFast
-tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache/")
+tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
loadsettings()
# Load the TPU backend if requested
elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
@@ -1812,7 +1834,7 @@ def lua_decode(tokens):
if("tokenizer" not in globals()):
from transformers import GPT2TokenizerFast
global tokenizer
-tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache/")
+tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
return utils.decodenewlines(tokenizer.decode(tokens))
#==================================================================#
@@ -1824,7 +1846,7 @@ def lua_encode(string):
if("tokenizer" not in globals()):
from transformers import GPT2TokenizerFast
global tokenizer
-tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache/")
+tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
return tokenizer.encode(utils.encodenewlines(string), max_length=int(4e9), truncation=True)
#==================================================================#
@@ -3080,7 +3102,7 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
if("tokenizer" not in globals()):
from transformers import GPT2TokenizerFast
global tokenizer
-tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache/")
+tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
# Calculate token budget
prompttkns = tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', vars.prompt)), max_length=int(2e9), truncation=True)
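The hunks above introduce two new launch flags, `--revision` and `--no_aria2`. As a quick illustration, a hypothetical invocation combining them might look like the following (the model name is just one of the menu entries listed earlier and any branch, tag, or commit hash can be given to `--revision`):

```bash
# Hypothetical launch: pin a Hugging Face model to a specific revision
# and skip the aria2-based downloader.
python aiserver.py --model KoboldAI/GPT-J-6B-Adventure --revision main --no_aria2
```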


@@ -137,7 +137,7 @@
" path = \"\"\n",
" download = \"\"\n",
"elif Model == \"C1 6B\":\n",
-" Model = \"hakurei/C1-6B\"\n",
+" Model = \"hakurei/c1-6B\"\n",
" path = \"\"\n",
" download = \"\"\n",
"else:\n",


@@ -162,7 +162,7 @@ if [ "$init" != "skip" ]; then
fi
# Make sure Colab has the system dependencies
-sudo apt install netbase -y
+sudo apt install netbase aria2 -y
npm install -g localtunnel
fi
@@ -186,8 +186,7 @@ fi
#Download routine for Aria2c scripts
if [ ! -z ${aria2+x} ]; then
-apt install aria2 -y
-curl -L $aria2 | aria2c -c -i- -d$dloc --user-agent=KoboldAI --file-allocation=none
+curl -L $aria2 | aria2c -x 10 -s 10 -j 10 -c -i- -d$dloc --user-agent=KoboldAI --file-allocation=none
fi
#Extract the model with 7z

install_requirements.sh (new executable file)

@@ -0,0 +1,16 @@
#!/bin/bash
if [[ $1 = "cuda" ]]; then
wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y
# Weird micromamba bug causes it to fail the first time, running it twice just to be safe, the second time is much faster
bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y
exit
fi
if [[ $1 = "rocm" ]]; then
wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
bin/micromamba create -f environments/rocm.yml -r runtime -n koboldai-rocm -y
# Weird micromamba bug causes it to fail the first time, running it twice just to be safe, the second time is much faster
bin/micromamba create -f environments/rocm.yml -r runtime -n koboldai-rocm -y
exit
fi
echo Please specify either CUDA or ROCM
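The launcher scripts further down call this installer automatically when the bundled runtime is missing, but it can also be run by hand from the KoboldAI folder, for example:

```bash
# Build the bundled conda runtime once, then launch with play.sh / play-rocm.sh
./install_requirements.sh cuda   # NVIDIA GPUs
./install_requirements.sh rocm   # supported AMD GPUs
```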


@@ -1,3 +1,5 @@
-wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
-bin/micromamba create -f environments/rocm.yml -r runtime -n koboldai-rocm -y
+#!/bin/bash
+if [ ! -f "runtime/envs/koboldai-rocm/bin/python" ]; then
+./install_requirements.sh rocm
+fi
bin/micromamba run -r runtime -n koboldai-rocm python aiserver.py $*


@@ -1,45 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "KoboldAI Jupyter",
"provenance": [],
"authorship_tag": "ABX9TyMDTbAhtDnKJa+aIEaQjpsL"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "TPU"
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# KoboldAI Launcher for generic Jupyter Notebooks\n",
"This notebook is meant as a way to easily launch KoboldAI on existing Jupyter instances that already have KoboldAI installed (For example a custom Saturn Cloud or Paperspace instance).\n",
"\n",
"For Google Colab please check out our Google Colab edition available at : https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb"
],
"metadata": {
"id": "hMRnGz42Xsy3"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "40B1QvI3Xv02"
},
"outputs": [],
"source": [
"!pip install -r requirements.txt\n",
"!python3 aiserver.py --remote"
]
}
]
}


@@ -1,3 +1,5 @@
-wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
-bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y
+#!/bin/bash
+if [ ! -f "runtime/envs/koboldai/bin/python" ]; then
+./install_requirements.sh cuda
+fi
bin/micromamba run -r runtime -n koboldai python aiserver.py $*
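Because the launcher forwards its arguments ($*) straight to aiserver.py, any of the server flags can be passed through it; a hypothetical example (model name taken from the menu list above):

```bash
# Launch through the runtime wrapper and forward flags to aiserver.py
./play.sh --model KoboldAI/GPT-Neo-2.7B-AID
```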

readme.md

@@ -1,4 +1,4 @@
-# KoboldAI - Your gateway to GPT writing
+## KoboldAI - Your gateway to GPT writing
This is a browser-based front-end for AI-assisted writing with multiple local & remote AI models. It offers the standard array of tools, including Memory, Author's Note, World Info, Save & Load, adjustable AI settings, formatting options, and the ability to import existing AI Dungeon adventures. You can also turn on Adventure mode and play the game like AI Dungeon Unleashed.
@@ -12,9 +12,9 @@ By default KoboldAI will run in a generic mode optimized for writing, but with t
The gameplay will be slightly different than the gameplay in AI Dungeon because we adopted the Type of the Unleashed fork, giving you full control over all the characters because we do not automatically adapt your sentences behind the scenes. This means you can more reliably control characters that are not you.
-As a result of this what you need to type is slightly different, in AI Dungeon you would type ***take the sword*** while in KoboldAI you would type it like a sentence such as ***You take the sword*** and this is best done with the word You instead of I.
+As a result of this what you need to type is slightly different, in AI Dungeon you would type _**take the sword**_ while in KoboldAI you would type it like a sentence such as _**You take the sword**_ and this is best done with the word You instead of I.
-To speak simply type : *You say "We should probably gather some supplies first"*
+To speak simply type : _You say "We should probably gather some supplies first"_
Just typing the quote might work, but the AI is at its best when you specify who does what in your commands.
If you want to do this with your friends we advise using the main character as You and using the other characters by their name if you are playing on a model trained for Adventures. These models assume there is a You in the story. This mode does usually not perform well on Novel models because they do not know how to handle the input those are best used with regular story writing where you take turns with the AI.
@@ -27,7 +27,7 @@ If you want to use KoboldAI as a writing assistant this is best done in the regu
In chatbot mode you can use a suitable model as a chatbot, this mode automatically adds your name to the beginning of the sentences and prevents the AI from talking as you. To use it properly you must write your story opening as both characters in the following format (You can use your own text) :
-``` ChatBot Opening Example
+```plaintext
Bot : Hey!
You : Hey Boyname, how have you been?
Bot : Been good! How about you?
@@ -42,8 +42,6 @@ This mode works the best on either a Generic model or a chatbot model specifical
Novel or Adventure models are not recommended for this feature but might still work but can derail away from the conversation format quickly.
## Play KoboldAI online for free on Google Colab (The easiest way to play)
If you would like to play KoboldAI online for free on a powerful computer you can use Google Colaboraty. We provide two editions, a TPU and a GPU edition with a variety of models available. These run entirely on Google's Servers and will automatically upload saves to your Google Drive if you choose to save a story (Alternatively, you can choose to download your save instead so that it never gets stored on Google Drive). Detailed instructions on how to use them are at the bottom of the Colab's.
@@ -52,18 +50,53 @@ Each edition features different models and requires different hardware to run, t
### [Click here for the TPU Edition Colab](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)
| Model | Size | Type | Drive Space | Description |
| ------------------------------ | ------ | --------- | ----------- | ------------------------------------------------------------ |
| Skein 6B by VE_FORBDRYDERNE | 6B TPU | Hybrid | 0 GB | Skein is our flagship 6B model, it is a hybrid between a Adventure model and a Novel model. Best used with either Adventure mode or the You Bias userscript enabled. Skein has been trained on high quality Novels along with CYOA adventure stories and is not as wackey as the Adventure model. It also has tagging support. |
| Adventure 6B by VE_FORBRYDERNE | 6B TPU | Adventure | 0 GB | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |
| Lit 6B by Haru | 6B TPU | NSFW | 8 GB / 12 GB | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |
| Generic 6B by EleutherAI | 6B TPU | Generic | 10 GB / 12 GB | GPT-J-6B is what all other models are based on, if you need something that has no specific bias towards any particular subject this is the model for you. Best used when the other models are not suitable for what you wish to do. Such as homework assistance, blog writing, coding and more. It needs more hand holding than other models and is more prone to undesirable formatting changes. |
| C1 6B by Haru | 6B TPU | Chatbot | 8 GB / 12 GB | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |
| Model | Size | Style | Description |
| --- | --- | --- | --- |
| Janeway by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |
| Shinen by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |
| Skein by VE\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |
| Adventure by VE\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |
| Lit by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |
| Convo | 6B | Chatbot | Convo-6B is a GPT-J 6B model fine-tuned on a collection of high quality open source datasets which amount to 6 million messages. The primary goal of the model is to provide improved performance and generalization when generating multi-turn dialogue for characters that were not present from within the fine tuning data. The prompted performance has especially improved over the predecessor model [C1-6B](https://huggingface.co/hakurei/c1-6B). |
| C1 by Haru | 6B | Chatbot | C1 has been trained on various internet chatrooms, it makes the basis for an interesting chatbot model and has been optimized to be used in the Chatmode. |
| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |
| Fairseq Dense | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |
| GPT-J-6B by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |
# [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)
| Model | Size | Style | Description |
| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ |
| [GPT-Neo-2.7B-Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B GPU | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |
| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |
| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |
| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |
| [GPT-Neo-2.7B-Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B GPU | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |
| [GPT-Neo-2.7B-Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B GPU | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |
| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |
| Style | Description |
| --------- | ------------------------------------------------------------ |
| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |
| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |
| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel style model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |
| Chatbot | These models are specifically trained for chatting and are best used with the Chatmode enabled. Typically trained on either public chatrooms or private chats. |
| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |
---
## Tips to get the most out of Google Colab
- Google will occationally show a Captcha, typically after it has been open for 30 minutes but it can be more frequent if you often use Colab. Make sure to do these properly, or you risk getting your instance shut down and getting a lower priority towards the TPU's.
- KoboldAI uses Google Drive to store your files and settings, if you wish to upload a softprompt or userscript this can be done directly on the Google Drive website. You can also use this to download backups of your KoboldAI related files or upload models of your own.
- Don't want to save your stories on Google Drive for privacy reasons? Do not use KoboldAI's save function and instead click Download as .json, this will automatically download the story to your own computer without ever touching Google's harddrives. You can load this back trough the Load from file option.
- Google shut your instance down unexpectedly? You can still make use of the Download as .json button to recover your story as long as you did not close the KoboldAI window. You can then load this back up in your next session.
- Done with KoboldAI? Go to the Runtime menu, click on Manage Sessions and terminate your open sessions that you no longer need. This trick can help you maintain higher priority towards getting a TPU.
- Models stored on Google Drive typically load faster than models we need to download from the internet.
### [Click here for the GPU Edition Colab](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)
| Model | Size | Type | Description |
-| ------------------------------------------------------------ | -------- | ---------- | ------------------------------------------------------------ |
+| --- | --- | --- | --- |
| [GPT-Neo-2.7B-Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B GPU | Novel | Picard is a model trained for SFW Novels based on GPT-Neo-2.7B. It is focused on Novel Type writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |
| [GPT-Neo-2.7B-AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B GPU | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |
| [GPT-Neo-2.7B-Horni-LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B GPU | Novel | This model is based on GPT-Neo-2.7B-Horni and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |
@@ -72,8 +105,9 @@ Each edition features different models and requires different hardware to run, t
| [GPT-Neo-2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B GPU | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |
### Model Types
| Type | Description |
-| --------- | ------------------------------------------------------------ |
+| --- | --- |
| Novel | For regular story writing, not compatible with Adventure mode or other specialty modes. |
| NSFW | Indicates that the model is strongly biased towards NSFW content and is not suitable for children, work environments or livestreaming. Most NSFW models are also Novel models in nature. |
| Adventure | These models are excellent for people willing to play KoboldAI like a Text Adventure game and are meant to be used with Adventure mode enabled. Even if you wish to use it as a Novel Type model you should always have Adventure mode on and set it to story. These models typically have a strong bias towards the use of the word You and without Adventure mode enabled break the story flow and write actions on your behalf. |
@@ -81,7 +115,6 @@ Each edition features different models and requires different hardware to run, t
| Hybrid | Hybrid models are a blend between different Types, for example they are trained on both Novel stories and Adventure stories. These models are great variety models that you can use for multiple different playTypes and modes, but depending on your usage you may need to enable Adventure Mode or the You bias (in userscripts). |
| Generic | Generic models are not trained towards anything specific, typically used as a basis for other tasks and models. They can do everything the other models can do, but require much more handholding to work properly. Generic models are an ideal basis for tasks that we have no specific model for, or for experiencing a softprompt in its raw form. |
## Install KoboldAI on your own computer
KoboldAI has a large number of dependencies you will need to install on your computer, unfortunately Python does not make it easy for us to provide instructions that work for everyone. The instructions below will work on most computers, but if you have multiple versions of Python installed conflicts can occur.
@@ -102,25 +135,34 @@ The easiest way for Windows users is to use the [offline installer](https://sour
### Installing KoboldAI Github release on Windows 10 or higher using the KoboldAI Runtime Installer
1. Extract the .zip to a location you wish to install KoboldAI, you will need roughly 20GB of free space for the installation (this does not include the models).
-2. Open install_requirements.bat as **administrator**.
+2. Open install\_requirements.bat as **administrator**.
3. Choose the regular version of Transformers (Option 1), finetuneanon is depreciated and no longer recommended.
4. You will now be asked to choose the installation mode, we **strongly** recommend the Temporary B: drive option. This option eliminates most installation issues and also makes KoboldAI portable. The B: drive will be gone after a reboot and will automatically be recreated each time you play KoboldAI.
5. The installation will now automatically install its requirements, some stages may appear to freeze do not close the installer until it asks you to press a key. Before pressing a key to exit the installer please check if errors occurred. Most problems with the game crashing are related to installation/download errors. Disabling your antivirus can help if you get errors.
6. Use play.bat to start KoboldAI.
-### Manual installation / Linux / Mac
+### Installing KoboldAI on Linux using the KoboldAI Runtime (Easiest)
+1. Clone the URL of this Github repository (For example git clone [https://github.com/koboldai/koboldai-client](https://github.com/koboldai/koboldai-client) )
+2. AMD user? Make sure ROCm is installed if you want GPU support. Is yours not compatible with ROCm? Follow the usual instructions.
+3. Run play.sh or if your AMD GPU supports ROCm use play-rocm.sh
+KoboldAI will now automatically configure its dependencies and start up, everything is contained in its own conda runtime so we will not clutter your system. The files will be located in the runtime subfolder. If at any point you wish to force a reinstallation of the runtime you can do so with the install\_requirements.sh file. While you can run this manually it is not neccesary.
+### Manual installation / Mac
We can not provide a step by step guide for manual installation due to the vast differences between the existing software configuration and the systems of our users.
If you would like to manually install KoboldAI you will need some python/conda package management knowledge to manually do one of the following steps :
1. Use our bundled environments files to install your own conda environment, this should also automatically install CUDA (Recommended, you can get Miniconda from https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links). The recommended configuration is huggingface.yml for CUDA users and rocm.yml for ROCm users.
-2. If you have a working copy of Docker for either CUDA or ROCm try play-cuda.sh or play-rocm.sh to launch the docker versions. In this case the installation is mostly automatic.
-3. If conda is proving difficult you could also look inside requirements.txt for the required dependencies and try to install them yourself. This will likely be a mixture of pip and your native package manager, just installing our requirements.txt is not recommended since to speed things up we do not force any version changes. For local installations definitely prioritize conda as that is a better way for us to enforce you have the latest compatible versions.
+2. If conda is proving difficult you could also look inside requirements.txt for the required dependencies and try to install them yourself. This will likely be a mixture of pip and your native package manager, just installing our requirements.txt is not recommended since we assume local users will run conda to get all dependencies. For local installations definitely prioritize conda as that is a better way for us to enforce that you have the compatible versions.
+3. Clone our Github or download the zip file.
+4. Now start KoboldAI with aiserver.py and not with our play.bat or play.sh files.
-### AMD GPU's
+### AMD GPU's (Linux only)
-AMD GPU's have terrible compute support, this will currently not work on Windows and will only work for a select few Linux GPU's. [You can find a list of the compatible GPU's here](https://github.com/RadeonOpenCompute/ROCm#Hardware-and-Software-Support). Any GPU that is not listed is guaranteed not to work with KoboldAI and we will not be able to provide proper support on GPU's that are not compatible with the versions of ROCm we require.
+AMD GPU's have terrible compute support, this will currently not work on Windows and will only work for a select few Linux GPU's. [You can find a list of the compatible GPU's here](https://github.com/RadeonOpenCompute/ROCm#Hardware-and-Software-Support). Any GPU that is not listed is guaranteed not to work with KoboldAI and we will not be able to provide proper support on GPU's that are not compatible with the versions of ROCm we require. Make sure to first install ROCm on your Linux system using a guide for your distribution, after that you can follow the usual linux instructions above.
### Troubleshooting
@ -140,41 +182,13 @@ In general, the less versions of Python you have on your system the higher your
GPU not found errors can be caused by one of two things, either you do not have a suitable Nvidia GPU (It needs Compute Capability 5.0 or higher to be able to play KoboldAI). Your Nvidia GPU is supported by KoboldAI but is not supported by the latest version of CUDA. Your Nvidia GPU is not yet supported by the latest version of CUDA or you have a dependency conflict like the ones mentioned above. GPU not found errors can be caused by one of two things, either you do not have a suitable Nvidia GPU (It needs Compute Capability 5.0 or higher to be able to play KoboldAI). Your Nvidia GPU is supported by KoboldAI but is not supported by the latest version of CUDA. Your Nvidia GPU is not yet supported by the latest version of CUDA or you have a dependency conflict like the ones mentioned above.
Like with Python version conflicts we recommend uninstalling CUDA from your system if you have manually installed it and do not need it for anything else and trying again. If your GPU needs CUDA10 to function open environments\finetuneanon.yml and add a line that says - cudatoolkit=10.2 underneath dependencies: . After this you can run the installer again (Pick the option to delete the existing files) and it will download a CUDA10 compatible version. Like with Python version conflicts we recommend uninstalling CUDA from your system if you have manually installed it and do not need it for anything else and trying again. If your GPU needs CUDA10 to function open environments\\finetuneanon.yml and add a line that says - cudatoolkit=10.2 underneath dependencies: . After this you can run the installer again (Pick the option to delete the existing files) and it will download a CUDA10 compatible version.
If you do not have a suitable Nvidia GPU that can run on CUDA10 or Higher and that supports Compute Capabilities 5.0 or higher we can not help you get the game detected on the GPU. Unless you are following our ROCm guide with a compatible AMD GPU. If you do not have a suitable Nvidia GPU that can run on CUDA10 or Higher and that supports Compute Capabilities 5.0 or higher we can not help you get the game detected on the GPU. Unless you are following our ROCm guide with a compatible AMD GPU.
#### vocab.json / config.json is not found error
If you get these errors you either did not select the correct folder for your custom model, or the model you have downloaded is not (yet) compatible with KoboldAI. A few models out there are compatible and provide a pytorch\_model.bin file but do not ship all the required files. In this case, try downloading a compatible model of the same kind (for example another GPT-Neo if you downloaded a GPT-Neo model) and replace its pytorch\_model.bin file with the one you are trying to run. Chances are this will work fine.
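A quick way to see what is actually missing is to compare your model folder against the files the Hugging Face loaders usually expect. A rough sketch (the folder path is hypothetical and the exact file set differs per model type):

```python
import os

model_dir = "models/my_custom_model"  # hypothetical: the folder you selected in KoboldAI
expected = ["config.json", "pytorch_model.bin", "vocab.json", "merges.txt", "tokenizer_config.json"]
for name in expected:
    present = os.path.isfile(os.path.join(model_dir, name))
    print(f"{name}: {'found' if present else 'MISSING'}")
```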
## KoboldAI Compatible Models
Most of the high quality models have been integrated into the menu; these models have their download link removed since the easiest way to obtain them is to run them directly from the menu. KoboldAI will automatically download and convert the models to an offline format for later use.
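The conversion is conceptually simple: once the weights are in the Hugging Face cache they are re-saved into the models folder so later sessions can load them without internet. A simplified sketch of the idea (not the exact code aiserver.py runs; the model name is just an example):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "KoboldAI/GPT-Neo-2.7B-AID"            # example menu model
target = "models/" + model_id.replace("/", "_")   # offline copy used on later launches

tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir="cache")
model = AutoModelForCausalLM.from_pretrained(model_id, cache_dir="cache")
tokenizer.save_pretrained(target)
model.save_pretrained(target)
```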
If you have old 6B versions which end in -hf, they are no longer compatible with the newer versions of transformers and will no longer behave correctly. It is highly recommended that you install the official version of transformers (offline installers for KoboldAI contain this version by default) and redownload these models from the menu to get compatible versions. If you have very limited internet we will, for a limited time, also offer finetuneanon's fork in the install_requirements.bat file; when using that option you will not be able to use the 6B models in our main menu, so definitely upgrade when your internet allows.
The listed VRAM amounts are the recommended amounts for fast, smooth play. Playing with lower VRAM is possible, but you may then need to either lower the amount of tokens in the settings, or put fewer layers on your GPU at a significant performance cost.
**For CPU players, and during loading, regular RAM usage is double what we list here.**
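As a rough rule of thumb (our approximation, not an exact measurement), the 16-bit weights alone take about two bytes per parameter; the listed recommendations add headroom for activations and generation buffers, and the temporary second copy held while loading is what doubles RAM usage on CPU:

```python
def rough_weight_gb(params_in_billions, bytes_per_param=2):
    # fp16 weights only; activations, buffers and loading overhead come on top.
    return params_in_billions * 1e9 * bytes_per_param / 1024**3

print(round(rough_weight_gb(6.0), 1))   # ~11.2 GB of weights -> 16GB recommended
print(round(rough_weight_gb(2.7), 1))   # ~5.0 GB of weights  -> 8GB recommended
```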
| **Model** | Type | **(V)RAM** | Repetition Penalty | Description |
| ------------------------------------------------------------ | --------------------------------- | ---------- | ------------------ | ------------------------------------------------------------ |
| Skein 6B by VE_FORBRYDERNE | Adventure Novel / 6B / Neo Custom | 16GB | 1.1 | Skein is our flagship 6B model, a hybrid between an Adventure model and a Novel model. Best used with either Adventure mode or the You Bias userscript enabled. Skein has been trained on high quality novels along with CYOA adventure stories and is not as wacky as the Adventure model. It also has tagging support. |
| Adventure 6B by VE_FORBRYDERNE | Adventure / 6B / Neo Custom | 16GB | 1.2 | Adventure is a 6B model designed to mimic the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wacky adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |
| Adventure 2.7B by melastashco | Adventure / 2.7B / Neo Custom | 8GB | 2.0 | This is one of the closest replications of the original AI Dungeon Classic model. Tuned on the same data that got uploaded alongside AI Dungeon. In KoboldAI we noticed this model performs better than the conversions of the original AI Dungeon model. It has all the traits you expect of AI Dungeon Classic while not having as many artifacts, as this model was trained specifically for KoboldAI. Must be played with Adventure mode enabled to prevent it from doing actions on your behalf. |
| Horni 2.7B by finetuneanon | Novel / 2.7B / Neo Custom | 8GB | 2.0 | One of the best novel models available for 2.7B focused on NSFW content. This model trains the AI to write in a story-like fashion using a very large collection of Literotica stories. It is one of the original finetuned models for 2.7B. |
| Horni-LN 2.7B by finetuneanon | Novel / 2.7B / Neo Custom | 8GB | 2.0 | This model is much like the one above, but has been additionally trained on regular light novels. It is more likely to stay SFW and is more focused on themes found in these light novels than on general cultural references. This is a good model for Novel writing, especially if you want to add erotica to the mix. |
| Picard 2.7B by Mr Seeker | Novel / 2.7B / Neo Custom | 8GB | 2.0 | Picard is another Novel model, this time exclusively focused on SFW content of various genres. Contrary to what the name suggests, this goes far beyond Star Trek stories and is not exclusively sci-fi. |
| Janeway 2.7B by Mr Seeker | Novel / 2.7B / Neo Custom | 8GB | 2.0 | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focused on SFW, romantic scenes might involve a degree of nudity. |
| Shinen 2.7B by Mr Seeker | Novel / 2.7B / Neo Custom | 8GB | 2.0 | The most NSFW of them all, Shinen WILL make things sexual. This model will assume that whatever you are doing is meant to be a sex story and will sexualize constantly. It is designed for people who find Horni too tame. It was trained on SexStories instead of Literotica, with tags included, making it easier to guide the AI to the right context. |
| [AID-16Bit](https://storage.henk.tech/KoboldAI/aid-16bit.zip) | Adventure / 1.5B / GPT-2 Custom | 4GB | 2.0 | The original AI Dungeon Classic model converted to Pytorch and then converted to a 16-bit Model making it half the size. |
| [model_v5_pytorch](https://storage.henk.tech/KoboldAI/model_v5_pytorch.zip) (AI Dungeon's Original Model) | Adventure / 1.5B / GPT-2 Custom | 8GB | 2.0 | This is the original AI Dungeon Classic model converted to the Pytorch format compatible with AI Dungeon Clover and KoboldAI. We consider this model inferior to the GPT-Neo version because it has more artifacting due to its conversion. This is however the most authentic you can get to AI Dungeon Classic. |
| [Novel 774M](https://storage.henk.tech/KoboldAI/Novel%20model%20774M.rar) | Novel / 774M / GPT-2 Custom | 4GB | 2.0 | Novel 774M is made by the AI Dungeon Clover community; because of its small size and novel bias it is more suitable for CPU players who want to play with speed over substance, or players who want to test a GPU with a low amount of VRAM. These performance savings come at the cost of story quality and you should not expect the kind of in-depth story capabilities that the larger models offer. It was trained for SFW stories. |
| [Smut 774M](https://storage.henk.tech/KoboldAI/Smut%20model%20774M%2030K.rar) | Novel / 774M / GPT-2 Custom | 4GB | 2.0 | The NSFW version of the above; it's a smaller GPT-2 based model made by the AI Dungeon Clover community. Gives decent speed on a CPU at the cost of story quality like the other 774M models. |
| [Mia (GPT-Neo-125M-AID)](https://huggingface.co/KoboldAI/GPT-Neo-125M-AID) by Henk717 | Adventure / 125M / Neo Custom | 1GB | 2.0 | Mia is the smallest Adventure model; it runs at very fast speeds on the CPU, which makes it a good testing model for developers who do not have GPU access. Because of its small size it will constantly attempt to do actions on behalf of the player and it will not produce high quality stories. If you just need a small model for a quick test, or if you want to take the challenge of trying to run KoboldAI entirely on your phone, this would be an easy model to use due to its small RAM requirements and fast (loading) speeds. |
## Softprompts
@@ -203,16 +217,17 @@ For our TPU versions keep in mind that scripts modifying AI behavior relies on a
This project contains work from the following contributors:
* The Gantian - Creator of KoboldAI, has created most features such as the interface, the different AI model / API integrations and in general the largest part of the project.
* VE FORBRYDERNE - Contributed many features such as the Editing overhaul, Adventure Mode, expansions to the world info section, breakmodel integration, scripting support, softprompts and much more. As well as vastly improving the TPU compatibility and integrating external code into KoboldAI so we could use official versions of Transformers with virtually no downsides.
* Henk717 - Contributed the installation scripts, this readme, random story generator, the docker scripts, the foundation for the commandline interface and other smaller changes as well as integrating multiple parts of the code of different forks to unite it all. He also optimized the model loading so that downloaded models get converted to efficient offline models and that in future models are more likely to work out of the box. Not all code Github attributes to Henk717 is by Henk717 as some of it has been integrations of other people's work. We try to clarify this in the contributors list as much as we can.
* Ebolam - Automatic Saving
* Frogging101 - top\_k / tfs support (Part of this support was later redone by VE to integrate what was originally inside of finetuneanon's transformers)
* UWUplus (Ralf) - Contributed storage systems for community colabs, as well as cleaning up and integrating the website dependencies/code better. He is also the maintainer of flask-cloudflared which we use to generate the cloudflare links.
* Javalar - Initial performance increases on the story\_refresh
* LexSong - Initial environment file adaptation for conda that served as a basis for the install\_requirements.bat overhaul.
* Arrmansa - Breakmodel support for other projects that served as a basis for VE FORBRYDERNE's integration.
* Jojorne - Small improvements to the response selection for gens per action.
* OccultSage (GooseAI) - Improved support for GooseAI/OpenAI
As well as various Model creators who will be listed near their models, and all the testers who helped make this possible!
@@ -222,4 +237,4 @@ Did we miss your contribution? Feel free to issue a commit adding your name to t
KoboldAI is licensed with an AGPL license; in short this means that it can be used by anyone for any purpose. However, if you decide to make a publicly available instance, your users are entitled to a copy of the source code including all modifications that you have made (which needs to be available through an interface such as a button on your website). You may also not distribute this project in a form that does not contain the source code (such as compiling / encrypting the code and distributing this version without also distributing the source code that includes the changes that you made; you are allowed to distribute this in a closed form if you also provide a separate archive with the source code).
umamba.exe is bundled for convenience because we observed that many of our users had trouble with command line download methods; it is not part of our project and does not fall under the AGPL license. It is licensed under the BSD-3-Clause license. Other files with differing licenses will have a reference or embedded version of this license within the file.


@@ -1251,39 +1251,39 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
    with torch_lazy_loader.use_lazy_torch_load(callback=callback, dematerialized_modules=True):
        if(os.path.isdir(vars.custmodpth)):
            try:
                tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
            except Exception as e:
                try:
                    tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
                except Exception as e:
                    tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
            try:
                model = AutoModelForCausalLM.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
            except Exception as e:
                model = GPTNeoForCausalLM.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
        elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
            try:
                tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
            except Exception as e:
                try:
                    tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
                except Exception as e:
                    tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
            try:
                model = AutoModelForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
            except Exception as e:
                model = GPTNeoForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
        else:
            try:
                tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
            except Exception as e:
                try:
                    tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
                except Exception as e:
                    tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
            try:
                model = AutoModelForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
            except Exception as e:
                model = GPTNeoForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
    #network.state = network.move_xmap(network.state, np.zeros(cores_per_replica))
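The only functional change in this hunk is that every from_pretrained call now forwards vars.revision, so a specific branch, tag or commit of a model on the Hugging Face Hub can be pinned instead of always tracking the default branch. Outside of KoboldAI the same mechanism looks like this (model name and revision are illustrative):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

revision = "main"  # any branch name, tag or commit hash on the model repository
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B", revision=revision, cache_dir="cache")
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B", revision=revision, cache_dir="cache")
```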

utils.py

@@ -1,5 +1,11 @@
from threading import Timer
import re
import shutil
import json
import subprocess
import tempfile
import requests
import os
vars = None
@@ -125,3 +131,97 @@ def decodenewlines(txt):
    if(vars.newlinemode == "s"):
        return txt.replace("</s>", '\n')
    return txt
#==================================================================#
# Downloads sharded huggingface checkpoints using aria2c if possible
#==================================================================#
def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_dir=None, proxies=None, resume_download=False, local_files_only=False, use_auth_token=None, user_agent=None, revision=None, mirror=None, **kwargs):
    import transformers
    import transformers.modeling_utils
    from huggingface_hub import HfFolder
    if shutil.which("aria2c") is None:  # Don't do anything if aria2 is not installed
        return
    if local_files_only:  # If local_files_only is true, we obviously don't need to download anything
        return
    if os.path.isdir(pretrained_model_name_or_path) or os.path.isfile(pretrained_model_name_or_path) or os.path.isfile(pretrained_model_name_or_path + ".index") or transformers.modeling_utils.is_remote_url(pretrained_model_name_or_path):
        return
    if proxies:
        print("WARNING: KoboldAI does not support using aria2 to download models from huggingface.co through a proxy. Disabling aria2 download mode.")
        return
    if use_auth_token:
        if isinstance(use_auth_token, str):
            token = use_auth_token
        else:
            token = HfFolder.get_token()
            if token is None:
                raise EnvironmentError("You specified use_auth_token=True, but a huggingface token was not found.")
    _cache_dir = str(cache_dir) if cache_dir is not None else transformers.TRANSFORMERS_CACHE
    sharded = False
    headers = {"user-agent": transformers.file_utils.http_user_agent(user_agent)}
    if use_auth_token:
        headers["authorization"] = f"Bearer {use_auth_token}"
    def is_cached(url):
        try:
            transformers.file_utils.get_from_cache(url, cache_dir=cache_dir, local_files_only=True)
        except FileNotFoundError:
            return False
        return True
    while True:  # Try to get the huggingface.co URL of the model's pytorch_model.bin or pytorch_model.bin.index.json file
        try:
            filename = transformers.modeling_utils.WEIGHTS_INDEX_NAME if sharded else transformers.modeling_utils.WEIGHTS_NAME
        except AttributeError:
            return
        url = transformers.file_utils.hf_bucket_url(pretrained_model_name_or_path, filename, revision=revision, mirror=mirror)
        if is_cached(url) or requests.head(url, allow_redirects=True, proxies=proxies, headers=headers):
            break
        if sharded:
            return
        else:
            sharded = True
    if not sharded:  # If the model has a pytorch_model.bin file, that's the only file to download
        filenames = [transformers.modeling_utils.WEIGHTS_NAME]
    else:  # Otherwise download the pytorch_model.bin.index.json and then let aria2 download all the pytorch_model-#####-of-#####.bin files mentioned inside it
        map_filename = transformers.file_utils.cached_path(url, cache_dir=cache_dir, force_download=force_download, proxies=proxies, resume_download=resume_download, use_auth_token=use_auth_token, user_agent=user_agent)
        with open(map_filename) as f:
            map_data = json.load(f)
        filenames = set(map_data["weight_map"].values())
    urls = [transformers.file_utils.hf_bucket_url(pretrained_model_name_or_path, n, revision=revision, mirror=mirror) for n in filenames]
    if not force_download:
        urls = [u for u in urls if not is_cached(u)]
        if not urls:
            return
    etags = [h.get("X-Linked-Etag") or h.get("ETag") for u in urls for h in [requests.head(u, headers=headers, allow_redirects=False, proxies=proxies, timeout=10).headers]]
    filenames = [transformers.file_utils.url_to_filename(u, t) for u, t in zip(urls, etags)]
    for n in filenames:
        path = os.path.join(_cache_dir, "kai-tempfile." + n + ".aria2")
        if os.path.exists(path):
            os.remove(path)
        path = os.path.join(_cache_dir, "kai-tempfile." + n)
        if os.path.exists(path):
            os.remove(path)
        if force_download:
            path = os.path.join(_cache_dir, n + ".json")
            if os.path.exists(path):
                os.remove(path)
            path = os.path.join(_cache_dir, n)
            if os.path.exists(path):
                os.remove(path)
    aria2_config = "\n".join(f"{u}\n out=kai-tempfile.{n}" for u, n in zip(urls, filenames)).encode()
    with tempfile.NamedTemporaryFile("w+b", delete=False) as f:
        f.write(aria2_config)
        f.flush()
        p = subprocess.Popen(["aria2c", "-x", "10", "-s", "10", "-j", "10", "--disable-ipv6", "--file-allocation=trunc", "--allow-overwrite", "--auto-file-renaming", "false", "-d", _cache_dir, "-i", f.name, "-U", transformers.file_utils.http_user_agent(user_agent)] + (["-c"] if not force_download else []) + ([f"--header='Authorization: Bearer {token}'"] if use_auth_token else []), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        for line in p.stdout:
            print(line.decode(), end="", flush=True)
        path = f.name
    try:
        os.remove(path)
    except OSError:
        pass
    code = p.wait()
    if code:
        raise OSError(f"aria2 exited with exit code {code}")
    for u, t, n in zip(urls, etags, filenames):
        os.rename(os.path.join(_cache_dir, "kai-tempfile." + n), os.path.join(_cache_dir, n))
        with open(os.path.join(_cache_dir, n + ".json"), "w") as f:
            json.dump({"url": u, "etag": t}, f)
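The hook is meant to run right before the normal transformers download path: it writes the finished files and their url/etag metadata into the same cache layout that from_pretrained reads. A minimal standalone use could look like this (model name and cache directory are examples only):

```python
from transformers import AutoModelForCausalLM

import utils  # KoboldAI's utils.py containing aria2_hook

model_id = "KoboldAI/GPT-J-6B-Adventure"  # example checkpoint
# Pre-fetch the (possibly sharded) weights with aria2c; this silently does nothing
# when aria2c is missing, the path is local, or the files are already cached.
utils.aria2_hook(model_id, cache_dir="cache")
# The regular loader then finds the weights already in the shared cache.
model = AutoModelForCausalLM.from_pretrained(model_id, cache_dir="cache")
```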