From e7973e13ac712a5834064ddeb6888d5759608eca Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 28 Sep 2022 12:47:50 -0400 Subject: [PATCH 01/11] Fix for GPT models downloading even when present in model folder --- aiserver.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index c5eca243..5453b065 100644 --- a/aiserver.py +++ b/aiserver.py @@ -2442,15 +2442,25 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal config_path = os.path.join("models/", vars.custmodpth) config_path = os.path.join(config_path, "config.json").replace("\\", "//") model_config = open(config_path, "r") - js = json.load(model_config) + #js = json.load(model_config) with(maybe_use_float16()): try: - model = GPT2LMHeadModel.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache") + if os.path.exists(vars.custmodpth): + model = GPT2LMHeadModel.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache") + tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache") + elif os.path.exists(os.path.join("models/", vars.custmodpth)): + model = GPT2LMHeadModel.from_pretrained(os.path.join("models/", vars.custmodpth), revision=vars.revision, cache_dir="cache") + tokenizer = GPT2Tokenizer.from_pretrained(os.path.join("models/", vars.custmodpth), revision=vars.revision, cache_dir="cache") + else: + model = GPT2LMHeadModel.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache") + tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache") except Exception as e: if("out of memory" in traceback.format_exc().lower()): raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.") raise e tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache") + model.save_pretrained("models/{}".format(vars.model.replace('/', '_')), max_shard_size="500MiB") + tokenizer.save_pretrained("models/{}".format(vars.model.replace('/', '_'))) vars.modeldim = get_hidden_size_from_model(model) # Is CUDA available? If so, use GPU, otherwise fall back to CPU if(vars.hascuda and vars.usegpu): From 90022d05c80cc5cdbc482f8f297ff9617c785ab8 Mon Sep 17 00:00:00 2001 From: Divided by Zer0 Date: Fri, 30 Sep 2022 00:26:55 +0200 Subject: [PATCH 02/11] fix endpoint for get_cluster_models --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 5453b065..15a56c31 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1640,7 +1640,7 @@ def get_cluster_models(msg): # Get list of models from public cluster logger.init("KAI Horde Models", status="Retrieving") try: - req = requests.get("{}/models".format(url)) + req = requests.get("{}/api/v1/models".format(url)) except requests.exceptions.ConnectionError: logger.init_err("KAI Horde Models", status="Failed") logger.error("Provided KoboldAI Horde URL unreachable") From a482ec16d8a8600838f90396a80759b69d7fc810 Mon Sep 17 00:00:00 2001 From: scythe000 Date: Fri, 30 Sep 2022 10:29:32 -0700 Subject: [PATCH 03/11] Update aiserver.py - typo fix Changed 'beakmodel' to 'breakmodel' in the example comment. 
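For reference, the option whose help text this corrects takes a comma-separated list of per-device layer counts. A minimal illustrative sketch of parsing such a value (hypothetical helper shown only for clarity; this is not the code the patch touches):

    # Parse a --breakmodel_gpulayers-style value, e.g. "8,9,11" means
    # 8 layers on device 0, 9 on device 1 and 11 on device 2.
    def parse_gpulayers(value):
        layers = [int(n) for n in value.split(",")]
        if any(n < 0 for n in layers):
            raise ValueError("layer counts must be non-negative")
        return layers

    assert parse_gpulayers("8,9,11") == [8, 9, 11]
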
--- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 15a56c31..38b8dc30 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1324,7 +1324,7 @@ def general_startup(override_args=None): parser.add_argument("--cpu", action='store_true', help="By default unattended launches are on the GPU use this option to force CPU usage.") parser.add_argument("--breakmodel", action='store_true', help=argparse.SUPPRESS) parser.add_argument("--breakmodel_layers", type=int, help=argparse.SUPPRESS) - parser.add_argument("--breakmodel_gpulayers", type=str, help="If using a model that supports hybrid generation, this is a comma-separated list that specifies how many layers to put on each GPU device. For example to put 8 layers on device 0, 9 layers on device 1 and 11 layers on device 2, use --beakmodel_gpulayers 8,9,11") + parser.add_argument("--breakmodel_gpulayers", type=str, help="If using a model that supports hybrid generation, this is a comma-separated list that specifies how many layers to put on each GPU device. For example to put 8 layers on device 0, 9 layers on device 1 and 11 layers on device 2, use --breakmodel_gpulayers 8,9,11") parser.add_argument("--breakmodel_disklayers", type=int, help="If using a model that supports hybrid generation, this is the number of layers to put in disk cache.") parser.add_argument("--override_delete", action='store_true', help="Deleting stories from inside the browser is disabled if you are using --remote and enabled otherwise. Using this option will instead allow deleting stories if using --remote and prevent deleting stories otherwise.") parser.add_argument("--override_rename", action='store_true', help="Renaming stories from inside the browser is disabled if you are using --remote and enabled otherwise. Using this option will instead allow renaming stories if using --remote and prevent renaming stories otherwise.") From 8313df8817d5afba7d3553f11ac92ec95c03387e Mon Sep 17 00:00:00 2001 From: henk717 Date: Sat, 1 Oct 2022 15:42:54 +0200 Subject: [PATCH 04/11] Localtunnel Default --- colab/TPU.ipynb | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/colab/TPU.ipynb b/colab/TPU.ipynb index d01fedf6..64711466 100644 --- a/colab/TPU.ipynb +++ b/colab/TPU.ipynb @@ -68,7 +68,7 @@ "\n", "Model = \"Nerys 13B V2\" #@param [\"Nerys 13B V2\", \"Erebus 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 20B\", \"Erebus 20B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit V2 6B\", \"Lit 6B\", \"NeoX 20B\", \"OPT 13B\", \"Fairseq Dense 13B\", \"GPT-J-6B\"] {allow-input: true}\n", "Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n", - "Provider = \"Cloudflare\" #@param [\"Localtunnel\", \"Cloudflare\"]\n", + "Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n", "\n", "import os\n", "try:\n", @@ -162,18 +162,19 @@ "source": [ "# TPU Edition Model Descriptions\n", "\n", - "| Model | Size | Style | Description |\n", - "| --- | --- | --- | --- |\n", - "| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. 
Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", - "| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", - "| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n", - "| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n", - "| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n", - "| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n", - "| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n", - "| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n", - "| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. 
You will likely get better result with larger generic models or finetuned models. |\n", - "\n", + "| Model | Style | Description |\n", + "| --- | --- | --- |\n", + "| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", + "| [Erebus](https://huggingface.co/KoboldAI/OPT-13B-Erebus) by Mr Seeker | NSFW | Erebus is our community's flagship NSFW model, being a combination of multiple large datasets that include Literotica, Shinen and erotic novels from Nerys and featuring thourough tagging support it covers the vast majority of erotic writing styles. This model is capable of replacing both the Lit and Shinen models in terms of content and style and has been well received as (one of) the best NSFW models out there. If you wish to use this model for commercial or non research usage we recommend choosing the 20B version as that one is not subject to the restrictive OPT license. |\n", + "| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", + "| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | NSFW | Shinen is an NSFW model trained on a variety of stories from the website Sexstories it contains many different kinks. It has been merged into the larger (and better) Erebus model. |\n", + "| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n", + "| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n", + "| [Lit](https://huggingface.co/hakurei/lit-6B) ([V2](https://huggingface.co/hakurei/litv2-6B-rev3)) by Haru | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. 
|\n", + "| [OPT](https://huggingface.co/facebook/opt-13b) by Metaseq | Generic | OPT is considered one of the best base models as far as content goes, its behavior has the strengths of both GPT-Neo and Fairseq Dense. Compared to Neo duplicate and unnecessary content has been left out, while additional literature was added in similar to the Fairseq Dense model. The Fairseq Dense model however lacks the broader data that OPT does have. The biggest downfall of OPT is its license, which prohibits any commercial usage, or usage beyond research purposes. |\n", + "| [Neo(X)](https://huggingface.co/EleutherAI/gpt-neox-20b) by EleutherAI | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n", + "| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. Compared to other models the dataset focuses primarily on literature and contains little else. |\n", + "| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n", "\n", "# [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)\n", "\n", From e1606afc0dd446c6f1f6ab372c655c60d435cafd Mon Sep 17 00:00:00 2001 From: henk717 Date: Sat, 1 Oct 2022 15:43:45 +0200 Subject: [PATCH 05/11] GPU Descriptions --- colab/TPU.ipynb | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/colab/TPU.ipynb b/colab/TPU.ipynb index 64711466..6ec346fc 100644 --- a/colab/TPU.ipynb +++ b/colab/TPU.ipynb @@ -178,16 +178,20 @@ "\n", "# [GPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/GPU.ipynb)\n", "\n", - "| Model | Size | Style | Description |\n", - "| --- | --- | --- | --- |\n", - "| [Nerys 2.7B](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. 
Novel writing can be done best from the first or third person. |\n", - "| [Janeway 2.7B](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", - "| [Picard 2.7B](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on Neo 2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n", - "| [AID 2.7B](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n", - "| [Horni LN 2.7B](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on Horni 2.7B and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n", - "| [Horni 2.7B](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n", - "| [Shinen 2.7B](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you Shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", - "| [Neo 2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n", + "| Model | Style | Description |\n", + "| --- | --- | --- |\n", + "| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. 
|\n", + "| [Erebus](https://huggingface.co/KoboldAI/OPT-2.7B-Erebus) by Mr Seeker | NSFW | Erebus is our community's flagship NSFW model, being a combination of multiple large datasets that include Literotica, Shinen and erotic novels from Nerys and featuring thourough tagging support it covers the vast majority of erotic writing styles. This model is capable of replacing both the Lit and Shinen models in terms of content and style and has been well received as (one of) the best NSFW models out there. If you wish to use this model for commercial or non research usage we recommend choosing the 20B version as that one is not subject to the restrictive OPT license. |\n", + "| [Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", + "| [Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | Novel | Picard is a model trained for SFW Novels based on Neo 2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n", + "| [AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n", + "| [Horni LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | Novel | This model is based on Horni 2.7B and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n", + "| [Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n", + "| [Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you Shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", + "| [OPT](https://huggingface.co/facebook/opt-2.7b) by Metaseq | Generic | OPT is considered one of the best base models as far as content goes, its behavior has the strengths of both GPT-Neo and Fairseq Dense. Compared to Neo duplicate and unnecessary content has been left out, while additional literature was added in similar to the Fairseq Dense model. The Fairseq Dense model however lacks the broader data that OPT does have. The biggest downfall of OPT is its license, which prohibits any commercial usage, or usage beyond research purposes. |\n", + "| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-2.7B) | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. 
It is known to be on par with the larger models from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. Compared to other models the dataset focuses primarily on literature and contains little else. |\n", + "| [Neo](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n", + "\n", "\n", "| Style | Description |\n", "| --- | --- |\n", From 2f45b9311958f9bb137ab82962db449512892a4e Mon Sep 17 00:00:00 2001 From: henk717 Date: Sat, 1 Oct 2022 16:58:16 +0200 Subject: [PATCH 06/11] GPU updates --- colab/GPU.ipynb | 58 +++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/colab/GPU.ipynb b/colab/GPU.ipynb index a74ef0cf..ba171275 100644 --- a/colab/GPU.ipynb +++ b/colab/GPU.ipynb @@ -67,7 +67,7 @@ "#@title <-- Select your model below and then click this to start KoboldAI\n", "#@markdown You can find a description of the models below along with instructions on how to start KoboldAI.\n", "\n", - "Model = \"Nerys 2.7B\" #@param [\"Nerys 2.7B\", \"AID 2.7B\", \"Erebus 2.7B\", \"Janeway 2.7B\", \"Picard 2.7B\", \"Horni LN 2.7B\", \"Horni 2.7B\", \"Shinen 2.7B\", \"Neo 2.7B\"] {allow-input: true}\n", + "Model = \"Nerys 2.7B\" #@param [\"Nerys 2.7B\", \"AID 2.7B\", \"Erebus 2.7B\", \"Janeway 2.7B\", \"Picard 2.7B\", \"Horni LN 2.7B\", \"Horni 2.7B\", \"Shinen 2.7B\", \"OPT 2.7B\", \"Fairseq Dense 2.7B\", \"Neo 2.7B\"] {allow-input: true}\n", "Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n", "Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n", "\n", @@ -107,6 +107,14 @@ " Model = \"KoboldAI/GPT-Neo-2.7B-Shinen\"\n", " path = \"\"\n", " download = \"\"\n", + "elif Model == \"Fairseq Dense 2.7B\":\n", + " Model = \"KoboldAI/fairseq-dense-2.7B\"\n", + " path = \"\"\n", + " download = \"\"\n", + "elif Model == \"OPT 2.7B\":\n", + " Model = \"facebook/opt-2.7b\"\n", + " path = \"\"\n", + " download = \"\"\n", "elif Model == \"Neo 2.7B\":\n", " Model = \"EleutherAI/gpt-neo-2.7B\"\n", " path = \"\"\n", @@ -126,31 +134,35 @@ "cell_type": "markdown", "source": [ "# GPU Edition Model Descriptions\n", - "| Model | Size | Style | Description |\n", - "| --- | --- | --- | --- |\n", - "| [Nerys 2.7B](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | 2.7B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", - "| [Janeway 2.7B](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | 2.7B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. 
This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", - "| [Picard 2.7B](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | 2.7B | Novel | Picard is a model trained for SFW Novels based on Neo 2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n", - "| [AID 2.7B](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | 2.7B | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n", - "| [Horni LN 2.7B](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | 2.7B | Novel | This model is based on Horni 2.7B and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n", - "| [Horni 2.7B](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | 2.7B | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n", - "| [Shinen 2.7B](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | 2.7B | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you Shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", - "| [Neo 2.7B](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | 2.7B | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n", + "| Model | Style | Description |\n", + "| --- | --- | --- |\n", + "| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-2.7B-Nerys) by Mr Seeker | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", + "| [Erebus](https://huggingface.co/KoboldAI/OPT-2.7B-Erebus) by Mr Seeker | NSFW | Erebus is our community's flagship NSFW model, being a combination of multiple large datasets that include Literotica, Shinen and erotic novels from Nerys and featuring thourough tagging support it covers the vast majority of erotic writing styles. 
This model is capable of replacing both the Lit and Shinen models in terms of content and style and has been well received as (one of) the best NSFW models out there. If you wish to use this model for commercial or non research usage we recommend choosing the 20B version as that one is not subject to the restrictive OPT license. |\n", + "| [Janeway](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Janeway) by Mr Seeker | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", + "| [Picard](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Picard) by Mr Seeker | Novel | Picard is a model trained for SFW Novels based on Neo 2.7B. It is focused on Novel style writing without the NSFW bias. While the name suggests a sci-fi model this model is designed for Novels of a variety of genre's. It is meant to be used in KoboldAI's regular mode. |\n", + "| [AID](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-AID) by melastacho | Adventure | Also know as Adventure 2.7B this is a clone of the AI Dungeon Classic model and is best known for the epic wackey adventures that AI Dungeon Classic players love. |\n", + "| [Horni LN](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni-LN) by finetune | Novel | This model is based on Horni 2.7B and retains its NSFW knowledge, but was then further biased towards SFW novel stories. If you seek a balance between a SFW Novel model and a NSFW model this model should be a good choice. |\n", + "| [Horni](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Horni) by finetune | NSFW | This model is tuned on Literotica to produce a Novel style model biased towards NSFW content. Can still be used for SFW stories but will have a bias towards NSFW content. It is meant to be used in KoboldAI's regular mode. |\n", + "| [Shinen](https://huggingface.co/KoboldAI/GPT-Neo-2.7B-Shinen) by Mr Seeker | NSFW | Shinen is an alternative to the Horni model designed to be more explicit. If Horni is to tame for you Shinen might produce better results. While it is a Novel model it is unsuitable for SFW stories due to its heavy NSFW bias. Shinen will not hold back. It is meant to be used in KoboldAI's regular mode. |\n", + "| [OPT](https://huggingface.co/facebook/opt-2.7b) by Metaseq | Generic | OPT is considered one of the best base models as far as content goes, its behavior has the strengths of both GPT-Neo and Fairseq Dense. Compared to Neo duplicate and unnecessary content has been left out, while additional literature was added in similar to the Fairseq Dense model. The Fairseq Dense model however lacks the broader data that OPT does have. The biggest downfall of OPT is its license, which prohibits any commercial usage, or usage beyond research purposes. |\n", + "| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-2.7B) | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger models from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. Compared to other models the dataset focuses primarily on literature and contains little else. 
|\n", + "| [Neo](https://huggingface.co/EleutherAI/gpt-neo-2.7B) by EleutherAI | Generic | This is the base model for all the other 2.7B models, it is best used when you have a use case that we have no other models available for, such as writing blog articles or programming. It can also be a good basis for the experience of some of the softprompts if your softprompt is not about a subject the other models cover. |\n", "\n", "# [TPU Edition Model Descriptions](https://colab.research.google.com/github/KoboldAI/KoboldAI-Client/blob/main/colab/TPU.ipynb)\n", "\n", - "| Model | Size | Style | Description |\n", - "| --- | --- | --- | --- |\n", - "| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | 13B | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", - "| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | 13B | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", - "| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | 13B | NSFW | Shinen is an NSFW model designed to be more explicit. Trained on a variety of stories from the website Sexstories it contains many different kinks. |\n", - "| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | 6B | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n", - "| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | 6B | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n", - "| [Lit](https://huggingface.co/hakurei/lit-6B) by Haru | 6B | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n", - "| Neo(X) by EleutherAI | 20B | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 
20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n", - "| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | 13B | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. |\n", - "| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | 6B | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. |\n", - "\n", + "| Model | Style | Description |\n", + "| --- | --- | --- |\n", + "| [Nerys](https://huggingface.co/KoboldAI/fairseq-dense-13B-Nerys) by Mr Seeker | Novel/Adventure | Nerys is a hybrid model based on Pike (A newer Janeway), on top of the Pike dataset you also get some Light Novels, Adventure mode support and a little bit of Shinen thrown in the mix. The end result is a very diverse model that is heavily biased towards SFW novel writing, but one that can go beyond its novel training and make for an excellent adventure model to. Adventure mode is best played from a second person perspective, but can be played in first or third person as well. Novel writing can be done best from the first or third person. |\n", + "| [Erebus](https://huggingface.co/KoboldAI/OPT-13B-Erebus) by Mr Seeker | NSFW | Erebus is our community's flagship NSFW model, being a combination of multiple large datasets that include Literotica, Shinen and erotic novels from Nerys and featuring thourough tagging support it covers the vast majority of erotic writing styles. This model is capable of replacing both the Lit and Shinen models in terms of content and style and has been well received as (one of) the best NSFW models out there. If you wish to use this model for commercial or non research usage we recommend choosing the 20B version as that one is not subject to the restrictive OPT license. |\n", + "| [Janeway](https://huggingface.co/KoboldAI/fairseq-dense-13B-Janeway) by Mr Seeker | Novel | Janeway is a model created from Picard's dataset combined with a brand new collection of ebooks. This model is trained on 20% more content than Picard and has been trained on literature from various genres. Although the model is mainly focussed on SFW, romantic scenes might involve a degree of nudity. |\n", + "| [Shinen](https://huggingface.co/KoboldAI/fairseq-dense-13B-Shinen) by Mr Seeker | NSFW | Shinen is an NSFW model trained on a variety of stories from the website Sexstories it contains many different kinks. It has been merged into the larger (and better) Erebus model. 
|\n", + "| [Skein](https://huggingface.co/KoboldAI/GPT-J-6B-Skein) by VE\\_FORBRYDERNE | Adventure | Skein is best used with Adventure mode enabled, it consists of a 4 times larger adventure dataset than the Adventure model making it excellent for text adventure gaming. On top of that it also consists of light novel training further expanding its knowledge and writing capabilities. It can be used with the You filter bias if you wish to write Novels with it, but dedicated Novel models can perform better for this task. |\n", + "| [Adventure](https://huggingface.co/KoboldAI/GPT-J-6B-Adventure) by VE\\_FORBRYDERNE | Adventure | Adventure is a 6B model designed to mimick the behavior of AI Dungeon. It is exclusively for Adventure Mode and can take you on the epic and wackey adventures that AI Dungeon players love. It also features the many tropes of AI Dungeon as it has been trained on very similar data. It must be used in second person (You). |\n", + "| [Lit](https://huggingface.co/hakurei/lit-6B) ([V2](https://huggingface.co/hakurei/litv2-6B-rev3)) by Haru | NSFW | Lit is a great NSFW model trained by Haru on both a large set of Literotica stories and high quality novels along with tagging support. Creating a high quality model for your NSFW stories. This model is exclusively a novel model and is best used in third person. |\n", + "| [OPT](https://huggingface.co/facebook/opt-13b) by Metaseq | Generic | OPT is considered one of the best base models as far as content goes, its behavior has the strengths of both GPT-Neo and Fairseq Dense. Compared to Neo duplicate and unnecessary content has been left out, while additional literature was added in similar to the Fairseq Dense model. The Fairseq Dense model however lacks the broader data that OPT does have. The biggest downfall of OPT is its license, which prohibits any commercial usage, or usage beyond research purposes. |\n", + "| [Neo(X)](https://huggingface.co/EleutherAI/gpt-neox-20b) by EleutherAI | Generic | NeoX is the largest EleutherAI model currently available, being a generic model it is not particularly trained towards anything and can do a variety of writing, Q&A and coding tasks. 20B's performance is closely compared to the 13B models and it is worth trying both especially if you have a task that does not involve english writing. Its behavior will be similar to the GPT-J-6B model since they are trained on the same dataset but with more sensitivity towards repetition penalty and with more knowledge. |\n", + "| [Fairseq Dense](https://huggingface.co/KoboldAI/fairseq-dense-13B) | Generic | Trained by Facebook Researchers this model stems from the MOE research project within Fairseq. This particular version has been converted by us for use in KoboldAI. It is known to be on par with the larger 20B model from EleutherAI and considered as better for pop culture and language tasks. Because the model has never seen a new line (enter) it may perform worse on formatting and paragraphing. Compared to other models the dataset focuses primarily on literature and contains little else. |\n", + "| [GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) by EleutherAI | Generic | This model serves as the basis for most other 6B models (Some being based on Fairseq Dense instead). Being trained on the Pile and not biased towards anything in particular it is suitable for a variety of tasks such as writing, Q&A and coding tasks. You will likely get better result with larger generic models or finetuned models. 
|\n", "\n", "| Style | Description |\n", "| --------- | ------------------------------------------------------------ |\n", From 1a59a4acea2bd16d5b6de3587a00f93b3a78ae23 Mon Sep 17 00:00:00 2001 From: vfbd Date: Sun, 2 Oct 2022 16:25:51 -0400 Subject: [PATCH 07/11] Allow changing sampler seed and sampler order from API --- aiserver.py | 53 +++++++++++++++++++++++++++++++++++++++++++++- tpu_mtj_backend.py | 9 ++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 38b8dc30..1b74167f 100644 --- a/aiserver.py +++ b/aiserver.py @@ -377,6 +377,7 @@ class vars: comregex_ai = re.compile(r'(?:\n<\|(?:.|\n)*?\|>(?=\n|$))|(?:<\|(?:.|\n)*?\|>\n?)') # Pattern for matching comments to remove them before sending them to the AI comregex_ui = re.compile(r'(<\|(?:.|\n)*?\|>)') # Pattern for matching comments in the editor sampler_order = utils.default_sampler_order.copy() + rng_states = {} # Used by the POST /generate endpoint to store sampler RNG states chatmode = False chatname = "You" adventure = False @@ -630,7 +631,7 @@ tags = [ api_version = None # This gets set automatically so don't change this value api_v1 = KoboldAPISpec( - version="1.1.4", + version="1.2.0", prefixes=["/api/v1", "/api/latest"], tags=tags, ) @@ -7450,6 +7451,13 @@ def story_load_validator(name: str): raise ValidationError("Must be a valid story name.") return True +def permutation_validator(lst: list): + if any(not isinstance(e, int) for e in lst): + return + if min(lst) != 0 or max(lst) != len(lst) - 1 or len(set(lst)) != len(lst): + raise ValidationError("Must be a permutation of the first N non-negative integers, where N is the length of this array") + return True + class GenerationInputSchema(SamplerSettingsSchema): prompt: str = fields.String(required=True, metadata={"description": "This is the submission."}) use_memory: bool = fields.Boolean(load_default=False, metadata={"description": "Whether or not to use the memory from the KoboldAI GUI when generating text."}) @@ -7469,6 +7477,9 @@ class GenerationInputSchema(SamplerSettingsSchema): disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all input formatting options default to `false` instead of the value in the KoboldAI GUI"}) frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action.\n\nIf `disable_input_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."}) quiet: Optional[bool] = fields.Boolean(metadata={"description": "When enabled, Generated output will not be displayed in the console."}) + sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 6 and the array must be a permutation of the first N non-negative integers."}) + sampler_seed: Optional[int] = fields.Integer(metadata={"description": "RNG seed to use for sampling. If not specified, the global RNG will be used."}) + sampler_full_determinism: Optional[bool] = fields.Boolean(metadata={"description": "If enabled, the generated text will always be the same as long as you use the same RNG seed, input and settings. 
If disabled, only the *sequence* of generated texts that you get when repeatedly generating text will be the same given the same RNG seed, input and settings."}) class GenerationResultSchema(KoboldSchema): text: str = fields.String(required=True, metadata={"description": "Generated output as plain text."}) @@ -7559,6 +7570,29 @@ def _generate_text(body: GenerationInputSchema): "msg": "Server is busy; please try again later.", "type": "service_unavailable", }}), mimetype="application/json", status=503)) + if vars.use_colab_tpu: + import tpu_mtj_backend + if hasattr(body, "sampler_seed"): + # If a seed was specified, we need to save the global RNG state so we + # can restore it later + old_seed = vars.seed + old_rng_state = tpu_mtj_backend.get_rng_state() if vars.use_colab_tpu else torch.get_rng_state() + vars.seed = body.sampler_seed + # We should try to use a previously saved RNG state with the same seed + if body.sampler_seed in vars.rng_states: + if vars.use_colab_tpu: + tpu_mtj_backend.set_rng_state(vars.rng_states[body.sampler_seed]) + else: + torch.set_rng_state(vars.rng_states[body.sampler_seed]) + else: + if vars.use_colab_tpu: + tpu_mtj_backend.set_rng_state(tpu_mtj_backend.new_rng_state(body.sampler_seed)) + else: + torch.manual_seed(body.sampler_seed) + vars.rng_states[body.sampler_seed] = tpu_mtj_backend.get_rng_state() if vars.use_colab_tpu else torch.get_rng_state() + if hasattr(body, "sampler_order"): + if len(body.sampler_order) < 7: + body.sampler_order = [6] + body.sampler_order # This maps each property of the setting to use when sending the generate idempotently # To the object which typically contains it's value # This allows to set the property only for the API generation, and then revert the setting @@ -7584,6 +7618,8 @@ def _generate_text(body: GenerationInputSchema): "max_context_length": ("vars", "max_length", None), "n": ("vars", "numseqs", None), "quiet": ("vars", "quiet", None), + "sampler_order": ("vars", "sampler_order", None), + "sampler_full_determinism": ("vars", "full_determinism", None), } saved_settings = {} set_aibusy(1) @@ -7633,6 +7669,12 @@ def _generate_text(body: GenerationInputSchema): vars.output_streaming = output_streaming if vars.allowsp and getattr(body, "soft_prompt", None) is not None: spRequest(old_spfilename) + if hasattr(body, "sampler_seed"): + vars.seed = old_seed + if vars.use_colab_tpu: + tpu_mtj_backend.set_rng_state(old_rng_state) + else: + torch.set_rng_state(old_rng_state) set_aibusy(0) return output @@ -10035,6 +10077,15 @@ class AddSentenceSpacingSettingsSchema(KoboldSchema): name = "add sentence spacing (input formatting)" example_yaml_value = "false" +@config_endpoint_schema +class SamplerOrderSettingSchema(KoboldSchema): + value = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], required=True) + class KoboldMeta: + route_name = "sampler_order" + obj = "vars" + var_name = "sampler_order" + name = "sampler order" + example_yaml_value = "[6, 0, 1, 2, 3, 4, 5]" for schema in config_endpoint_schemas: diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index e8594721..ec2b2f45 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -71,6 +71,15 @@ def set_rng_seed(seed: int): def randomize_rng_seed(): return set_rng_seed(random.randrange(sys.maxsize)) +def get_rng_state(): + return rng + +def set_rng_state(state): + global rng + rng = state + +def new_rng_state(seed: int): + return random.Random(seed) def warper_callback(logits) -> np.array: raise 
NotImplementedError("`tpu_mtj_backend.warper_callback()` needs to be defined") From dd1c25241d4c0816d35523f1e02448fdd94eebbd Mon Sep 17 00:00:00 2001 From: vfbd Date: Sun, 2 Oct 2022 17:43:54 -0400 Subject: [PATCH 08/11] Allow sampler seed and full determinism to be read/written in /config --- aiserver.py | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/aiserver.py b/aiserver.py index 1b74167f..8d9bd27d 100644 --- a/aiserver.py +++ b/aiserver.py @@ -9880,6 +9880,60 @@ def put_config_soft_prompt(body: SoftPromptSettingSchema): settingschanged() return {} +class SamplerSeedSettingSchema(KoboldSchema): + value: int = fields.Integer(required=True) + +@api_v1.get("/config/sampler_seed") +@api_schema_wrap +def get_config_sampler_seed(): + """--- + get: + summary: Retrieve the current global sampler seed value + tags: + - config + responses: + 200: + description: Successful request + content: + application/json: + schema: SamplerSeedSettingSchema + example: + value: 3475097509890965500 + """ + return {"value": __import__("tpu_mtj_backend").get_rng_seed() if vars.use_colab_tpu else __import__("torch").initial_seed()} + +@api_v1.put("/config/sampler_seed") +@api_schema_wrap +def put_config_sampler_seed(body: SamplerSeedSettingSchema): + """--- + put: + summary: Set the global sampler seed value + tags: + - config + requestBody: + required: true + content: + application/json: + schema: SamplerSeedSettingSchema + example: + value: 3475097509890965500 + responses: + 200: + description: Successful request + content: + application/json: + schema: EmptySchema + {api_validation_error_response} + """ + if vars.use_colab_tpu: + import tpu_mtj_backend + tpu_mtj_backend.set_rng_seed(body.value) + else: + import torch + torch.manual_seed(body.value) + vars.seed = body.value + return {} + config_endpoint_schemas: List[Type[KoboldSchema]] = [] def config_endpoint_schema(c: Type[KoboldSchema]): @@ -10087,6 +10141,16 @@ class SamplerOrderSettingSchema(KoboldSchema): name = "sampler order" example_yaml_value = "[6, 0, 1, 2, 3, 4, 5]" +@config_endpoint_schema +class SamplerFullDeterminismSettingSchema(KoboldSchema): + value = fields.Boolean(required=True) + class KoboldMeta: + route_name = "sampler_full_determinism" + obj = "vars" + var_name = "full_determinism" + name = "sampler full determinism" + example_yaml_value = "false" + for schema in config_endpoint_schemas: create_config_endpoint(schema=schema.__name__, method="GET") From bdfa6d86b740b2d8a2403db6dc81f9c11b38c245 Mon Sep 17 00:00:00 2001 From: vfbd Date: Sun, 2 Oct 2022 17:50:32 -0400 Subject: [PATCH 09/11] Seed has to be a 64-bit unsigned int or PyTorch will throw an error tpu_mtj_backend's seed can be an integer of arbitrary size but we will limit it to a 64-bit unsigned integer anyways for consistency. --- aiserver.py | 4 ++-- tpu_mtj_backend.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index 8d9bd27d..c0485cd1 100644 --- a/aiserver.py +++ b/aiserver.py @@ -7478,7 +7478,7 @@ class GenerationInputSchema(SamplerSettingsSchema): frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. 
When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action.\n\nIf `disable_input_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."}) quiet: Optional[bool] = fields.Boolean(metadata={"description": "When enabled, Generated output will not be displayed in the console."}) sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 6 and the array must be a permutation of the first N non-negative integers."}) - sampler_seed: Optional[int] = fields.Integer(metadata={"description": "RNG seed to use for sampling. If not specified, the global RNG will be used."}) + sampler_seed: Optional[int] = fields.Integer(validate=validate.Range(min=0, max=2**64 - 1), metadata={"description": "RNG seed to use for sampling. If not specified, the global RNG will be used."}) sampler_full_determinism: Optional[bool] = fields.Boolean(metadata={"description": "If enabled, the generated text will always be the same as long as you use the same RNG seed, input and settings. If disabled, only the *sequence* of generated texts that you get when repeatedly generating text will be the same given the same RNG seed, input and settings."}) class GenerationResultSchema(KoboldSchema): @@ -9881,7 +9881,7 @@ def put_config_soft_prompt(body: SoftPromptSettingSchema): return {} class SamplerSeedSettingSchema(KoboldSchema): - value: int = fields.Integer(required=True) + value: int = fields.Integer(validate=validate.Range(min=0, max=2**64 - 1), required=True) @api_v1.get("/config/sampler_seed") @api_schema_wrap diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index ec2b2f45..2642943b 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -55,7 +55,7 @@ from mesh_transformer.util import to_bf16 params: Dict[str, Any] = {} -__seed = random.randrange(sys.maxsize) +__seed = random.randrange(2**64) rng = random.Random(__seed) @@ -69,7 +69,7 @@ def set_rng_seed(seed: int): return seed def randomize_rng_seed(): - return set_rng_seed(random.randrange(sys.maxsize)) + return set_rng_seed(random.randrange(2**64)) def get_rng_state(): return rng From 6af0e842f2d572ded7ea6cec3a87d1b09bb34faf Mon Sep 17 00:00:00 2001 From: henk717 Date: Tue, 4 Oct 2022 17:42:18 +0200 Subject: [PATCH 10/11] Switch to official Switch to the official branch on KoboldAI now that it is compatible --- docker-standalone/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-standalone/Dockerfile b/docker-standalone/Dockerfile index 6e13ce0e..df375a3e 100644 --- a/docker-standalone/Dockerfile +++ b/docker-standalone/Dockerfile @@ -1,6 +1,6 @@ FROM debian RUN apt update && apt install wget aria2 git bzip2 -y -RUN git clone https://github.com/henk717/koboldai /opt/koboldai +RUN git clone https://github.com/koboldai/koboldai-client /opt/koboldai WORKDIR /opt/koboldai RUN ./install_requirements.sh cuda COPY docker-helper.sh /opt/koboldai/docker-helper.sh From 323f593a9662ebef59e3fb94e2339a4c8066d14e Mon Sep 17 00:00:00 2001 From: vfbd Date: Thu, 6 Oct 2022 20:08:08 -0400 Subject: [PATCH 11/11] Custom unpickler to avoid pickle's arbitrary code execution vulnerability --- torch_lazy_loader.py | 79 +++++++++++++++++++++++++++++++++++++++----- tpu_mtj_backend.py | 6 ++-- 2 files changed, 74 insertions(+), 11 deletions(-) diff --git a/torch_lazy_loader.py 
b/torch_lazy_loader.py index 9e411261..1298335d 100644 --- a/torch_lazy_loader.py +++ b/torch_lazy_loader.py @@ -50,9 +50,12 @@ import itertools import zipfile import pickle import torch +import numpy as np +import collections +import _codecs import utils from torch.nn import Module -from typing import Any, Callable, Dict, Optional, Tuple, Union +from typing import Any, Callable, Dict, Optional, Tuple, Type, Union _EXTRA_STATE_KEY_SUFFIX = '_extra_state' @@ -111,8 +114,50 @@ class LazyTensor: tensor._backward_hooks = self.backward_hooks return tensor +class RestrictedUnpickler(pickle.Unpickler): + def original_persistent_load(self, saved_id): + return super().persistent_load(saved_id) -class _LazyUnpickler(pickle.Unpickler): + def forced_persistent_load(self, saved_id): + if saved_id[0] != "storage": + raise pickle.UnpicklingError("`saved_id[0]` must be 'storage'") + return self.original_persistent_load(saved_id) + + def find_class(self, module, name): + if module == "collections" and name == "OrderedDict": + return collections.OrderedDict + elif module == "torch._utils" and name == "_rebuild_tensor_v2": + return torch._utils._rebuild_tensor_v2 + elif module == "torch" and name in ( + "DoubleStorage", + "FloatStorage", + "HalfStorage", + "LongStorage", + "IntStorage", + "ShortStorage", + "CharStorage", + "ByteStorage", + "BoolStorage", + "BFloat16Storage", + ): + return getattr(torch, name) + elif module == "numpy.core.multiarray" and name == "scalar": + return np.core.multiarray.scalar + elif module == "numpy" and name == "dtype": + return np.dtype + elif module == "_codecs" and name == "encode": + return _codecs.encode + else: + # Forbid everything else. + qualified_name = name if module == "__builtin__" else f"{module}.{name}" + raise pickle.UnpicklingError(f"`{qualified_name}` is forbidden; the model you are loading probably contains malicious code") + + def load(self, *args, **kwargs): + self.original_persistent_load = getattr(self, "persistent_load", pickle.Unpickler.persistent_load) + self.persistent_load = self.forced_persistent_load + return super().load(*args, **kwargs) + +class _LazyUnpickler(RestrictedUnpickler): lazy_loaded_storages: Dict[str, LazyTensor] def __init__(self, *args, **kwargs): @@ -127,7 +172,6 @@ class _LazyUnpickler(pickle.Unpickler): return LazyTensor(storage_type, key, location) def load(self, *args, **kwargs): - self.persistent_load = self.forced_persistent_load retval = super().load(*args, **kwargs) self.lazy_loaded_storages = {} return retval @@ -213,16 +257,33 @@ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, miss unexpected_keys.append(key) +@contextlib.contextmanager +def use_custom_unpickler(unpickler: Type[pickle.Unpickler] = RestrictedUnpickler): + try: + old_unpickler = pickle.Unpickler + pickle.Unpickler = unpickler + + old_pickle_load = pickle.load + + def new_pickle_load(*args, **kwargs): + return pickle.Unpickler(*args, **kwargs).load() + + pickle.load = new_pickle_load + + yield + + finally: + pickle.Unpickler = old_unpickler + pickle.load = old_pickle_load + @contextlib.contextmanager def use_lazy_torch_load(enable=True, callback: Optional[Callable] = None, dematerialized_modules=False, use_accelerate_init_empty_weights=False): if not enable: - yield False + with use_custom_unpickler(RestrictedUnpickler): + yield False return try: - old_unpickler = pickle.Unpickler - pickle.Unpickler = _LazyUnpickler - old_rebuild_tensor = torch._utils._rebuild_tensor torch._utils._rebuild_tensor = _rebuild_tensor @@ -261,10 
+322,10 @@ def use_lazy_torch_load(enable=True, callback: Optional[Callable] = None, demate old_load_from_state_dict = torch.nn.Module._load_from_state_dict torch.nn.Module._load_from_state_dict = _load_from_state_dict - yield True + with use_custom_unpickler(_LazyUnpickler): + yield True finally: - pickle.Unpickler = old_unpickler torch._utils._rebuild_tensor = old_rebuild_tensor torch.load = old_torch_load if dematerialized_modules: diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index 2642943b..d992ba45 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -955,6 +955,7 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2): import torch import torch.utils.dlpack + import torch_lazy_loader from tqdm.auto import tqdm move_xmap = jax.experimental.maps.xmap( @@ -996,8 +997,9 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2): continue layer = checkpoint_layer - 2 shards = [] - for checkpoint_shard in range(checkpoint_shards): - shards.append(torch.load(path_template.format(layer=checkpoint_layer, shard=checkpoint_shard), map_location="cpu")) + with torch_lazy_loader.use_custom_unpickler(torch_lazy_loader.RestrictedUnpickler): + for checkpoint_shard in range(checkpoint_shards): + shards.append(torch.load(path_template.format(layer=checkpoint_layer, shard=checkpoint_shard), map_location="cpu")) for key in shards[0]: if key == "attention.rotary_emb.inv_freq": continue
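Two techniques in this series are worth a standalone illustration.

Patches 07-09 let an API request supply its own sampler_seed by saving the global RNG state, seeding only for that generation, and restoring the state afterwards so other users of the server are unaffected. A minimal sketch of that save/seed/restore pattern (hypothetical wrapper, simplified from the _generate_text changes above; it ignores the separate TPU and CUDA RNG paths the real code also handles):

    import torch

    def generate_with_seed(generate_fn, seed=None):
        # No per-request seed: leave the global RNG alone.
        if seed is None:
            return generate_fn()
        old_state = torch.get_rng_state()   # save the global (CPU) RNG state
        try:
            torch.manual_seed(seed)         # seed only this request
            return generate_fn()
        finally:
            torch.set_rng_state(old_state)  # restore it for everyone else

Patch 11 defends torch.load against pickle's arbitrary code execution by overriding Unpickler.find_class with an allow-list, so only known-safe globals can be resolved during deserialization. The same idea reduced to a self-contained sketch (simplified; the real RestrictedUnpickler above also admits the torch storage classes and numpy helpers that checkpoints need):

    import collections
    import io
    import pickle

    class AllowlistUnpickler(pickle.Unpickler):
        # Only these (module, name) pairs may be resolved while unpickling;
        # anything else (e.g. os.system) raises instead of being imported.
        ALLOWED = {("collections", "OrderedDict")}

        def find_class(self, module, name):
            if (module, name) in self.ALLOWED:
                return super().find_class(module, name)
            raise pickle.UnpicklingError(
                f"`{module}.{name}` is forbidden; refusing to unpickle it")

    payload = pickle.dumps(collections.OrderedDict(a=1))
    print(AllowlistUnpickler(io.BytesIO(payload)).load())  # an OrderedDict

Because find_class is consulted for every global the stream references, an allow-list (rather than a deny-list) is the robust choice here: any gadget not explicitly permitted is rejected by default.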