From a9ef4751420d24864ef9d13f0ae934d82592f741 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 3 May 2023 17:57:38 -0500 Subject: [PATCH 001/102] Lock safetensors in version jail Let's have breaking changes when we expect them --- environments/huggingface.yml | 2 +- environments/rocm.yml | 2 +- requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 669d5d6d..2ac51431 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -33,7 +33,7 @@ dependencies: - lupa==1.10 - transformers==4.28.0 - huggingface_hub==0.12.1 - - safetensors + - safetensors==0.3.1 - accelerate==0.18.0 - git+https://github.com/VE-FORBRYDERNE/mkultra - flask-session diff --git a/environments/rocm.yml b/environments/rocm.yml index dc2dd40e..1329612b 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -32,7 +32,7 @@ dependencies: - lupa==1.10 - transformers==4.28.0 - huggingface_hub==0.12.1 - - safetensors + - safetensors==0.3.1 - accelerate==0.18.0 - git+https://github.com/VE-FORBRYDERNE/mkultra - ansi2html diff --git a/requirements.txt b/requirements.txt index 6407303c..cb567b08 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,4 +36,4 @@ pytest==7.2.2 pytest-html==3.2.0 pytest-metadata==2.0.4 requests-mock==1.10.0 -safetensors \ No newline at end of file +safetensors==0.3.1 From 35b56117e6423dfcdedeffc13e4308da4b6342bc Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 3 May 2023 18:51:01 -0500 Subject: [PATCH 002/102] Basic PEFT support --- aiserver.py | 1 + environments/huggingface.yml | 1 + environments/rocm.yml | 1 + modeling/inference_models/hf_torch.py | 33 ++++++++++++++++++++++++++- models/peft/README.txt | 2 ++ requirements.txt | 1 + 6 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 models/peft/README.txt diff --git a/aiserver.py b/aiserver.py index 8d481b75..6af7f2b1 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1345,6 +1345,7 @@ def general_startup(override_args=None): parser.add_argument("--summarizer_model", action='store', default="philschmid/bart-large-cnn-samsum", help="Huggingface model to use for summarization. Defaults to sshleifer/distilbart-cnn-12-6") parser.add_argument("--max_summary_length", action='store', default=75, help="Maximum size for summary to send to image generation") parser.add_argument("--multi_story", action='store_true', default=False, help="Allow multi-story mode (experimental)") + parser.add_argument("--peft", type=str, help="Specify the path or HuggingFace ID of a Peft to load it. Not supported on TPU. (Experimental)") parser.add_argument('-f', action='store', help="option for compatability with colab memory profiles") parser.add_argument('-v', '--verbosity', action='count', default=0, help="The default logging level is ERROR or higher. 
This value increases the amount of logging seen in your screen") diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 2ac51431..b4df45ec 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -45,3 +45,4 @@ dependencies: - ftfy - pydub - diffusers + - peft==0.3.0 diff --git a/environments/rocm.yml b/environments/rocm.yml index 1329612b..a33a8f96 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -41,3 +41,4 @@ dependencies: - ftfy - pydub - diffusers + - peft==0.3.0 diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 49cdfc0f..324cf953 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -22,6 +22,7 @@ from transformers import ( AutoModelForCausalLM, LogitsProcessorList, ) +from peft import PeftModel, PeftConfig import utils import modeling.lazy_loader as lazy_loader @@ -211,6 +212,31 @@ class HFTorchInferenceModel(HFInferenceModel): new_sample.old_sample = transformers.GenerationMixin.sample use_core_manipulations.sample = new_sample + # PEFT Loading. This MUST be done after all save_pretrained calls are + # finished on the main model. + if utils.args.peft: + peft_local_path = os.path.join("models/peft", utils.args.peft.replace("/", "_")) + logger.debug(f"Loading PEFT '{utils.args.peft}', possible local path is '{peft_local_path}'.") + + peft_installed_locally = True + possible_peft_locations = [peft_local_path, utils.args.peft] + + for i, location in enumerate(possible_peft_locations): + try: + m_self.model = PeftModel.from_pretrained(m_self.model, location) + logger.debug(f"Loaded PEFT at '{location}'") + break + except ValueError: + peft_installed_locally = False + if i == len(possible_peft_locations) - 1: + raise RuntimeError(f"Unable to load PeftModel for given name '{utils.args.peft}'. Does it exist?") + except RuntimeError: + raise RuntimeError("Error while loading PeftModel. Are you using the correct model?") + + if not peft_installed_locally: + logger.debug(f"PEFT not saved to models folder; saving to '{peft_local_path}'") + m_self.model.save_pretrained(peft_local_path) + return super()._post_load() def _raw_generate( @@ -238,8 +264,13 @@ class HFTorchInferenceModel(HFInferenceModel): with torch.no_grad(): start_time = time.time() + + # HEED & BEWARE: All arguments passed to self.model.generate MUST be + # kwargs; see https://github.com/huggingface/peft/issues/232. If they + # aren't, PeftModel will EXPLODE!!!! But nothing will happen without + # a PEFT loaded so it's sneaky. genout = self.model.generate( - gen_in, + input_ids=gen_in, do_sample=True, max_length=min( len(prompt_tokens) + max_new, utils.koboldai_vars.max_length diff --git a/models/peft/README.txt b/models/peft/README.txt new file mode 100644 index 00000000..fc7b72c4 --- /dev/null +++ b/models/peft/README.txt @@ -0,0 +1,2 @@ +PEFT models will be stored in this directory when downloaded. +Please don't be too mean to this directory. 
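For reference, the switch in _raw_generate above to generate(input_ids=gen_in, ...) is needed because PeftModel.generate only handles keyword arguments correctly in peft 0.3.0 (see https://github.com/huggingface/peft/issues/232); with no PEFT loaded the positional form still works, which is why the problem is easy to miss. A minimal, self-contained sketch of the same load-and-generate pattern, using a placeholder base model and a hypothetical adapter path rather than anything KoboldAI itself ships, looks roughly like this:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_id = "EleutherAI/gpt-neo-125m"   # placeholder base model
adapter_path = "models/peft/my_adapter"     # hypothetical local adapter directory

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
model = AutoModelForCausalLM.from_pretrained(base_model_id)

# Wrap the base model with the adapter weights, as _post_load() does above.
model = PeftModel.from_pretrained(model, adapter_path)

prompt_ids = tokenizer("Hello", return_tensors="pt").input_ids
with torch.no_grad():
    # Pass the prompt as a keyword argument; a positional argument breaks
    # PeftModel.generate in peft 0.3.0.
    output = model.generate(input_ids=prompt_ids, do_sample=True, max_length=32)
print(tokenizer.decode(output[0]))
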
diff --git a/requirements.txt b/requirements.txt index cb567b08..800877ad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -37,3 +37,4 @@ pytest-html==3.2.0 pytest-metadata==2.0.4 requests-mock==1.10.0 safetensors==0.3.1 +peft==0.3.0 From 91463a4d9790a8f09b0e573161a4e513e2db2b26 Mon Sep 17 00:00:00 2001 From: Bogdan Drema Date: Thu, 4 May 2023 01:47:41 +0100 Subject: [PATCH 003/102] feat: llama config and updated mtj requirement --- maps/llama.json | 35 +++++++++++++++++++++++++++++++++++ requirements_mtj.txt | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 maps/llama.json diff --git a/maps/llama.json b/maps/llama.json new file mode 100644 index 00000000..c1da6491 --- /dev/null +++ b/maps/llama.json @@ -0,0 +1,35 @@ +{ + "mtj_compat": "llama", + "mtj_pe": "neox_rotary", + "mtj_config_map": { + "norm": ["norm", "layernorm-nobias"], + "pe_rotary_dims": ["pe_rotary_dims", 128], + "d_model": "hidden_size", + "n_heads": "num_attention_heads", + "n_vocab": "vocab_size", + "layers": "num_hidden_layers", + "seq": "max_position_embeddings", + "tokenizer_class": ["tokenizer_class", "LlamaTokenizer"], + "tokenizer": ["tokenizer", "llama"] + }, + "static_weights": { + "model.embed_tokens.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}}, + "model.norm.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}}, + "lm_head.weight": {"mtj": {"module": "projection_shard/~/linear", "param": "w", "transforms": ["vocab_pad"]}} + }, + "layer_weights": { + "transformer.h.{layer}.attn.attention.bias": {}, + "transformer.h.{layer}.attn.attention.masked_bias": {}, + "model.layers.{layer}.self_attn.rotary_emb.inv_freq": {}, + "model.layers.{layer}.self_attn.q_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear", "param": "w"}}, + "model.layers.{layer}.self_attn.v_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "w"}}, + "model.layers.{layer}.self_attn.k_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "w"}}, + "model.layers.{layer}.self_attn.o_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "w"}}, + "model.layers.{layer}.mlp.gate_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "w"}}, + "model.layers.{layer}.mlp.down_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "w"}}, + "model.layers.{layer}.mlp.up_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_6", "param": "w"}}, + "model.layers.{layer}.input_layernorm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "scale"}}, + "model.layers.{layer}.post_attention_layernorm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "scale"}} + } + } + \ No newline at end of file diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 19da3910..ef9bb2b4 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -9,7 +9,7 @@ transformers == 4.28.0 chex == 0.1.5 huggingface_hub==0.12.1 progressbar2 -git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck +git+https://github.com/Zurnaz/mesh-transformer-jax.git@llama_tpu Flask==2.2.3 Flask-SocketIO==5.3.2 python-socketio==5.7.2 From 33745669dd8a08d3c5d743de67c0ed169a5d1dce Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 5 May 2023 13:14:58 +0200 Subject: [PATCH 004/102] Pytorch 2.0 --- environments/huggingface.yml | 6 ++++-- requirements.txt | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git 
a/environments/huggingface.yml b/environments/huggingface.yml index 669d5d6d..b8d640fb 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -1,6 +1,7 @@ name: koboldai channels: - pytorch + - nvidia - conda-forge - defaults dependencies: @@ -9,9 +10,9 @@ dependencies: - flask-socketio=5.3.2 - flask-session=0.4.0 - python-socketio=5.7.2 - - pytorch=1.11.* + - pytorch=2.0.* - python=3.8.* - - cudatoolkit=11.1 + - pytorch-cuda=11.8 - eventlet=0.33.3 - dnspython=2.2.1 - markdown @@ -45,3 +46,4 @@ dependencies: - ftfy - pydub - diffusers + - peft diff --git a/requirements.txt b/requirements.txt index 6407303c..584e7377 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ Flask==2.2.3 Flask-SocketIO==5.3.2 python-socketio==5.7.2 requests -torch >= 1.9, < 1.13 +torch == 2.0.* flask-cloudflared==0.0.10 flask-ngrok flask-cors @@ -36,4 +36,5 @@ pytest==7.2.2 pytest-html==3.2.0 pytest-metadata==2.0.4 requests-mock==1.10.0 -safetensors \ No newline at end of file +safetensors +peft \ No newline at end of file From b1722081a505019a269bedb1a6177ab4874da765 Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 5 May 2023 15:12:59 +0200 Subject: [PATCH 005/102] AMD Pytorch 2.0 --- environments/rocm.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/environments/rocm.yml b/environments/rocm.yml index dc2dd40e..6213089b 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -24,8 +24,8 @@ dependencies: - Pillow - psutil - pip: - - --extra-index-url https://download.pytorch.org/whl/rocm5.2 - - torch==1.13.1+rocm5.2 + - --extra-index-url https://download.pytorch.org/whl/rocm5.4.2 + - torch==2.0.* - flask-cloudflared==0.0.10 - flask-ngrok - flask-cors @@ -41,3 +41,4 @@ dependencies: - ftfy - pydub - diffusers + - peft From 33969b5845fec660d206ba9deba79de5500d4e6c Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 5 May 2023 17:23:01 +0200 Subject: [PATCH 006/102] Basic HF code execution support --- aiserver.py | 12 +++++++++--- koboldai_settings.py | 5 +++-- modeling/inference_models/hf_torch.py | 1 + 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/aiserver.py b/aiserver.py index 8d481b75..32823dd0 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1345,7 +1345,8 @@ def general_startup(override_args=None): parser.add_argument("--summarizer_model", action='store', default="philschmid/bart-large-cnn-samsum", help="Huggingface model to use for summarization. Defaults to sshleifer/distilbart-cnn-12-6") parser.add_argument("--max_summary_length", action='store', default=75, help="Maximum size for summary to send to image generation") parser.add_argument("--multi_story", action='store_true', default=False, help="Allow multi-story mode (experimental)") - + parser.add_argument("--trust_remote_code", action='store_true', default=False, help="Allow Huggingface Models to Execute Code (Insecure!)") + parser.add_argument('-f', action='store', help="option for compatability with colab memory profiles") parser.add_argument('-v', '--verbosity', action='count', default=0, help="The default logging level is ERROR or higher. This value increases the amount of logging seen in your screen") parser.add_argument('-q', '--quiesce', action='count', default=0, help="The default logging level is ERROR or higher. 
This value decreases the amount of logging seen in your screen") @@ -1469,8 +1470,13 @@ def general_startup(override_args=None): allowed_ips = sorted(allowed_ips, key=lambda ip: int(''.join([i.zfill(3) for i in ip.split('.')]))) print(f"Allowed IPs: {allowed_ips}") - - + if args.trust_remote_code: + logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") + logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") + logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") + logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") + logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") + koboldai_vars.trust_remote_code = True if args.cpu: koboldai_vars.use_colab_tpu = False diff --git a/koboldai_settings.py b/koboldai_settings.py index dfccd4ef..d8416df2 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1207,12 +1207,12 @@ class system_settings(settings): local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'comregex_ui', 'sp', '_horde_pid', 'inference_config', 'image_pipeline', - 'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui'] + 'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui', 'trust_remote_code'] no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy', 'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', 'summary_tokenizer', 'use_colab_tpu', 'noai', 'disable_set_aibusy', 'cloudflare_link', 'tts_model', - 'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states', 'comregex_ai', 'comregex_ui', 'git_repository', 'git_branch'] + 'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states', 'comregex_ai', 'comregex_ui', 'git_repository', 'git_branch', 'trust_remote_code'] settings_name = "system" def __init__(self, socketio, koboldai_var): self._socketio = socketio @@ -1298,6 +1298,7 @@ class system_settings(settings): self.seen_messages = [] self.git_repository = "" self.git_branch = "" + self.trust_remote_code = False @dataclass diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 49cdfc0f..ca1f1cdf 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -265,6 +265,7 @@ class HFTorchInferenceModel(HFInferenceModel): def _get_model(self, location: str, tf_kwargs: Dict): tf_kwargs["revision"] = utils.koboldai_vars.revision tf_kwargs["cache_dir"] = "cache" + tf_kwargs["trust_remote_code"] = utils.koboldai_vars.trust_remote_code # If we have model hints for legacy model, use them rather than fall back. try: From d508b4a3199df05eafa559ac021abbd6e88fb574 Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 5 May 2023 19:50:56 +0200 Subject: [PATCH 007/102] More max_context_length flexibility --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 32823dd0..28a0e298 100644 --- a/aiserver.py +++ b/aiserver.py @@ -8292,7 +8292,7 @@ class GenerationInputSchema(SamplerSettingsSchema): use_userscripts: bool = fields.Boolean(load_default=False, metadata={"description": "Whether or not to use the userscripts from the KoboldAI GUI when generating text."}) soft_prompt: Optional[str] = fields.String(metadata={"description": "Soft prompt to use when generating. 
If set to the empty string or any other string containing no non-whitespace characters, uses no soft prompt."}, validate=[soft_prompt_validator, validate.Regexp(r"^[^/\\]*$")]) max_length: int = fields.Integer(validate=validate.Range(min=1, max=512), metadata={"description": "Number of tokens to generate."}) - max_context_length: int = fields.Integer(validate=validate.Range(min=512, max=2048), metadata={"description": "Maximum number of tokens to send to the model."}) + max_context_length: int = fields.Integer(validate=validate.Range(min=1), metadata={"description": "Maximum number of tokens to send to the model."}) n: int = fields.Integer(validate=validate.Range(min=1, max=5), metadata={"description": "Number of outputs to generate."}) disable_output_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all output formatting options default to `false` instead of the value in the KoboldAI GUI."}) frmttriminc: Optional[bool] = fields.Boolean(metadata={"description": "Output formatting option. When enabled, removes some characters from the end of the output such that the output doesn't end in the middle of a sentence. If the output is less than one sentence long, does nothing.\n\nIf `disable_output_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."}) From 2730879c61273a4484d66a5fcfc195d45d7af015 Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 5 May 2023 21:28:06 +0200 Subject: [PATCH 008/102] Better warning until something more robust is in --- aiserver.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index 28a0e298..0a467aa3 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1472,10 +1472,10 @@ def general_startup(override_args=None): if args.trust_remote_code: logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") - logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") - logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") - logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") - logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") + logger.warning("You are not protected from Model Viruses in this mode!") + logger.warning("Exit the program now to abort execution!") + logger.warning("Only use this mode with models that you trust and verified!") + time.sleep(25) koboldai_vars.trust_remote_code = True if args.cpu: koboldai_vars.use_colab_tpu = False From b7db709c4729cc2ec522fc987a4fe6f0126f4bea Mon Sep 17 00:00:00 2001 From: somebody Date: Sat, 6 May 2023 11:16:09 -0500 Subject: [PATCH 009/102] PEFT: Change directory structure to be inside model --- modeling/inference_models/hf_torch.py | 10 +++++++++- models/peft/README.txt | 2 -- 2 files changed, 9 insertions(+), 3 deletions(-) delete mode 100644 models/peft/README.txt diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index d4d5320b..c46195ba 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -215,7 +215,15 @@ class HFTorchInferenceModel(HFInferenceModel): # PEFT Loading. This MUST be done after all save_pretrained calls are # finished on the main model. 
if utils.args.peft: - peft_local_path = os.path.join("models/peft", utils.args.peft.replace("/", "_")) + local_peft_dir = os.path.join(m_self.get_local_model_path(), "peft") + + # Make PEFT dir if it doesn't exist + try: + os.makedirs(local_peft_dir) + except FileExistsError: + pass + + peft_local_path = os.path.join(local_peft_dir, utils.args.peft.replace("/", "_")) logger.debug(f"Loading PEFT '{utils.args.peft}', possible local path is '{peft_local_path}'.") peft_installed_locally = True diff --git a/models/peft/README.txt b/models/peft/README.txt deleted file mode 100644 index fc7b72c4..00000000 --- a/models/peft/README.txt +++ /dev/null @@ -1,2 +0,0 @@ -PEFT models will be stored in this directory when downloaded. -Please don't be too mean to this directory. From bb206f598ee114de000bbfaa99371834c944740b Mon Sep 17 00:00:00 2001 From: Henk Date: Sat, 6 May 2023 18:55:26 +0200 Subject: [PATCH 010/102] Don't load peft when unused --- modeling/inference_models/hf_torch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index c46195ba..990fabfc 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -22,7 +22,6 @@ from transformers import ( AutoModelForCausalLM, LogitsProcessorList, ) -from peft import PeftModel, PeftConfig import utils import modeling.lazy_loader as lazy_loader @@ -215,6 +214,7 @@ class HFTorchInferenceModel(HFInferenceModel): # PEFT Loading. This MUST be done after all save_pretrained calls are # finished on the main model. if utils.args.peft: + from peft import PeftModel, PeftConfig local_peft_dir = os.path.join(m_self.get_local_model_path(), "peft") # Make PEFT dir if it doesn't exist From cb4af7e56e67ac877a0d396b51559d4a48b7d986 Mon Sep 17 00:00:00 2001 From: henk717 Date: Mon, 8 May 2023 17:23:49 +0200 Subject: [PATCH 011/102] Update requirements_mtj.txt --- requirements_mtj.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements_mtj.txt b/requirements_mtj.txt index ef9bb2b4..1b40fded 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -32,4 +32,5 @@ ansi2html flask_compress ijson ftfy -pydub \ No newline at end of file +pydub +sentencepiece From d53726bed610d03ec4b3edf3613c72f3754a7fba Mon Sep 17 00:00:00 2001 From: Bogdan Drema Date: Mon, 8 May 2023 18:24:34 +0100 Subject: [PATCH 012/102] fix: tpu tokenizers errors --- modeling/inference_models/hf.py | 2 +- modeling/inference_models/hf_mtj.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index cd609fed..37f473ca 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -59,7 +59,7 @@ class HFInferenceModel(InferenceModel): token_ids = [first] elif len(token_ids) > 0: first = int(token_ids[0]) - elif token_ids: + elif token_ids is not None and len(token_ids) > 0: first = token_ids[0] result = original_decode(self, token_ids, *args, **kwargs) if first is not None and first in has_prefix_space: diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py index 7661a67f..d7035cbf 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj.py @@ -17,6 +17,7 @@ from modeling.inference_model import ( ModelCapabilities, ) from modeling.inference_models.hf import HFInferenceModel +from modeling.tokenizer import GenericTokenizer # This file shouldn't be imported unless using the 
TPU assert utils.koboldai_vars.use_colab_tpu @@ -193,8 +194,7 @@ class HFMTJInferenceModel(HFInferenceModel): utils.koboldai_vars.modeldim = int( tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]) ) - - self.tokenizer = tpu_mtj_backend.tokenizer + self.tokenizer = GenericTokenizer(tpu_mtj_backend.tokenizer) if ( utils.koboldai_vars.badwordsids is koboldai_settings.badwordsids_default From a9e342ca64f8376e85d92beb9e65d246ec3997a8 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 8 May 2023 17:10:47 -0500 Subject: [PATCH 013/102] Fix TPU API errors --- aiserver.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aiserver.py b/aiserver.py index e744d18e..ef49f05c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3708,6 +3708,7 @@ def apiactionsubmit_tpumtjgenerate(txt, minimum, maximum): soft_tokens=soft_tokens, sampler_order=koboldai_vars.sampler_order, ) + genout = np.array(genout) genout = [utils.applyoutputformatting(utils.decodenewlines(tokenizer.decode(txt))) for txt in genout] return genout From 9fdc2f73a63e1f6fd64fdad06f37aef1f97b0adc Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 9 May 2023 20:59:10 +0200 Subject: [PATCH 014/102] ROCM Downgrade for stability --- environments/rocm.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/rocm.yml b/environments/rocm.yml index 51b3e852..81e32a58 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -25,7 +25,7 @@ dependencies: - psutil - pip: - --extra-index-url https://download.pytorch.org/whl/rocm5.4.2 - - torch==2.0.* + - torch==1.13.* - flask-cloudflared==0.0.10 - flask-ngrok - flask-cors From 702f59b2dbd458ccc9426cee0226740870a62b36 Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 9 May 2023 22:10:01 +0200 Subject: [PATCH 015/102] Downgrade ROCM properly --- environments/rocm.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environments/rocm.yml b/environments/rocm.yml index 81e32a58..a33a8f96 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -24,8 +24,8 @@ dependencies: - Pillow - psutil - pip: - - --extra-index-url https://download.pytorch.org/whl/rocm5.4.2 - - torch==1.13.* + - --extra-index-url https://download.pytorch.org/whl/rocm5.2 + - torch==1.13.1+rocm5.2 - flask-cloudflared==0.0.10 - flask-ngrok - flask-cors From 71aee4dbd8f1d429e0ebd27dbf98bfd6fcf6c52c Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 10 May 2023 16:30:46 -0400 Subject: [PATCH 016/102] First concept of model plugins with a conceptual UI. Completely breaks UI2 model loading. 
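In outline, every module in modeling/inference_models/ now exposes a model_loader class that the UI drives through three hooks. A simplified sketch of that contract, trimmed from the diff below with UI wiring and error handling omitted, is:

import importlib
import os

class model_loader:
    """Minimal shape of a backend plugin."""

    def is_valid(self, model_name, model_path, menu_path):
        # Can this backend load the selected model?
        return False

    def get_requested_parameters(self, model_name, model_path, menu_path):
        # Describe the extra inputs (text boxes, dropdowns, layer sliders)
        # the UI should render before loading, as a list of dicts.
        return []

    def set_input_parameters(self, **kwargs):
        # Receive the values the user entered for those inputs.
        pass

# Discovery, as done at startup in aiserver.py: import every .py file in
# the folder and instantiate its model_loader.
model_loaders = {}
plugin_dir = "./modeling/inference_models"
if os.path.isdir(plugin_dir):
    for module in os.listdir(plugin_dir):
        if os.path.isfile(os.path.join(plugin_dir, module)) and module.endswith(".py"):
            name = module[:-3]
            code = importlib.import_module(f"modeling.inference_models.{name}")
            model_loaders[name] = code.model_loader()
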
--- aiserver.py | 123 ++-- modeling/inference_model.py | 9 + modeling/inference_models/api.py | 26 +- modeling/inference_models/basic_api.py | 29 +- modeling/inference_models/generic_hf_torch.py | 8 +- modeling/inference_models/hf.py | 190 ------ modeling/inference_models/hf_mtj.py | 22 +- modeling/inference_models/horde.py | 88 ++- modeling/inference_models/openai.py | 85 ++- modeling/inference_models/parents/hf.py | 219 +++++++ .../{ => parents}/hf_torch.py | 56 +- modeling/inference_models/rwkv.py | 26 +- static/koboldai.css | 44 ++ static/koboldai.js | 548 +++++++++--------- templates/popups.html | 30 +- templates/templates.html | 19 + 16 files changed, 912 insertions(+), 610 deletions(-) delete mode 100644 modeling/inference_models/hf.py create mode 100644 modeling/inference_models/parents/hf.py rename modeling/inference_models/{ => parents}/hf_torch.py (94%) diff --git a/aiserver.py b/aiserver.py index e744d18e..e7227c81 100644 --- a/aiserver.py +++ b/aiserver.py @@ -168,6 +168,7 @@ class MenuFolder(MenuItem): "size": "", "isMenu": True, "isDownloaded": False, + "isDirectory": False } class MenuModel(MenuItem): @@ -200,8 +201,28 @@ class MenuModel(MenuItem): "size": self.vram_requirements, "isMenu": False, "isDownloaded": self.is_downloaded, + "isDirectory": False, } +class MenuPath(MenuItem): + def to_ui1(self) -> list: + return [ + self.label, + self.name, + "", + True, + ] + + def to_json(self) -> dict: + return { + "label": self.label, + "name": self.name, + "size": "", + "isMenu": True, + "isDownloaded": False, + "isDirectory": True, + "path": "./models" + } # AI models Menu # This is a dict of lists where they key is the menu name, and the list is the menu items. @@ -209,8 +230,8 @@ class MenuModel(MenuItem): # 3: the memory requirement for the model, 4: if the item is a menu or not (True/False) model_menu = { "mainmenu": [ - MenuModel("Load a model from its directory", "NeoCustom"), - MenuModel("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), + MenuPath("Load a model from its directory", "NeoCustom"), + MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), MenuFolder("Load custom model from Hugging Face", "customhuggingface"), MenuFolder("Adventure Models", "adventurelist"), MenuFolder("Novel Models", "novellist"), @@ -600,6 +621,15 @@ utils.socketio = socketio # Weird import position to steal koboldai_vars from utils from modeling.patches import patch_transformers +#Load all of the model importers +import importlib +model_loader_code = {} +model_loaders = {} +for module in os.listdir("./modeling/inference_models"): + if os.path.isfile(os.path.join("./modeling/inference_models",module)) and module[-3:] == '.py': + model_loader_code[module[:-3]] = importlib.import_module('modeling.inference_models.{}'.format(module[:-3])) + model_loaders[module[:-3]] = model_loader_code[module[:-3]].model_loader() + old_socketio_on = socketio.on def new_socketio_on(*a, **k): @@ -906,6 +936,8 @@ def sendModelSelection(menu="mainmenu", folder="./models"): ) def get_folder_path_info(base): + if base is None: + return [], [] if base == 'This PC': breadcrumbs = [['This PC', 'This PC']] paths = [["{}:\\".format(chr(i)), "{}:\\".format(chr(i))] for i in range(65, 91) if os.path.exists("{}:".format(chr(i)))] @@ -1932,25 +1964,25 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal koboldai_vars.breakmodel = False if koboldai_vars.model == "Colab": - from modeling.inference_models.basic_api import BasicAPIInferenceModel - model = 
BasicAPIInferenceModel() + from modeling.inference_models.basic_api import model_loader + model = model_loader() elif koboldai_vars.model == "API": - from modeling.inference_models.api import APIInferenceModel - model = APIInferenceModel(koboldai_vars.colaburl.replace("/request", "")) + from modeling.inference_models.api import model_loader + model = model_loader(koboldai_vars.colaburl.replace("/request", "")) elif koboldai_vars.model == "CLUSTER": - from modeling.inference_models.horde import HordeInferenceModel - model = HordeInferenceModel() + from modeling.inference_models.horde import model_loader + model = model_loader() elif koboldai_vars.model == "OAI": - from modeling.inference_models.openai import OpenAIAPIInferenceModel - model = OpenAIAPIInferenceModel() + from modeling.inference_models.openai import model_loader + model = model_loader() model.load(initial_load=initial_load) # TODO: This check sucks, make a model object or somethign elif "rwkv" in koboldai_vars.model: if koboldai_vars.use_colab_tpu: raise RuntimeError("RWKV is not supported on the TPU.") - from modeling.inference_models.rwkv import RWKVInferenceModel - model = RWKVInferenceModel(koboldai_vars.model) + from modeling.inference_models.rwkv import model_loader + model = model_loader(koboldai_vars.model) model.load() elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai: # HF Torch @@ -1961,8 +1993,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal except: pass - from modeling.inference_models.generic_hf_torch import GenericHFTorchInferenceModel - model = GenericHFTorchInferenceModel( + from modeling.inference_models.generic_hf_torch import model_loader + model = model_loader( koboldai_vars.model, lazy_load=koboldai_vars.lazy_load, low_mem=args.lowmem @@ -1975,8 +2007,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal logger.info(f"Pipeline created: {koboldai_vars.model}") else: # TPU - from modeling.inference_models.hf_mtj import HFMTJInferenceModel - model = HFMTJInferenceModel( + from modeling.inference_models.hf_mtj import model_loader + model = model_loader( koboldai_vars.model ) model.load( @@ -6430,7 +6462,9 @@ def UI_2_retry(data): @socketio.on('load_model_button') @logger.catch def UI_2_load_model_button(data): - sendModelSelection() + emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":"mainmenu"}} for item in model_menu['mainmenu'] if item.should_show()]}) + + #==================================================================# # Event triggered when user clicks the a model @@ -6438,6 +6472,38 @@ def UI_2_load_model_button(data): @socketio.on('select_model') @logger.catch def UI_2_select_model(data): + logger.debug("Clicked on model entry: {}".format(data)) + if data["name"] in model_menu and data['ismenu'] == "true": + emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) + else: + #Get load methods + logger.debug("Asking for model info on potential model: {}".format(data)) + valid = False + if 'path' not in data or data['path'] == "": + valid_loaders = {} + for model_loader in model_loaders: + logger.debug("Testing Loader {} for model {}: {}".format(model_loader, data["name"], model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]))) + if model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]): + 
valid_loaders[model_loader] = model_loaders[model_loader].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + valid = True + if valid: + logger.debug("Valid Loaders: {}".format(valid_loaders)) + emit("selected_model_info", valid_loaders) + if not valid: + #Get directories + paths, breadcrumbs = get_folder_path_info(data['path']) + output = [] + for path in paths: + valid=False + for model_loader in model_loaders: + if model_loaders[model_loader].is_valid(path[1], path[0], "Custom"): + valid=True + break + output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) + emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) + + return + #We've selected a menu if data['model'] in model_menu: @@ -6462,26 +6528,9 @@ def UI_2_select_model(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): - if not os.path.exists("settings/"): - os.mkdir("settings") - changed = True - if os.path.exists("settings/" + data['model'].replace('/', '_') + ".breakmodel"): - with open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "r") as file: - file_data = file.read().split('\n')[:2] - if len(file_data) < 2: - file_data.append("0") - gpu_layers, disk_layers = file_data - if gpu_layers == data['gpu_layers'] and disk_layers == data['disk_layers']: - changed = False - if changed: - f = open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "w") - f.write("{}\n{}".format(data['gpu_layers'], data['disk_layers'])) - f.close() - koboldai_vars.colaburl = data['url'] + "/request" - koboldai_vars.model = data['model'] - koboldai_vars.custmodpth = data['path'] - print("loading Model") - load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) + logger.info("loading Model") + logger.info(data) + #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) #==================================================================# # Event triggered when load story is clicked diff --git a/modeling/inference_model.py b/modeling/inference_model.py index b253c5bf..27ad46db 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -169,6 +169,15 @@ class InferenceModel: ] self.tokenizer = None self.capabilties = ModelCapabilities() + + def is_valid(self, model_name, model_path, menu_path, vram): + return True + + def requested_parameters(self, model_name, model_path, menu_path, vram): + return {} + + def define_input_parameters(self): + return def load(self, save_model: bool = False, initial_load: bool = False) -> None: """User-facing load function. 
Do not override this; try `_load()` instead.""" diff --git a/modeling/inference_models/api.py b/modeling/inference_models/api.py index d25505b0..41088bc7 100644 --- a/modeling/inference_models/api.py +++ b/modeling/inference_models/api.py @@ -22,9 +22,31 @@ class APIException(Exception): """To be used for errors when using the Kobold API as an interface.""" -class APIInferenceModel(InferenceModel): - def __init__(self, base_url: str) -> None: +class model_loader(InferenceModel): + def __init__(self) -> None: super().__init__() + #self.base_url = "" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "API" + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "base_url", + "default": False, + "check": {"value": "", 'check': "!="}, + "tooltip": "The URL of the KoboldAI API to connect to.", + "menu_path": "", + "extra_classes": "", + "refresh_model_inputs": False + }) + return requested_parameters + + def set_input_parameters(self, base_url=""): self.base_url = base_url.rstrip("/") def _load(self, save_model: bool, initial_load: bool) -> None: diff --git a/modeling/inference_models/basic_api.py b/modeling/inference_models/basic_api.py index c96eb42c..d7fc0863 100644 --- a/modeling/inference_models/basic_api.py +++ b/modeling/inference_models/basic_api.py @@ -19,12 +19,37 @@ class BasicAPIException(Exception): """To be used for errors when using the Basic API as an interface.""" -class BasicAPIInferenceModel(InferenceModel): +class model_loader(InferenceModel): def __init__(self) -> None: super().__init__() # Do not allow API to be served over the API self.capabilties = ModelCapabilities(api_host=False) + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "Colab" + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "colaburl", + "default": False, + "check": {"value": "", 'check': "!="}, + "tooltip": "The URL of the Colab KoboldAI API to connect to.", + "menu_path": "", + "extra_classes": "", + "refresh_model_inputs": False + }) + return requested_parameters + + def set_input_parameters(self, colaburl=""): + self.colaburl = colaburl + + def _initialize_model(self): + return def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer("EleutherAI/gpt-neo-2.7B") @@ -68,7 +93,7 @@ class BasicAPIInferenceModel(InferenceModel): } # Create request - req = requests.post(utils.koboldai_vars.colaburl, json=reqdata) + req = requests.post(self.colaburl, json=reqdata) if req.status_code != 200: raise BasicAPIException(f"Bad status code {req.status_code}") diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index aa602b1a..366fbbb7 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -20,10 +20,14 @@ except ModuleNotFoundError as e: if not utils.koboldai_vars.use_colab_tpu: raise e -from modeling.inference_models.hf_torch import HFTorchInferenceModel +from modeling.inference_models.parents.hf_torch import HFTorchInferenceModel -class GenericHFTorchInferenceModel(HFTorchInferenceModel): +class model_loader(HFTorchInferenceModel): + + def _initialize_model(self): + return + def _load(self, save_model: bool, 
initial_load: bool) -> None: utils.koboldai_vars.allowsp = True diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py deleted file mode 100644 index cd609fed..00000000 --- a/modeling/inference_models/hf.py +++ /dev/null @@ -1,190 +0,0 @@ -import os -from typing import Optional -from transformers import AutoConfig - -import utils -import koboldai_settings -from logger import logger -from modeling.inference_model import InferenceModel - - -class HFInferenceModel(InferenceModel): - def __init__(self, model_name: str) -> None: - super().__init__() - self.model_config = None - self.model_name = model_name - - self.model = None - self.tokenizer = None - - def _post_load(self) -> None: - # These are model specific tokenizer overrides if a model has bad defaults - if utils.koboldai_vars.model_type == "llama": - # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer - self.tokenizer.add_bos_token = False - - # HF transformers no longer supports decode_with_prefix_space - # We work around this by wrapping decode, encode, and __call__ - # with versions that work around the 'prefix space' misfeature - # of sentencepiece. - vocab = self.tokenizer.convert_ids_to_tokens(range(self.tokenizer.vocab_size)) - has_prefix_space = {i for i, tok in enumerate(vocab) if tok.startswith("▁")} - - # Wrap 'decode' with a method that always returns text starting with a space - # when the head token starts with a space. This is what 'decode_with_prefix_space' - # used to do, and we implement it using the same technique (building a cache of - # tokens that should have a prefix space, and then prepending a space if the first - # token is in this set.) We also work around a bizarre behavior in which decoding - # a single token 13 behaves differently than decoding a squence containing only [13]. - original_decode = type(self.tokenizer.tokenizer).decode - def decode_wrapper(self, token_ids, *args, **kwargs): - first = None - # Note, the code below that wraps single-value token_ids in a list - # is to work around this wonky behavior: - # >>> t.decode(13) - # '<0x0A>' - # >>> t.decode([13]) - # '\n' - # Not doing this causes token streaming to receive <0x0A> characters - # instead of newlines. - if isinstance(token_ids, int): - first = token_ids - token_ids = [first] - elif hasattr(token_ids, 'dim'): # Check for e.g. torch.Tensor - # Tensors don't support the Python standard of 'empty is False' - # and the special case of dimension 0 tensors also needs to be - # handled separately. - if token_ids.dim() == 0: - first = int(token_ids.item()) - token_ids = [first] - elif len(token_ids) > 0: - first = int(token_ids[0]) - elif token_ids: - first = token_ids[0] - result = original_decode(self, token_ids, *args, **kwargs) - if first is not None and first in has_prefix_space: - result = " " + result - return result - # GenericTokenizer overrides __setattr__ so we need to use object.__setattr__ to bypass it - object.__setattr__(self.tokenizer, 'decode', decode_wrapper.__get__(self.tokenizer)) - - # Wrap encode and __call__ to work around the 'prefix space' misfeature also. - # The problem is that "Bob" at the start of text is encoded as if it is - # " Bob". This creates a problem because it means you can't split text, encode - # the pieces, concatenate the tokens, decode them, and get the original text back. - # The workaround is to prepend a known token that (1) starts with a space; and - # (2) is not the prefix of any other token. 
After searching through the vocab - # " ," (space comma) is the only token containing only printable ascii characters - # that fits this bill. By prepending ',' to the text, the original encode - # method always returns [1919, ...], where the tail of the sequence is the - # actual encoded result we want without the prefix space behavior. - original_encode = type(self.tokenizer.tokenizer).encode - def encode_wrapper(self, text, *args, **kwargs): - if type(text) is str: - text = ',' + text - result = original_encode(self, text, *args, **kwargs) - result = result[1:] - else: - result = original_encode(self, text, *args, **kwargs) - return result - object.__setattr__(self.tokenizer, 'encode', encode_wrapper.__get__(self.tokenizer)) - - # Since 'encode' is documented as being deprecated, also override __call__. - # This doesn't appear to currently be used by KoboldAI, but doing so - # in case someone uses it in the future. - original_call = type(self.tokenizer.tokenizer).__call__ - def call_wrapper(self, text, *args, **kwargs): - if type(text) is str: - text = ',' + text - result = original_call(self, text, *args, **kwargs) - result = result[1:] - else: - result = original_call(self, text, *args, **kwargs) - return result - object.__setattr__(self.tokenizer, '__call__', call_wrapper.__get__(self.tokenizer)) - - elif utils.koboldai_vars.model_type == "opt": - self.tokenizer._koboldai_header = self.tokenizer.encode("") - self.tokenizer.add_bos_token = False - self.tokenizer.add_prefix_space = False - - # Change newline behavior to match model quirks - if utils.koboldai_vars.model_type == "xglm": - # Default to newline mode if using XGLM - utils.koboldai_vars.newlinemode = "s" - elif utils.koboldai_vars.model_type in ["opt", "bloom"]: - # Handle but don't convert newlines if using Fairseq models that have newlines trained in them - utils.koboldai_vars.newlinemode = "ns" - - # Clean up tokens that cause issues - if ( - utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default - and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj") - ): - utils.koboldai_vars.badwordsids = [ - [v] - for k, v in self.tokenizer.get_vocab().items() - if any(c in str(k) for c in "[]") - ] - - if utils.koboldai_vars.newlinemode == "n": - utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id]) - - return super()._post_load() - - def get_local_model_path( - self, legacy: bool = False, ignore_existance: bool = False - ) -> Optional[str]: - """ - Returns a string of the model's path locally, or None if it is not downloaded. - If ignore_existance is true, it will always return a path. 
- """ - - if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]: - model_path = utils.koboldai_vars.custmodpth - assert model_path - - # Path can be absolute or relative to models directory - if os.path.exists(model_path): - return model_path - - model_path = os.path.join("models", model_path) - - try: - assert os.path.exists(model_path) - except AssertionError: - logger.error(f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'.") - raise - - return model_path - - basename = utils.koboldai_vars.model.replace("/", "_") - if legacy: - ret = basename - else: - ret = os.path.join("models", basename) - - if os.path.isdir(ret) or ignore_existance: - return ret - return None - - def init_model_config(self) -> None: - # Get the model_type from the config or assume a model type if it isn't present - try: - self.model_config = AutoConfig.from_pretrained( - self.get_local_model_path() or self.model_name, - revision=utils.koboldai_vars.revision, - cache_dir="cache", - ) - utils.koboldai_vars.model_type = self.model_config.model_type - except ValueError: - utils.koboldai_vars.model_type = { - "NeoCustom": "gpt_neo", - "GPT2Custom": "gpt2", - }.get(utils.koboldai_vars.model) - - if not utils.koboldai_vars.model_type: - logger.warning( - "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)" - ) - utils.koboldai_vars.model_type = "gpt_neo" diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py index 7661a67f..c99e9a05 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj.py @@ -16,19 +16,17 @@ from modeling.inference_model import ( GenerationSettings, ModelCapabilities, ) -from modeling.inference_models.hf import HFInferenceModel - -# This file shouldn't be imported unless using the TPU -assert utils.koboldai_vars.use_colab_tpu -import tpu_mtj_backend +from modeling.inference_models.parents.hf import HFInferenceModel -class HFMTJInferenceModel(HFInferenceModel): + + +class model_loader(HFInferenceModel): def __init__( self, - model_name: str, + #model_name: str, ) -> None: - super().__init__(model_name) + super().__init__() self.model_config = None self.capabilties = ModelCapabilities( @@ -38,8 +36,13 @@ class HFMTJInferenceModel(HFInferenceModel): post_token_probs=False, uses_tpu=True, ) + + def is_valid(self, model_name, model_path, menu_path): + # This file shouldn't be imported unless using the TPU + return utils.koboldai_vars.use_colab_tpu and super().is_valid(model_name, model_path, menu_path) def setup_mtj(self) -> None: + import tpu_mtj_backend def mtj_warper_callback(scores) -> "np.array": scores_shape = scores.shape scores_list = scores.tolist() @@ -175,6 +178,7 @@ class HFMTJInferenceModel(HFInferenceModel): tpu_mtj_backend.settings_callback = mtj_settings_callback def _load(self, save_model: bool, initial_load: bool) -> None: + import tpu_mtj_backend self.setup_mtj() self.init_model_config() utils.koboldai_vars.allowsp = True @@ -207,6 +211,7 @@ class HFMTJInferenceModel(HFInferenceModel): ] def get_soft_tokens(self) -> np.array: + import tpu_mtj_backend soft_tokens = None if utils.koboldai_vars.sp is None: @@ -258,6 +263,7 @@ class HFMTJInferenceModel(HFInferenceModel): seed: Optional[int] = None, **kwargs, ) -> GenerationResult: + import tpu_mtj_backend warpers.update_settings() soft_tokens = self.get_soft_tokens() diff --git a/modeling/inference_models/horde.py 
b/modeling/inference_models/horde.py index c6294374..56e88205 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -21,13 +21,99 @@ class HordeException(Exception): """To be used for errors on server side of the Horde.""" -class HordeInferenceModel(InferenceModel): +class model_loader(InferenceModel): def __init__(self) -> None: super().__init__() + self.url = "https://horde.koboldai.net" + self.key = "0000000000" + self.models = self.get_cluster_models() + # Do not allow API to be served over the API self.capabilties = ModelCapabilities(api_host=False) + def is_valid(self, model_name, model_path, menu_path): + logger.debug("Horde Models: {}".format(self.models)) + return model_name == "CLUSTER" or model_name in [x['value'] for x in self.models] + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + requested_parameters.extend([{ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "url", + "default": self.url, + "tooltip": "URL to the horde.", + "menu_path": "", + "check": {"value": "", 'check': "!="}, + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "text", + "unit": "text", + "label": "Key", + "id": "key", + "default": self.key, + "check": {"value": "", 'check': "!="}, + "tooltip": "User Key to use when connecting to Horde (0000000000 is anonymous).", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "dropdown", + "unit": "text", + "label": "Model", + "id": "model", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Which model to use when running OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": False, + "extra_classes": "", + 'children': self.models, + + }]) + return requested_parameters + + def set_input_parameters(self, url="", key="", model=""): + self.key = key.strip() + self.model = model + self.url = url + + def get_cluster_models(self): + # Get list of models from public cluster + logger.info("Retrieving engine list...") + try: + req = requests.get(f"{self.url}/api/v2/status/models?type=text") + except: + logger.init_err("KAI Horde Models", status="Failed") + logger.error("Provided KoboldAI Horde URL unreachable") + emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"}) + return + if not req.ok: + # Something went wrong, print the message and quit since we can't initialize an engine + logger.init_err("KAI Horde Models", status="Failed") + logger.error(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1") + return + + engines = req.json() + try: + engines = [{"text": en["name"], "value": en["name"]} for en in engines] + except: + logger.error(engines) + raise + logger.debug(engines) + + online_model = "" + + logger.init_ok("KAI Horde Models", status="OK") + + return engines + def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer( utils.koboldai_vars.cluster_requested_models[0] diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index 1441ae2f..01c0c037 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -12,13 +12,96 @@ from modeling.inference_model import ( ) + class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") -class OpenAIAPIInferenceModel(InferenceModel): +class model_loader(InferenceModel): 
"""InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.key = "" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "OAI" or model_name == "GooseAI" + + def get_requested_parameters(self, model_name, model_path, menu_path): + self.source = model_name + requested_parameters = [] + requested_parameters.extend([{ + "uitype": "text", + "unit": "text", + "label": "Key", + "id": "key", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "User Key to use when connecting to OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "dropdown", + "unit": "text", + "label": "Model", + "id": "model", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Which model to use when running OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": False, + "extra_classes": "", + 'children': self.get_oai_models(), + + }]) + return requested_parameters + + def set_input_parameters(self, key="", model=""): + self.key = key.strip() + self.model = model + + def get_oai_models(self): + if self.key == "": + return [] + if self.source == 'OAI': + url = "https://api.openai.com/v1/engines" + elif self.source == 'GooseAI': + url = "https://api.goose.ai/v1/engines" + else: + return + + # Get list of models from OAI + logger.init("OAI Engines", status="Retrieving") + req = requests.get( + url, + headers = { + 'Authorization': 'Bearer '+self.key + } + ) + if(req.status_code == 200): + r = req.json() + engines = r["data"] + try: + engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines] + except: + logger.error(engines) + raise + + online_model = "" + + + logger.init_ok("OAI Engines", status="OK") + return engines + else: + # Something went wrong, print the message and quit since we can't initialize an engine + logger.init_err("OAI Engines", status="Failed") + logger.error(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) + return [] + def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer("gpt2") diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py new file mode 100644 index 00000000..54781296 --- /dev/null +++ b/modeling/inference_models/parents/hf.py @@ -0,0 +1,219 @@ +import os +from typing import Optional +from transformers import AutoConfig + +import utils +import koboldai_settings +from logger import logger +from modeling.inference_model import InferenceModel +import torch + + +class HFInferenceModel(InferenceModel): + def __init__(self) -> None: + super().__init__() + self.model_config = None + #self.model_name = model_name + + self.model = None + self.tokenizer = None + + def is_valid(self, model_name, model_path, menu_path): + try: + if model_path is not None and os.path.exists(model_path): + model_config = AutoConfig.from_pretrained(model_path) + elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): + model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + else: + model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + return True + except: + return False + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + + if 
model_path is not None and os.path.exists(model_path): + model_config = AutoConfig.from_pretrained(model_path) + elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): + model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + else: + model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + layer_count = model_config["n_layer"] if isinstance(model_config, dict) else model_config.num_layers if hasattr(model_config, "num_layers") else model_config.n_layer if hasattr(model_config, "n_layer") else model_config.num_hidden_layers if hasattr(model_config, 'num_hidden_layers') else None + if layer_count is not None and layer_count >= 0: + if os.path.exists("settings/{}.breakmodel".format(model_name.replace("/", "_"))): + with open("settings/{}.breakmodel".format(model_name.replace("/", "_")), "r") as file: + data = [x for x in file.read().split("\n")[:2] if x != ''] + if len(data) < 2: + data.append("0") + break_values, disk_blocks = data + break_values = break_values.split(",") + else: + break_values = [layer_count] + disk_blocks = None + break_values = [int(x) for x in break_values if x != '' and x is not None] + gpu_count = torch.cuda.device_count() + break_values += [0] * (gpu_count - len(break_values)) + if disk_blocks is not None: + break_values += [disk_blocks] + for i in range(gpu_count): + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "{} Layers".format(torch.cuda.get_device_name(i)), + "id": "{} Layers".format(i), + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": break_values[i], + "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "CPU Layers", + "id": "CPU Layers", + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": layer_count - sum(break_values), + "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + if disk_blocks is not None: + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "Disk Layers", + "id": "Disk_Layers", + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": disk_blocks, + "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. 
Use as a last resort.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + else: + requested_parameters.append({ + "uitype": "toggle", + "unit": "bool", + "label": "Use GPU", + "id": "use_gpu", + "default": False, + "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. Use as a last resort.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + + + return requested_parameters + + def set_input_parameters(self, layers=[], disk_layers=0, use_gpu=False): + self.layers = layers + self.disk_layers = disk_layers + self.use_gpu = use_gpu + + def _post_load(self) -> None: + # These are model specific tokenizer overrides if a model has bad defaults + if utils.koboldai_vars.model_type == "llama": + self.tokenizer.decode_with_prefix_space = True + self.tokenizer.add_bos_token = False + elif utils.koboldai_vars.model_type == "opt": + self.tokenizer._koboldai_header = self.tokenizer.encode("") + self.tokenizer.add_bos_token = False + self.tokenizer.add_prefix_space = False + + # Change newline behavior to match model quirks + if utils.koboldai_vars.model_type == "xglm": + # Default to newline mode if using XGLM + utils.koboldai_vars.newlinemode = "s" + elif utils.koboldai_vars.model_type in ["opt", "bloom"]: + # Handle but don't convert newlines if using Fairseq models that have newlines trained in them + utils.koboldai_vars.newlinemode = "ns" + + # Clean up tokens that cause issues + if ( + utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default + and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj") + ): + utils.koboldai_vars.badwordsids = [ + [v] + for k, v in self.tokenizer.get_vocab().items() + if any(c in str(k) for c in "[]") + ] + + if utils.koboldai_vars.newlinemode == "n": + utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id]) + + return super()._post_load() + + def get_local_model_path( + self, legacy: bool = False, ignore_existance: bool = False + ) -> Optional[str]: + """ + Returns a string of the model's path locally, or None if it is not downloaded. + If ignore_existance is true, it will always return a path. 
+ """ + + if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]: + model_path = utils.koboldai_vars.custmodpth + assert model_path + + # Path can be absolute or relative to models directory + if os.path.exists(model_path): + return model_path + + model_path = os.path.join("models", model_path) + + try: + assert os.path.exists(model_path) + except AssertionError: + logger.error(f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'.") + raise + + return model_path + + basename = utils.koboldai_vars.model.replace("/", "_") + if legacy: + ret = basename + else: + ret = os.path.join("models", basename) + + if os.path.isdir(ret) or ignore_existance: + return ret + return None + + def init_model_config(self) -> None: + # Get the model_type from the config or assume a model type if it isn't present + try: + self.model_config = AutoConfig.from_pretrained( + self.get_local_model_path() or self.model_name, + revision=utils.koboldai_vars.revision, + cache_dir="cache", + ) + utils.koboldai_vars.model_type = self.model_config.model_type + except ValueError: + utils.koboldai_vars.model_type = { + "NeoCustom": "gpt_neo", + "GPT2Custom": "gpt2", + }.get(utils.koboldai_vars.model) + + if not utils.koboldai_vars.model_type: + logger.warning( + "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)" + ) + utils.koboldai_vars.model_type = "gpt_neo" diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/parents/hf_torch.py similarity index 94% rename from modeling/inference_models/hf_torch.py rename to modeling/inference_models/parents/hf_torch.py index 990fabfc..d8afafb1 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -31,7 +31,7 @@ from modeling import warpers from modeling.warpers import Warper from modeling.stoppers import Stoppers from modeling.post_token_hooks import PostTokenHooks -from modeling.inference_models.hf import HFInferenceModel +from modeling.inference_models.parents.hf import HFInferenceModel from modeling.inference_model import ( GenerationResult, GenerationSettings, @@ -55,13 +55,13 @@ LOG_SAMPLER_NO_EFFECT = False class HFTorchInferenceModel(HFInferenceModel): def __init__( self, - model_name: str, - lazy_load: bool, - low_mem: bool, + #model_name: str, + #lazy_load: bool, + #low_mem: bool, ) -> None: - super().__init__(model_name) - self.lazy_load = lazy_load - self.low_mem = low_mem + super().__init__() + #self.lazy_load = lazy_load + #self.low_mem = low_mem self.post_token_hooks = [ PostTokenHooks.stream_tokens, @@ -211,40 +211,6 @@ class HFTorchInferenceModel(HFInferenceModel): new_sample.old_sample = transformers.GenerationMixin.sample use_core_manipulations.sample = new_sample - # PEFT Loading. This MUST be done after all save_pretrained calls are - # finished on the main model. 
- if utils.args.peft: - from peft import PeftModel, PeftConfig - local_peft_dir = os.path.join(m_self.get_local_model_path(), "peft") - - # Make PEFT dir if it doesn't exist - try: - os.makedirs(local_peft_dir) - except FileExistsError: - pass - - peft_local_path = os.path.join(local_peft_dir, utils.args.peft.replace("/", "_")) - logger.debug(f"Loading PEFT '{utils.args.peft}', possible local path is '{peft_local_path}'.") - - peft_installed_locally = True - possible_peft_locations = [peft_local_path, utils.args.peft] - - for i, location in enumerate(possible_peft_locations): - try: - m_self.model = PeftModel.from_pretrained(m_self.model, location) - logger.debug(f"Loaded PEFT at '{location}'") - break - except ValueError: - peft_installed_locally = False - if i == len(possible_peft_locations) - 1: - raise RuntimeError(f"Unable to load PeftModel for given name '{utils.args.peft}'. Does it exist?") - except RuntimeError: - raise RuntimeError("Error while loading PeftModel. Are you using the correct model?") - - if not peft_installed_locally: - logger.debug(f"PEFT not saved to models folder; saving to '{peft_local_path}'") - m_self.model.save_pretrained(peft_local_path) - return super()._post_load() def _raw_generate( @@ -272,13 +238,8 @@ class HFTorchInferenceModel(HFInferenceModel): with torch.no_grad(): start_time = time.time() - - # HEED & BEWARE: All arguments passed to self.model.generate MUST be - # kwargs; see https://github.com/huggingface/peft/issues/232. If they - # aren't, PeftModel will EXPLODE!!!! But nothing will happen without - # a PEFT loaded so it's sneaky. genout = self.model.generate( - input_ids=gen_in, + gen_in, do_sample=True, max_length=min( len(prompt_tokens) + max_new, utils.koboldai_vars.max_length @@ -304,7 +265,6 @@ class HFTorchInferenceModel(HFInferenceModel): def _get_model(self, location: str, tf_kwargs: Dict): tf_kwargs["revision"] = utils.koboldai_vars.revision tf_kwargs["cache_dir"] = "cache" - tf_kwargs["trust_remote_code"] = utils.koboldai_vars.trust_remote_code # If we have model hints for legacy model, use them rather than fall back. try: diff --git a/modeling/inference_models/rwkv.py b/modeling/inference_models/rwkv.py index 006bb8fd..d14d8c81 100644 --- a/modeling/inference_models/rwkv.py +++ b/modeling/inference_models/rwkv.py @@ -17,7 +17,7 @@ from torch.nn import functional as F os.environ["RWKV_JIT_ON"] = "1" # TODO: Include compiled kernel os.environ["RWKV_CUDA_ON"] = "1" -from rwkv.model import RWKV + import utils from logger import logger @@ -55,13 +55,13 @@ MODEL_FILES = { } -class RWKVInferenceModel(InferenceModel): +class model_loader(InferenceModel): def __init__( self, - model_name: str, + #model_name: str, ) -> None: super().__init__() - self.model_name = model_name + #self.model_name = model_name self.post_token_hooks = [ PostTokenHooks.stream_tokens, @@ -83,6 +83,23 @@ class RWKVInferenceModel(InferenceModel): ) self._old_stopping_criteria = None + def is_valid(self, model_name, model_path, menu_path): + try: + from rwkv.model import RWKV + valid = True + except: + valid = False + return valid and "rwkv" in model_name.lower() + + def get_requested_parameters(self, model_name, model_path, menu_path): + self.source = model_name + requested_parameters = [] + return requested_parameters + + def set_input_parameters(self): + return + + def _ensure_directory_structure(self) -> None: for path in ["models/rwkv", "models/rwkv/models"]: try: @@ -145,6 +162,7 @@ class RWKVInferenceModel(InferenceModel): # Now we load! 
# TODO: Breakmodel to strat + from rwkv.model import RWKV self.model = RWKV(model=model_path, strategy="cuda:0 fp16") def _apply_warpers( diff --git a/static/koboldai.css b/static/koboldai.css index 230f1cbf..f3dde4b7 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -347,6 +347,28 @@ border-top-right-radius: var(--tabs_rounding); } +.setting_container_model { + display: grid; + grid-template-areas: "label value" + "item item" + "minlabel maxlabel"; + grid-template-rows: 20px 23px 20px; + grid-template-columns: auto 30px; + row-gap: 0.2em; + background-color: var(--setting_background); + color: var(--setting_text); + border-radius: var(--radius_settings_background); + padding: 2px; + margin: 2px; + width: calc(100%); +} + +.setting_container_model .setting_item{ + font-size: calc(0.93em + var(--font_size_adjustment)); + margin-left: 10px; +} + + .setting_minlabel { padding-top: 6px; grid-area: minlabel; @@ -3370,6 +3392,23 @@ textarea { } } +@keyframes pulse-red { + 0% { + transform: scale(0.95); + box-shadow: 0 0 0 0 rgba(255, 0, 0, 0.7); + } + + 70% { + transform: scale(1); + box-shadow: 0 0 0 10px rgba(255, 0, 0, 0); + } + + 100% { + transform: scale(0.95); + box-shadow: 0 0 0 0 rgba(255, 0, 0, 0); + } +} + @keyframes pulse-text { 0% { filter: blur(3px); @@ -3391,6 +3430,11 @@ textarea { } } +.input_error { + border: 5px solid red !important; + box-sizing: border-box !important; +} + .single_pulse { animation: pulse-text 0.5s 1; } diff --git a/static/koboldai.js b/static/koboldai.js index cfc32d21..0656253f 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -15,6 +15,7 @@ socket.on('popup_items', function(data){popup_items(data);}); socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);}); socket.on('popup_edit_file', function(data){popup_edit_file(data);}); socket.on('show_model_menu', function(data){show_model_menu(data);}); +socket.on('open_model_load_menu', function(data){new_show_model_menu(data);}); socket.on('selected_model_info', function(data){selected_model_info(data);}); socket.on('oai_engines', function(data){oai_engines(data);}); socket.on('buildload', function(data){buildload(data);}); @@ -81,6 +82,7 @@ const on_colab = $el("#on_colab").textContent == "true"; let story_id = -1; var dirty_chunks = []; var initial_socketio_connection_occured = false; +var selected_model_data; // Each entry into this array should be an object that looks like: // {class: "class", key: "key", func: callback} @@ -1500,49 +1502,46 @@ function getModelParameterCount(modelName) { return base * multiplier; } -function show_model_menu(data) { - //clear old options - document.getElementById("modelkey").classList.add("hidden"); - document.getElementById("modelkey").value = ""; - document.getElementById("modelurl").classList.add("hidden"); - document.getElementById("use_gpu_div").classList.add("hidden"); - document.getElementById("use_8_bit_div").classList.add("hidden"); - document.getElementById("modellayers").classList.add("hidden"); - document.getElementById("oaimodel").classList.add("hidden"); - var model_layer_bars = document.getElementById('model_layer_bars'); - while (model_layer_bars.firstChild) { - model_layer_bars.removeChild(model_layer_bars.firstChild); +function new_show_model_menu(data) { + //clear out the loadmodelsettings + var loadmodelsettings = document.getElementById('loadmodelsettings') + while (loadmodelsettings.firstChild) { + loadmodelsettings.removeChild(loadmodelsettings.firstChild); } + 
document.getElementById("modelplugin").classList.add("hidden"); + var accept = document.getElementById("btn_loadmodelaccept"); + accept.disabled = false; //clear out the breadcrumbs var breadcrumbs = document.getElementById('loadmodellistbreadcrumbs') while (breadcrumbs.firstChild) { breadcrumbs.removeChild(breadcrumbs.firstChild); } - //add breadcrumbs - //console.log(data.breadcrumbs); - for (item of data.breadcrumbs) { - var button = document.createElement("button"); - button.classList.add("breadcrumbitem"); - button.setAttribute("model", data.menu); - button.setAttribute("folder", item[0]); - button.textContent = item[1]; - button.onclick = function () { - socket.emit('select_model', {'menu': "", 'model': this.getAttribute("model"), 'path': this.getAttribute("folder")}); - }; - breadcrumbs.append(button); - var span = document.createElement("span"); - span.textContent = "\\"; - breadcrumbs.append(span); - } + //add breadcrumbs + if ('breadcrumbs' in data) { + for (item of data.breadcrumbs) { + var button = document.createElement("button"); + button.classList.add("breadcrumbitem"); + button.setAttribute("model", data.menu); + button.setAttribute("folder", item[0]); + button.textContent = item[1]; + button.onclick = function () { + socket.emit('select_model', {'menu': "", 'name': this.getAttribute("model"), 'path': this.getAttribute("folder")}); + }; + breadcrumbs.append(button); + var span = document.createElement("span"); + span.textContent = "\\"; + breadcrumbs.append(span); + } + } //clear out the items var model_list = document.getElementById('loadmodellistcontent') while (model_list.firstChild) { model_list.removeChild(model_list.firstChild); } //add items - for (item of data.data) { + for (item of data.items) { var list_item = document.createElement("span"); list_item.classList.add("model_item"); @@ -1564,10 +1563,27 @@ function show_model_menu(data) { //create the actual item var popup_item = document.createElement("span"); popup_item.classList.add("model"); - popup_item.setAttribute("display_name", item.label); - popup_item.id = item.name; + for (const key in item) { + if (key == "name") { + popup_item.id = item[key]; + } + popup_item.setAttribute(key, item[key]); + } + + popup_item.onclick = function() { + var attributes = this.attributes; + var obj = {}; + + for (var i = 0, len = attributes.length; i < len; i++) { + obj[attributes[i].name] = attributes[i].value; + } + //put the model data on the accept button so we can send it to the server when you accept + var accept = document.getElementById("popup_accept"); + selected_model_data = obj; + //send the data to the server so it can figure out what data we need from the user for the model + socket.emit('select_model', obj); + } - popup_item.setAttribute("Menu", data.menu) //name text var text = document.createElement("span"); text.style="grid-area: item;"; @@ -1615,241 +1631,223 @@ function show_model_menu(data) { }); })(); - popup_item.onclick = function () { - var accept = document.getElementById("btn_loadmodelaccept"); - accept.classList.add("disabled"); - socket.emit("select_model", {"model": this.id, "menu": this.getAttribute("Menu"), "display_name": this.getAttribute("display_name")}); - var model_list = document.getElementById('loadmodellistcontent').getElementsByClassName("selected"); - for (model of model_list) { - model.classList.remove("selected"); - } - this.classList.add("selected"); - accept.setAttribute("selected_model", this.id); - accept.setAttribute("menu", this.getAttribute("Menu")); - 
accept.setAttribute("display_name", this.getAttribute("display_name")); - }; list_item.append(popup_item); - - model_list.append(list_item); } - var accept = document.getElementById("btn_loadmodelaccept"); - accept.disabled = true; - //finally, if they selected the custom hugging face menu we show the input box - if (data['menu'] == "customhuggingface") { - document.getElementById("custommodelname").classList.remove("hidden"); - } else { - document.getElementById("custommodelname").classList.add("hidden"); - } - - - // detect if we are in a model selection screen and show the reference - var refelement = document.getElementById("modelspecifier"); - var check = document.getElementById("mainmenu"); - if (check) { - refelement.classList.remove("hidden"); - } else { - refelement.classList.add("hidden"); - } openPopup("load-model"); + } + function selected_model_info(data) { + //clear out the loadmodelsettings + var loadmodelsettings = document.getElementById('loadmodelsettings') + while (loadmodelsettings.firstChild) { + loadmodelsettings.removeChild(loadmodelsettings.firstChild); + } var accept = document.getElementById("btn_loadmodelaccept"); - //hide or unhide key - if (data.key) { - document.getElementById("modelkey").classList.remove("hidden"); - document.getElementById("modelkey").value = data.key_value; - } else { - document.getElementById("modelkey").classList.add("hidden"); - document.getElementById("modelkey").value = ""; - } - //hide or unhide URL - if (data.url) { - document.getElementById("modelurl").classList.remove("hidden"); - } else { - document.getElementById("modelurl").classList.add("hidden"); - } - - //hide or unhide 8 bit mode - if (data.bit_8_available) { - document.getElementById("use_8_bit_div").classList.remove("hidden"); - } else { - document.getElementById("use_8_bit_div").classList.add("hidden"); - document.getElementById("use_8_bit").checked = false; - } - - //default URL loading - if (data.default_url != null) { - document.getElementById("modelurl").value = data.default_url; - } - - //change model loading on url if needed - if (data.models_on_url) { - document.getElementById("modelurl").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': document.getElementById("modelkey").value, 'url': this.value});}; - document.getElementById("modelkey").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value, 'url': document.getElementById("modelurl").value});}; - } else { - document.getElementById("modelkey").ochange = function () {socket.emit('OAI_Key_Update', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value});}; - document.getElementById("modelurl").ochange = null; - } - - //show model select for APIs - if (data.show_online_model_select) { - document.getElementById("oaimodel").classList.remove("hidden"); - } else { - document.getElementById("oaimodel").classList.add("hidden"); - } - - //Multiple Model Select? 
- if (data.multi_online_models) { - document.getElementById("oaimodel").setAttribute("multiple", ""); - document.getElementById("oaimodel").options[0].textContent = "All" - } else { - document.getElementById("oaimodel").removeAttribute("multiple"); - document.getElementById("oaimodel").options[0].textContent = "Select Model(s)" - } - - //hide or unhide the use gpu checkbox - if (data.gpu) { - document.getElementById("use_gpu_div").classList.remove("hidden"); - } else { - document.getElementById("use_gpu_div").classList.add("hidden"); - } - //setup breakmodel - if (data.breakmodel) { - document.getElementById("modellayers").classList.remove("hidden"); - //setup model layer count - document.getElementById("gpu_layers_current").textContent = data.break_values.reduce((a, b) => a + b, 0); - document.getElementById("gpu_layers_max").textContent = data.layer_count; - document.getElementById("gpu_count").value = data.gpu_count; - - //create the gpu load bars - var model_layer_bars = document.getElementById('model_layer_bars'); - while (model_layer_bars.firstChild) { - model_layer_bars.removeChild(model_layer_bars.firstChild); - } - - //Add the bars - for (let i = 0; i < data.gpu_names.length; i++) { - var div = document.createElement("div"); - div.classList.add("model_setting_container"); - //build GPU text - var span = document.createElement("span"); - span.classList.add("model_setting_label"); - span.textContent = "GPU " + i + " " + data.gpu_names[i] + ": " - //build layer count box - var input = document.createElement("input"); - input.classList.add("model_setting_value"); - input.classList.add("setting_value"); - input.inputmode = "numeric"; - input.id = "gpu_layers_box_"+i; - input.value = data.break_values[i]; - input.onblur = function () { - document.getElementById(this.id.replace("_box", "")).value = this.value; - update_gpu_layers(); - } - span.append(input); - div.append(span); - //build layer count slider - var input = document.createElement("input"); - input.classList.add("model_setting_item"); - input.type = "range"; - input.min = 0; - input.max = data.layer_count; - input.step = 1; - input.value = data.break_values[i]; - input.id = "gpu_layers_" + i; - input.onchange = function () { - document.getElementById(this.id.replace("gpu_layers", "gpu_layers_box")).value = this.value; - update_gpu_layers(); - } - div.append(input); - //build slider bar #s - //min - var span = document.createElement("span"); - span.classList.add("model_setting_minlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = 0; - span.append(span2); - div.append(span); - //max - var span = document.createElement("span"); - span.classList.add("model_setting_maxlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = data.layer_count; - span.append(span2); - div.append(span); - - model_layer_bars.append(div); - } - - //add the disk layers - if (data.disk_break) { - var div = document.createElement("div"); - div.classList.add("model_setting_container"); - //build GPU text - var span = document.createElement("span"); - span.classList.add("model_setting_label"); - span.textContent = "Disk cache: " - //build layer count box - var input = document.createElement("input"); - input.classList.add("model_setting_value"); - input.classList.add("setting_value"); - input.inputmode = "numeric"; - input.id = "disk_layers_box"; - input.value = data.disk_break_value; - input.onblur = function () { - 
document.getElementById(this.id.replace("_box", "")).value = this.value; - update_gpu_layers(); - } - span.append(input); - div.append(span); - //build layer count slider - var input = document.createElement("input"); - input.classList.add("model_setting_item"); - input.type = "range"; - input.min = 0; - input.max = data.layer_count; - input.step = 1; - input.value = data.disk_break_value; - input.id = "disk_layers"; - input.onchange = function () { - document.getElementById(this.id+"_box").value = this.value; - update_gpu_layers(); - } - div.append(input); - //build slider bar #s - //min - var span = document.createElement("span"); - span.classList.add("model_setting_minlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = 0; - span.append(span2); - div.append(span); - //max - var span = document.createElement("span"); - span.classList.add("model_setting_maxlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = data.layer_count; - span.append(span2); - div.append(span); - } - - model_layer_bars.append(div); - - update_gpu_layers(); - } else { - document.getElementById("modellayers").classList.add("hidden"); - accept.classList.remove("disabled"); - } accept.disabled = false; + modelplugin = document.getElementById("modelplugin"); + modelplugin.classList.remove("hidden"); + modelplugin.onchange = function () { + for (const area of document.getElementsByClassName("model_plugin_settings_area")) { + area.classList.add("hidden"); + } + document.getElementById(this.value + "_settings_area").classList.remove("hidden"); + } + //create the content + for (const [loader, items] of Object.entries(data)) { + model_area = document.createElement("DIV"); + model_area.id = loader + "_settings_area"; + model_area.classList.add("model_plugin_settings_area"); + model_area.classList.add("hidden"); + modelpluginoption = document.createElement("option"); + modelpluginoption.innerText = loader; + modelpluginoption.value = loader; + modelplugin.append(modelpluginoption); + + for (item of items) { + let new_setting = document.getElementById('blank_model_settings').cloneNode(true); + new_setting.id = loader; + new_setting.classList.remove("hidden"); + new_setting.querySelector('#blank_model_settings_label').innerText = item['label']; + new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']); + + onchange_event = function () { + //get check value: + if ('sum' in this.check_data) { + check_value = 0 + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value); + } + } + } else { + check_value = this.value + } + if (this.check_data['check'] == "=") { + valid = (check_value == this.check_data['value']); + } else if (this.check_data['check'] == "!=") { + valid = (check_value != this.check_data['value']); + } else if (this.check_data['check'] == ">=") { + valid = (check_value >= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value <= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value > this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value < this.check_data['value']); + } + if (valid) { + //if we are supposed to refresh when this value 
changes we'll resubmit + if (this.getAttribute("refresh_model_inputs") == "true") { + console.log("resubmit"); + } + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } + } + } else { + this.closest(".setting_container_model").classList.remove('input_error'); + this.closest(".setting_container_model").removeAttribute("tooltip"); + } + var accept = document.getElementById("btn_loadmodelaccept"); + if (document.getElementsByClassName("input_error").length) + accept.disabled = true; + } else { + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + } else { + this.closest(".setting_container_model").classList.add('input_error'); + this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + var accept = document.getElementById("btn_loadmodelaccept"); + if (document.getElementsByClassName("input_error").length > 0) { + accept.classList.add("disabled"); + accept.disabled = true; + } else { + accept.classList.remove("disabled"); + accept.disabled = false; + } + + } + if (item['uitype'] == "slider") { + var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number'); + slider_number.value = item['default']; + slider_number.id = loader + "|" + item['id'] + "_value_text"; + slider_number.onchange = function() { document.getElementById(this.id.replace("_text", "")).value = this.value;}; + + var slider = new_setting.querySelector('#blank_model_settings_slider'); + slider.value = item['default']; + slider.min = item['min']; + slider.max = item['max']; + slider.id = loader + "|" + item['id'] + "_value"; + if ('check' in item) { + slider.check_data = item['check']; + slider_number.check_data = item['check']; + } else { + slider.check_data = null; + slider_number.check_data = null; + } + slider.oninput = function() { document.getElementById(this.id+"_text").value = this.value;}; + slider.onchange = onchange_event; + slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min']; + new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max']; + slider.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_slider').classList.add("hidden"); + } + if (item['uitype'] == "toggle") { + var toggle = new_setting.querySelector('#blank_model_settings_toggle'); + toggle.id = loader + "|" + item['id'] + "_value"; + toggle.checked = item['default']; + toggle.onchange = onchange_event; + toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + toggle.check_data = item['check']; + } else { + toggle.check_data = null; + } + toggle.onchange(); + } else { + 
new_setting.querySelector('#blank_model_settings_checkbox_container').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_toggle').classList.add("hidden"); + } + if (item['uitype'] == "dropdown") { + var select_element = new_setting.querySelector('#blank_model_settings_dropdown'); + select_element.id = loader + "|" + item['id'] + "_value"; + for (const dropdown_value of item['children']) { + new_option = document.createElement("option"); + new_option.value = dropdown_value['value']; + new_option.innerText = dropdown_value['text']; + select_element.append(new_option); + } + select_element.value = item['default']; + select_element.onchange = onchange_event; + select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + select_element.check_data = item['check']; + } else { + select_element.check_data = null; + } + select_element.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_dropdown').classList.add("hidden"); + } + if (item['uitype'] == "password") { + var password_item = new_setting.querySelector('#blank_model_settings_password'); + password_item.id = loader + "|" + item['id'] + "_value"; + password_item.value = item['default']; + password_item.onchange = onchange_event; + password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + password_item.check_data = item['check']; + } else { + password_item.check_data = null; + } + password_item.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_password').classList.add("hidden"); + } + if (item['uitype'] == "text") { + var text_item = new_setting.querySelector('#blank_model_settings_text'); + text_item.id = loader + "|" + item['id'] + "_value"; + text_item.value = item['default']; + text_item.onchange = onchange_event; + text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + text_item.check_data = item['check']; + } else { + text_item.check_data = null; + } + text_item.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_text').classList.add("hidden"); + } + + model_area.append(new_setting); + loadmodelsettings.append(model_area); + } + } + + //unhide the first plugin settings + console.log(document.getElementById("modelplugin").value + "_settings_area"); + if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) { + document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); + } } @@ -1877,42 +1875,16 @@ function update_gpu_layers() { function load_model() { var accept = document.getElementById('btn_loadmodelaccept'); - gpu_layers = [] - disk_layers = 0; - if (!(document.getElementById("modellayers").classList.contains("hidden"))) { - for (let i=0; i < document.getElementById("gpu_count").value; i++) { - gpu_layers.push(document.getElementById("gpu_layers_"+i).value); - } - if (document.getElementById("disk_layers")) { - disk_layers = document.getElementById("disk_layers").value; - } - } - //Need to do different stuff with custom models - if ((accept.getAttribute('menu') == 'GPT2Custom') || (accept.getAttribute('menu') == 'NeoCustom')) { - var model = document.getElementById("btn_loadmodelaccept").getAttribute("menu"); - var path = document.getElementById("btn_loadmodelaccept").getAttribute("display_name"); - } else { - var model = document.getElementById("btn_loadmodelaccept").getAttribute("selected_model"); - var 
path = ""; - } + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); - let selected_models = []; - for (item of document.getElementById("oaimodel").selectedOptions) { - selected_models.push(item.value); - } - if (selected_models == ['']) { - - selected_models = []; - } else if (selected_models.length == 1) { - selected_models = selected_models[0]; + //get an object of all the input settings from the user + data = {} + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + data[element.id.split("|")[1].replace("_value", "")] = element.value; } + data = {...data, ...selected_model_data} - message = {'model': model, 'path': path, 'use_gpu': document.getElementById("use_gpu").checked, - 'key': document.getElementById('modelkey').value, 'gpu_layers': gpu_layers.join(), - 'disk_layers': disk_layers, 'url': document.getElementById("modelurl").value, - 'online_model': selected_models, - 'use_8_bit': document.getElementById('use_8_bit').checked}; - socket.emit("load_model", message); + socket.emit("load_model", data); closePopups(); } diff --git a/templates/popups.html b/templates/popups.html index 12c4c27a..59f07e70 100644 --- a/templates/popups.html +++ b/templates/popups.html @@ -46,35 +46,11 @@
Usage (VRAM)
-
- -
+
+ + diff --git a/templates/templates.html b/templates/templates.html index 4f16ff66..49cd3e5b 100644 --- a/templates/templates.html +++ b/templates/templates.html @@ -154,3 +154,22 @@ + +
+ + help_icon + + + + + + + + + + + + + + +
\ No newline at end of file From 546ba84723c84dec3a6f8cc70e41408fd66efa67 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 10 May 2023 19:10:23 -0500 Subject: [PATCH 017/102] Fix memory->genre bug in context viewer bar tooltip Crazy change I know --- static/koboldai.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/static/koboldai.js b/static/koboldai.js index cfc32d21..87beb954 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -4006,7 +4006,7 @@ function update_context(data) { document.getElementById('world_info_'+entry.uid).classList.add("used_in_game"); } break; - case 'memory': + case 'genre': genre_length += entry.tokens.length; break; case 'memory': From 84e4cb0f4a216e58063cf1f61a0adb0c7b27124a Mon Sep 17 00:00:00 2001 From: Henk Date: Thu, 11 May 2023 13:44:53 +0200 Subject: [PATCH 018/102] Update Transformers --- environments/huggingface.yml | 2 +- environments/rocm.yml | 2 +- requirements.txt | 2 +- requirements_mtj.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 1cc5a9c7..3d0ca633 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -32,7 +32,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.28.0 + - transformers==4.29.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/environments/rocm.yml b/environments/rocm.yml index a33a8f96..eb2927bd 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -30,7 +30,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.28.0 + - transformers==4.29.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/requirements.txt b/requirements.txt index 4eb2c282..28fdb28c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -transformers==4.28.0 +transformers==4.29.* huggingface_hub==0.12.1 Flask==2.2.3 Flask-SocketIO==5.3.2 diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 1b40fded..7fc866f0 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -5,7 +5,7 @@ requests dm-haiku==0.0.9 jax==0.3.25 jaxlib==0.3.25 -transformers == 4.28.0 +transformers==4.29.* chex == 0.1.5 huggingface_hub==0.12.1 progressbar2 From e932364a1e3efe0c6973f1a19f4093115068c77d Mon Sep 17 00:00:00 2001 From: Henk Date: Thu, 11 May 2023 14:56:12 +0200 Subject: [PATCH 019/102] RWKV support --- aiserver.py | 31 ++-- modeling/inference_models/rwkv.py | 237 ------------------------------ 2 files changed, 11 insertions(+), 257 deletions(-) delete mode 100644 modeling/inference_models/rwkv.py diff --git a/aiserver.py b/aiserver.py index ef49f05c..b045ea71 100644 --- a/aiserver.py +++ b/aiserver.py @@ -136,7 +136,6 @@ class MenuModelType(Enum): HUGGINGFACE = 0 ONLINE_API = 1 OTHER = 2 - RWKV = 3 class MenuItem: def __init__( @@ -222,7 +221,7 @@ model_menu = { MenuFolder("Untuned Fairseq Dense", "fsdlist"), MenuFolder("Untuned Bloom", "bloomlist"), MenuFolder("Untuned XGLM", "xglmlist"), - MenuFolder("Untuned RWKV-4 (Experimental)", "rwkvlist", experimental=True), + MenuFolder("Official RWKV-4", "rwkvlist"), MenuFolder("Untuned GPT2", "gpt2list"), MenuFolder("Online Services", "apilist"), MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER), @@ -349,16 +348,16 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'rwkvlist': [ - MenuModel("RWKV-4 14B ctx4096", "rwkv-4-pile-14b:ctx4096", "??GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 14B 
ctx1024", "rwkv-4-pile-14b", "??GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 7B ctx4096", "rwkv-4-pile-7b:ctx4096", "??GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 7B ctx1024", "rwkv-4-pile-7b", "??GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 3B ctx4096", "rwkv-4-pile-3b:ctx4096", "?GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 3B ctx1024", "rwkv-4-pile-3b", "?GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 1.5B ctx4096", "rwkv-4-pile-1b5:ctx4096", "9GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 1.5B ctx1024", "rwkv-4-pile-1b5", "9GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 340M", "rwkv-4-pile-430m", "?GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 169M ctx1024", "rwkv-4-pile-169m", "?GB", model_type=MenuModelType.RWKV), + MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", ""), + MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", ""), + MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", ""), + MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", ""), + MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", ""), + MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", ""), + MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", ""), + MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", ""), + MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", ""), + MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", ""), MenuFolder("Return to Main Menu", "mainmenu"), ], 'apilist': [ @@ -1567,8 +1566,6 @@ def get_model_info(model, directory=""): print(":(") pass key = True - elif "rwkv" in model.lower(): - pass elif model == 'ReadOnly': pass #elif model == 'customhuggingface': @@ -1946,12 +1943,6 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal model.load(initial_load=initial_load) # TODO: This check sucks, make a model object or somethign - elif "rwkv" in koboldai_vars.model: - if koboldai_vars.use_colab_tpu: - raise RuntimeError("RWKV is not supported on the TPU.") - from modeling.inference_models.rwkv import RWKVInferenceModel - model = RWKVInferenceModel(koboldai_vars.model) - model.load() elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai: # HF Torch logger.init("Transformers", status='Starting') diff --git a/modeling/inference_models/rwkv.py b/modeling/inference_models/rwkv.py deleted file mode 100644 index 006bb8fd..00000000 --- a/modeling/inference_models/rwkv.py +++ /dev/null @@ -1,237 +0,0 @@ -from __future__ import annotations -import os - - -import time -from typing import Dict, List, Optional, Union -import numpy as np -import requests -from tokenizers import Tokenizer -from tqdm import tqdm -from huggingface_hub import hf_hub_url - -import torch -from torch.nn import functional as F - -# Must be defined before import -os.environ["RWKV_JIT_ON"] = "1" -# TODO: Include compiled kernel -os.environ["RWKV_CUDA_ON"] = "1" -from rwkv.model import RWKV - -import utils -from logger import logger - -from modeling import warpers -from modeling.warpers import Warper -from modeling.stoppers import Stoppers -from modeling.post_token_hooks import PostTokenHooks -from modeling.tokenizer import GenericTokenizer -from modeling.inference_model import ( - GenerationResult, - GenerationSettings, - InferenceModel, - ModelCapabilities, -) - -TOKENIZER_URL = ( - "https://raw.githubusercontent.com/BlinkDL/ChatRWKV/main/20B_tokenizer.json" -) -TOKENIZER_PATH = "models/rwkv/20b_tokenizer.json" - -REPO_OWNER = "BlinkDL" -MODEL_FILES = { - "rwkv-4-pile-14b": "RWKV-4-Pile-14B-20230213-8019.pth", - # NOTE: 
Still in progress(?) - "rwkv-4-pile-14b:ctx4096": "RWKV-4-Pile-14B-20230228-ctx4096-test663.pth", - "rwkv-4-pile-7b": "RWKV-4-Pile-7B-20221115-8047.pth", - "rwkv-4-pile-7b:ctx4096": "RWKV-4-Pile-7B-20230109-ctx4096.pth", - "rwkv-4-pile-3b": "RWKV-4-Pile-3B-20221008-8023.pth", - "rwkv-4-pile-3b:ctx4096": "RWKV-4-Pile-3B-20221110-ctx4096.pth", - "rwkv-4-pile-1b5": "RWKV-4-Pile-1B5-20220903-8040.pth", - "rwkv-4-pile-1b5:ctx4096": "RWKV-4-Pile-1B5-20220929-ctx4096.pth", - "rwkv-4-pile-430m": "RWKV-4-Pile-430M-20220808-8066.pth", - "rwkv-4-pile-169m": "RWKV-4-Pile-169M-20220807-8023.pth", -} - - -class RWKVInferenceModel(InferenceModel): - def __init__( - self, - model_name: str, - ) -> None: - super().__init__() - self.model_name = model_name - - self.post_token_hooks = [ - PostTokenHooks.stream_tokens, - ] - - self.stopper_hooks = [ - Stoppers.core_stopper, - Stoppers.dynamic_wi_scanner, - Stoppers.singleline_stopper, - Stoppers.chat_mode_stopper, - Stoppers.stop_sequence_stopper, - ] - - self.capabilties = ModelCapabilities( - embedding_manipulation=False, - post_token_hooks=True, - stopper_hooks=True, - post_token_probs=True, - ) - self._old_stopping_criteria = None - - def _ensure_directory_structure(self) -> None: - for path in ["models/rwkv", "models/rwkv/models"]: - try: - os.mkdir(path) - except FileExistsError: - pass - - def _get_tokenizer(self) -> GenericTokenizer: - if not os.path.exists(TOKENIZER_PATH): - logger.info("RWKV tokenizer not found, downloading...") - - r = requests.get(TOKENIZER_URL) - with open(TOKENIZER_PATH, "wb") as file: - file.write(r.content) - - return GenericTokenizer(Tokenizer.from_file(TOKENIZER_PATH)) - - def _download_model(self, model_path: str, model_class: str) -> None: - logger.info(f"{self.model_name} not found, downloading...") - - url = hf_hub_url( - repo_id=f"{REPO_OWNER}/{model_class}", - filename=MODEL_FILES[self.model_name], - ) - - # TODO: Use aria2 - # https://stackoverflow.com/a/57030446 - with requests.get(url, stream=True) as r: - r.raise_for_status() - bar = tqdm( - desc="Downloading RWKV Model", - unit="B", - unit_scale=True, - total=int(r.headers["Content-Length"]), - ) - with open(model_path, "wb") as file: - for chunk in r.iter_content(chunk_size=8192): - if not chunk: - continue - file.write(chunk) - bar.update(len(chunk)) - - def _load(self, save_model: bool, initial_load: bool) -> None: - self._ensure_directory_structure() - self.tokenizer = self._get_tokenizer() - - # Parse model name - model_class, _, special = self.model_name.partition(":") - special = special or None - - model_dir = os.path.join("models", "rwkv", "models", model_class) - if not os.path.exists(model_dir): - os.mkdir(model_dir) - - # Download model if we need to - model_path = os.path.join(model_dir, MODEL_FILES[self.model_name]) - if not os.path.exists(model_path): - self._download_model(model_path, model_class) - - # Now we load! 
- - # TODO: Breakmodel to strat - self.model = RWKV(model=model_path, strategy="cuda:0 fp16") - - def _apply_warpers( - self, scores: torch.Tensor, input_ids: torch.Tensor - ) -> torch.Tensor: - warpers.update_settings() - for sid in utils.koboldai_vars.sampler_order: - warper = Warper.from_id(sid) - - if not warper.value_is_valid(): - continue - - if warper == warpers.RepetitionPenalty: - # Rep pen needs more data than other samplers - scores = warper.torch(scores, input_ids=input_ids) - else: - scores = warper.torch(scores) - return scores - - def _sample_token(self, logits: torch.Tensor, input_ids: torch.Tensor) -> int: - probs = F.softmax(logits.float(), dim=-1) - - if probs.device == torch.device("cpu"): - probs = probs.numpy() - sorted_ids = np.argsort(probs) - sorted_probs = probs[sorted_ids][::-1] - - probs = self._apply_warpers(probs[None, :], input_ids) - - # TODO: is this right? - probs[probs == -torch.inf] = 0.0 - - probs = probs / np.sum(probs) - out = np.random.choice(a=len(probs), p=probs) - return int(out) - else: - sorted_ids = torch.argsort(probs) - sorted_probs = probs[sorted_ids] - sorted_probs = torch.flip(sorted_probs, dims=(0,)) - - probs = self._apply_warpers(probs[None, :], input_ids) - - # TODO: is this right? - probs[probs == -torch.inf] = 0.0 - - out = torch.multinomial(probs, num_samples=1)[0] - return int(out) - - def _raw_generate( - self, - prompt_tokens: Union[List[int], torch.Tensor], - max_new: int, - gen_settings: GenerationSettings, - single_line: bool = False, - batch_count: int = 1, - seed: Optional[int] = None, - **kwargs, - ) -> GenerationResult: - if seed is not None: - torch.manual_seed(seed) - - aux_device = utils.get_auxilary_device() - context = torch.tensor(prompt_tokens)[None, :].to(aux_device) - out = [] - - start_time = time.time() - with torch.no_grad(): - logits, state = self.model.forward(prompt_tokens, None) - last_token = prompt_tokens[-1] - - for _ in range(max_new): - - logits, state = self.model.forward([last_token], state) - last_token = self._sample_token(logits, context) - out.append(last_token) - add = torch.tensor([[last_token]]).to(aux_device) - context = torch.cat((context, add), dim=-1) - self._post_token_gen(context) - - logger.debug( - "torch_raw_generate: run generator {}s".format(time.time() - start_time) - ) - - return GenerationResult( - self, - out_batches=torch.tensor([out]), - prompt=prompt_tokens, - is_whole_generation=False, - output_includes_prompt=True, - ) From 77dd5aa7259f65262f6077957b493c74d98eaa24 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 09:09:09 -0400 Subject: [PATCH 020/102] Minor update --- aiserver.py | 7 +++++-- modeling/inference_models/horde.py | 2 +- static/koboldai.js | 4 +++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index e7227c81..ac90d6f4 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6489,7 +6489,7 @@ def UI_2_select_model(data): if valid: logger.debug("Valid Loaders: {}".format(valid_loaders)) emit("selected_model_info", valid_loaders) - if not valid: + if not valid and 'path' in data: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) output = [] @@ -6501,7 +6501,9 @@ def UI_2_select_model(data): break output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) - + elif not 
valid: + logger.error("Nothing can load the model: {}".format(valid_loaders)) + return @@ -6530,6 +6532,7 @@ def UI_2_select_model(data): def UI_2_load_model(data): logger.info("loading Model") logger.info(data) + model_loaders[data['plugin']].set_input_parameters(**data) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) #==================================================================# diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 56e88205..f02cf265 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -67,7 +67,7 @@ class model_loader(InferenceModel): "unit": "text", "label": "Model", "id": "model", - "default": "", + "default": model_name, "check": {"value": "", 'check': "!="}, "tooltip": "Which model to use when running OpenAI/GooseAI.", "menu_path": "", diff --git a/static/koboldai.js b/static/koboldai.js index 0656253f..1907add8 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1882,7 +1882,9 @@ function load_model() { for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { data[element.id.split("|")[1].replace("_value", "")] = element.value; } - data = {...data, ...selected_model_data} + data = {...data, ...selected_model_data}; + + data['plugin'] = document.getElementById("modelplugin").value; socket.emit("load_model", data); closePopups(); From edd9c7d782c9c59f9052f41e9f21498d2cdcaef2 Mon Sep 17 00:00:00 2001 From: Henk Date: Thu, 11 May 2023 15:13:59 +0200 Subject: [PATCH 021/102] Warning polish --- koboldai_settings.py | 3 ++- modeling/inference_model.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/koboldai_settings.py b/koboldai_settings.py index d8416df2..7bc88422 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1129,7 +1129,7 @@ class story_settings(settings): class user_settings(settings): local_only_variables = ['importjs'] - no_save_variables = ['importnum', 'importjs', 'loadselect', 'spselect', 'svowname', 'saveow', 'laststory', 'sid', "revision"] + no_save_variables = ['importnum', 'importjs', 'loadselect', 'spselect', 'svowname', 'saveow', 'laststory', 'sid', "revision", "model_selected"] settings_name = "user" def __init__(self, socketio): self._socketio = socketio @@ -1185,6 +1185,7 @@ class user_settings(settings): self.horde_api_key = "0000000000" self.horde_worker_name = "My Awesome Instance" self.horde_url = "https://horde.koboldai.net" + self.model_selected = "" def __setattr__(self, name, value): new_variable = name not in self.__dict__ diff --git a/modeling/inference_model.py b/modeling/inference_model.py index b253c5bf..e2329cf9 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -218,7 +218,7 @@ class InferenceModel: try: return GenericTokenizer(try_get_tokenizer()) except Exception as e: - logger.warning(f"Tokenizer falling back due to {e}") + logger.warning(f"Tokenizer falling back due to {e} (This can be normal behavior for some architectures that lack a slow tokenizer such as NeoX)") # If we error on each attempt, raise the last one if i == len(suppliers) - 1: raise From 4605d10c370b994cfbd1d27891ccae6ade8b9c6b Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 12:08:35 -0400 Subject: [PATCH 022/102] Next iteration. 
Model Loading is broken completely now :) --- aiserver.py | 180 +++--------------- modeling/inference_model.py | 6 +- modeling/inference_models/api.py | 4 +- modeling/inference_models/basic_api.py | 4 +- modeling/inference_models/generic_hf_torch.py | 1 + modeling/inference_models/horde.py | 8 +- modeling/inference_models/openai.py | 6 +- modeling/inference_models/parents/hf.py | 56 ++++-- modeling/inference_models/parents/hf_torch.py | 2 +- modeling/inference_models/readonly.py | 77 ++++++++ static/koboldai.js | 13 +- 11 files changed, 170 insertions(+), 187 deletions(-) create mode 100644 modeling/inference_models/readonly.py diff --git a/aiserver.py b/aiserver.py index ac90d6f4..f9e60641 100644 --- a/aiserver.py +++ b/aiserver.py @@ -645,10 +645,14 @@ def new_socketio_on(*a, **k): socketio.on = new_socketio_on def emit(*args, **kwargs): - try: - return _emit(*args, **kwargs) - except AttributeError: - return socketio.emit(*args, **kwargs) + if has_request_context(): + try: + return _emit(*args, **kwargs) + except AttributeError: + return socketio.emit(*args, **kwargs) + else: #We're trying to send data outside of the http context. This won't work. Try the relay + if koboldai_settings.queue is not None: + koboldai_settings.queue.put([args[0], args[1], kwargs]) utils.emit = emit #replacement for tpool.execute to maintain request contexts @@ -1780,10 +1784,6 @@ def get_cluster_models(msg): emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1") emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2") - -def reset_model_settings(): - koboldai_vars.reset_for_model_load() - def unload_model(): global model @@ -1816,7 +1816,7 @@ def unload_model(): koboldai_vars.badwordsids = koboldai_settings.badwordsids_default -def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=False, online_model="", use_breakmodel_args=False, breakmodel_args_default_to_cpu=False, url=None, use_8_bit=False): +def load_model(plugin, initial_load=False): global model global tokenizer global model_config @@ -1827,79 +1827,18 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal if initial_load: use_breakmodel_args = True - reset_model_settings() koboldai_vars.reset_model() - koboldai_vars.cluster_requested_models = [online_model] if isinstance(online_model, str) else online_model - if koboldai_vars.cluster_requested_models == [""]: - koboldai_vars.cluster_requested_models = [] - koboldai_vars.noai = False - if not use_breakmodel_args: - set_aibusy(True) - if koboldai_vars.model != 'ReadOnly': - emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True) - #Have to add a sleep so the server will send the emit for some reason - time.sleep(0.1) + set_aibusy(True) + if koboldai_vars.model != 'ReadOnly': + emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True) + #Have to add a sleep so the server will send the emit for some reason + time.sleep(0.1) - if gpu_layers is not None: - args.breakmodel_gpulayers = gpu_layers - elif use_breakmodel_args: - gpu_layers = args.breakmodel_gpulayers - if breakmodel_args_default_to_cpu and gpu_layers is None: - gpu_layers = args.breakmodel_gpulayers = [] - if disk_layers is not None: - args.breakmodel_disklayers = int(disk_layers) - elif use_breakmodel_args: - disk_layers = args.breakmodel_disklayers - if 
breakmodel_args_default_to_cpu and disk_layers is None: - disk_layers = args.breakmodel_disklayers = 0 + if 'model' in globals(): + model.unload() - unload_model() - - if online_model == "": - koboldai_vars.configname = getmodelname() - #Let's set the GooseAI or OpenAI server URLs if that's applicable - else: - koboldai_vars.online_model = online_model - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - koboldai_vars.configname = f"GooseAI_{online_model.replace('/', '_')}" - elif koboldai_vars.model == "CLUSTER" and isinstance(online_model, list): - if len(online_model) != 1: - koboldai_vars.configname = koboldai_vars.model - else: - koboldai_vars.configname = f"{koboldai_vars.model}_{online_model[0].replace('/', '_')}" - else: - koboldai_vars.configname = f"{koboldai_vars.model}_{online_model.replace('/', '_')}" - - if path.exists(get_config_filename()): - changed=False - with open(get_config_filename(), "r") as file: - # Check if API key exists - js = json.load(file) - if 'online_model' in js: - if js['online_model'] != online_model: - changed=True - js['online_model'] = online_model - else: - changed=True - js['online_model'] = online_model - - if changed: - with open("settings/{}.v2_settings".format(koboldai_vars.model), "w") as file: - file.write(json.dumps(js, indent=3)) - - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - args.configname = "GooseAI" + "/" + online_model - elif koboldai_vars.model != "CLUSTER": - args.configname = koboldai_vars.model + "/" + online_model - koboldai_vars.oaiurl = koboldai_vars.oaiengines + "/{0}/completions".format(online_model) # If transformers model was selected & GPU available, ask to use CPU or GPU if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): @@ -1937,84 +1876,9 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal else: koboldai_vars.default_preset = koboldai_settings.default_preset - - # Ask for API key if InferKit was selected - if koboldai_vars.model == "InferKit": - koboldai_vars.apikey = koboldai_vars.oaiapikey - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - koboldai_vars.configname = "GooseAI" - - # Ask for API key if OpenAI was selected - if koboldai_vars.model == "OAI" and not koboldai_vars.configname: - koboldai_vars.configname = "OAI" - - if koboldai_vars.model == "ReadOnly": - koboldai_vars.noai = True - - # TODO: InferKit - if koboldai_vars.model == "ReadOnly" or koboldai_vars.noai: - pass - elif koboldai_vars.model in ["Colab", "API", "CLUSTER", "OAI"]: - koboldai_vars.colaburl = url or koboldai_vars.colaburl - koboldai_vars.usegpu = False - koboldai_vars.breakmodel = False - - if koboldai_vars.model == "Colab": - from modeling.inference_models.basic_api import model_loader - model = model_loader() - elif koboldai_vars.model == "API": - from modeling.inference_models.api import model_loader - model = model_loader(koboldai_vars.colaburl.replace("/request", "")) - elif koboldai_vars.model == "CLUSTER": - from modeling.inference_models.horde import model_loader - model = 
model_loader() - elif koboldai_vars.model == "OAI": - from modeling.inference_models.openai import model_loader - model = model_loader() - - model.load(initial_load=initial_load) - # TODO: This check sucks, make a model object or somethign - elif "rwkv" in koboldai_vars.model: - if koboldai_vars.use_colab_tpu: - raise RuntimeError("RWKV is not supported on the TPU.") - from modeling.inference_models.rwkv import model_loader - model = model_loader(koboldai_vars.model) - model.load() - elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai: - # HF Torch - logger.init("Transformers", status='Starting') - for m in ("GPTJModel", "XGLMModel"): - try: - globals()[m] = getattr(__import__("transformers"), m) - except: - pass - - from modeling.inference_models.generic_hf_torch import model_loader - model = model_loader( - koboldai_vars.model, - lazy_load=koboldai_vars.lazy_load, - low_mem=args.lowmem - ) - - model.load( - save_model=not (args.colab or args.cacheonly) or args.savemodel, - initial_load=initial_load, - ) - logger.info(f"Pipeline created: {koboldai_vars.model}") - else: - # TPU - from modeling.inference_models.hf_mtj import model_loader - model = model_loader( - koboldai_vars.model - ) - model.load( - save_model=not (args.colab or args.cacheonly) or args.savemodel, - initial_load=initial_load, - ) + model = model_loaders[plugin] + model.load(initial_load=initial_load) # TODO: Convert everywhere to use model.tokenizer if model: @@ -6532,7 +6396,8 @@ def UI_2_select_model(data): def UI_2_load_model(data): logger.info("loading Model") logger.info(data) - model_loaders[data['plugin']].set_input_parameters(**data) + model_loaders[data['plugin']].set_input_parameters(data) + load_model(data['plugin']) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) #==================================================================# @@ -8155,7 +8020,8 @@ def send_one_time_messages(data, wait_time=0): # Test #==================================================================# def model_info(): - if model_config is not None: + global model_config + if 'model_config' in globals() and model_config is not None: if isinstance(model_config, dict): if 'model_type' in model_config: model_type = str(model_config['model_type']) @@ -11045,7 +10911,7 @@ for schema in config_endpoint_schemas: def startup(): if koboldai_vars.model == "" or koboldai_vars.model is None: koboldai_vars.model = "ReadOnly" - socketio.start_background_task(load_model, **{'initial_load':True}) + socketio.start_background_task(load_model, *('readonly',), **{'initial_load':True}) print("", end="", flush=True) diff --git a/modeling/inference_model.py b/modeling/inference_model.py index 27ad46db..343eb39a 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -169,6 +169,7 @@ class InferenceModel: ] self.tokenizer = None self.capabilties = ModelCapabilities() + self.model_name = "Not Defined" def is_valid(self, model_name, model_path, menu_path, vram): return True @@ -176,7 +177,7 @@ class InferenceModel: def requested_parameters(self, model_name, model_path, menu_path, vram): return {} - def define_input_parameters(self): + def set_input_parameters(self, parameters): return def load(self, save_model: bool = False, initial_load: bool = False) -> None: @@ -186,6 +187,9 @@ class InferenceModel: self._load(save_model=save_model, initial_load=initial_load) self._post_load() + def 
unload(self): + return + def _pre_load(self) -> None: """Pre load hook. Called before `_load()`.""" diff --git a/modeling/inference_models/api.py b/modeling/inference_models/api.py index 41088bc7..5bddd714 100644 --- a/modeling/inference_models/api.py +++ b/modeling/inference_models/api.py @@ -46,8 +46,8 @@ class model_loader(InferenceModel): }) return requested_parameters - def set_input_parameters(self, base_url=""): - self.base_url = base_url.rstrip("/") + def set_input_parameters(self, parameters): + self.base_url = parameters['base_url'].rstrip("/") def _load(self, save_model: bool, initial_load: bool) -> None: tokenizer_id = requests.get(f"{self.base_url}/api/v1/model").json()["result"] diff --git a/modeling/inference_models/basic_api.py b/modeling/inference_models/basic_api.py index d7fc0863..5666ba8e 100644 --- a/modeling/inference_models/basic_api.py +++ b/modeling/inference_models/basic_api.py @@ -45,8 +45,8 @@ class model_loader(InferenceModel): }) return requested_parameters - def set_input_parameters(self, colaburl=""): - self.colaburl = colaburl + def set_input_parameters(self, parameters): + self.colaburl = parameters['colaburl'] def _initialize_model(self): return diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index 366fbbb7..b542c712 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -30,6 +30,7 @@ class model_loader(HFTorchInferenceModel): def _load(self, save_model: bool, initial_load: bool) -> None: utils.koboldai_vars.allowsp = True + self.lazy_load = utils.koboldai_vars.lazy_load # Make model path the same as the model name to make this consistent # with the other loading method if it isn't a known model type. 
This diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index f02cf265..057669d7 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -78,10 +78,10 @@ class model_loader(InferenceModel): }]) return requested_parameters - def set_input_parameters(self, url="", key="", model=""): - self.key = key.strip() - self.model = model - self.url = url + def set_input_parameters(self, parameters): + self.key = parameters['key'].strip() + self.model = parameters['model'] + self.url = parameters['url'] def get_cluster_models(self): # Get list of models from public cluster diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index 01c0c037..efbb01d3 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -59,9 +59,9 @@ class model_loader(InferenceModel): }]) return requested_parameters - def set_input_parameters(self, key="", model=""): - self.key = key.strip() - self.model = model + def set_input_parameters(self, parameters): + self.key = parameters['key'].strip() + self.model = parameters['model'] def get_oai_models(self): if self.key == "": diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 54781296..3099feaf 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -34,12 +34,12 @@ class HFInferenceModel(InferenceModel): requested_parameters = [] if model_path is not None and os.path.exists(model_path): - model_config = AutoConfig.from_pretrained(model_path) + self.model_config = AutoConfig.from_pretrained(model_path) elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): - model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") else: - model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") - layer_count = model_config["n_layer"] if isinstance(model_config, dict) else model_config.num_layers if hasattr(model_config, "num_layers") else model_config.n_layer if hasattr(model_config, "n_layer") else model_config.num_hidden_layers if hasattr(model_config, 'num_hidden_layers') else None + self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None if layer_count is not None and layer_count >= 0: if os.path.exists("settings/{}.breakmodel".format(model_name.replace("/", "_"))): with open("settings/{}.breakmodel".format(model_name.replace("/", "_")), "r") as file: @@ -61,11 +61,11 @@ class HFInferenceModel(InferenceModel): "uitype": "slider", "unit": "int", "label": "{} Layers".format(torch.cuda.get_device_name(i)), - "id": "{} Layers".format(i), + "id": "{}_Layers".format(i), "min": 0, "max": layer_count, "step": 1, - "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": 
layer_count, 'check': "="}, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), "default": break_values[i], "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), @@ -77,11 +77,11 @@ class HFInferenceModel(InferenceModel): "uitype": "slider", "unit": "int", "label": "CPU Layers", - "id": "CPU Layers", + "id": "CPU_Layers", "min": 0, "max": layer_count, "step": 1, - "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), "default": layer_count - sum(break_values), "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.", @@ -98,7 +98,7 @@ class HFInferenceModel(InferenceModel): "min": 0, "max": layer_count, "step": 1, - "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), "default": disk_blocks, "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. 
Use as a last resort.", @@ -122,10 +122,40 @@ class HFInferenceModel(InferenceModel): return requested_parameters - def set_input_parameters(self, layers=[], disk_layers=0, use_gpu=False): + def set_input_parameters(self, parameters): + gpu_count = torch.cuda.device_count() + layers = [] + for i in range(gpu_count): + layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None self.layers = layers - self.disk_layers = disk_layers - self.use_gpu = use_gpu + self.disk_layers = parameters['disk_layers'] if 'disk_layers' in parameters else None + self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.model_name = parameters['id'] + self.path = parameters['path'] if 'path' in parameters else None + + def unload(self): + if hasattr(self, 'model'): + self.model = None + if hasattr(self, 'tokenizer'): + self.tokenizer = None + if hasattr(self, 'model_config'): + self.model_config = None + with torch.no_grad(): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="torch.distributed.reduce_op is deprecated") + for tensor in gc.get_objects(): + try: + if torch.is_tensor(tensor): + tensor.set_(torch.tensor((), device=tensor.device, dtype=tensor.dtype)) + except: + pass + gc.collect() + try: + with torch.no_grad(): + torch.cuda.empty_cache() + except: + pass def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults @@ -187,7 +217,7 @@ class HFInferenceModel(InferenceModel): return model_path - basename = utils.koboldai_vars.model.replace("/", "_") + basename = self.model_name.replace("/", "_") if legacy: ret = basename else: diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index d8afafb1..4de13d7b 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -398,7 +398,7 @@ class HFTorchInferenceModel(HFInferenceModel): Embedding._koboldai_patch_causallm_model = self.model def _get_lazy_load_callback(self, n_layers: int, convert_to_float16: bool = True): - if not self.lazy_load: + if not utils.koboldai_vars.lazy_load: return if utils.args.breakmodel_disklayers is not None: diff --git a/modeling/inference_models/readonly.py b/modeling/inference_models/readonly.py new file mode 100644 index 00000000..c642c05a --- /dev/null +++ b/modeling/inference_models/readonly.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +import torch +import requests +import numpy as np +from typing import List, Optional, Union + +import utils +from logger import logger +from modeling.inference_model import ( + GenerationResult, + GenerationSettings, + InferenceModel, + ModelCapabilities, +) + + +class BasicAPIException(Exception): + """To be used for errors when using the Basic API as an interface.""" + + +class model_loader(InferenceModel): + def __init__(self) -> None: + super().__init__() + + # Do not allow API to be served over the API + self.capabilties = ModelCapabilities(api_host=False) + self.tokenizer = self._tokenizer() + self.model = None + self.model_name = "Read Only" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "ReadOnly" + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + return requested_parameters + + def set_input_parameters(self, parameters): + return + + def unload(self): 
+ utils.koboldai_vars.noai = False + + def _initialize_model(self): + return + + class _tokenizer(): + def __init__(self): + self._koboldai_header = [] + def decode(self, _input): + return "" + def encode(self, input_text): + return [] + + def _load(self, save_model: bool = False, initial_load: bool = False) -> None: + self.tokenizer = self.tokenizer + self.model = None + utils.koboldai_vars.noai = True + + def _raw_generate( + self, + prompt_tokens: Union[List[int], torch.Tensor], + max_new: int, + gen_settings: GenerationSettings, + single_line: bool = False, + batch_count: int = 1, + seed: Optional[int] = None, + **kwargs, + ): + return GenerationResult( + model=self, + out_batches=np.array([]), + prompt=prompt_tokens, + is_whole_generation=True, + single_line=single_line, + ) diff --git a/static/koboldai.js b/static/koboldai.js index 1907add8..7f004ff2 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -14,8 +14,8 @@ socket.on('load_popup', function(data){load_popup(data);}); socket.on('popup_items', function(data){popup_items(data);}); socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);}); socket.on('popup_edit_file', function(data){popup_edit_file(data);}); -socket.on('show_model_menu', function(data){show_model_menu(data);}); -socket.on('open_model_load_menu', function(data){new_show_model_menu(data);}); +//socket.on('show_model_menu', function(data){show_model_menu(data);}); +socket.on('open_model_load_menu', function(data){show_model_menu(data);}); socket.on('selected_model_info', function(data){selected_model_info(data);}); socket.on('oai_engines', function(data){oai_engines(data);}); socket.on('buildload', function(data){buildload(data);}); @@ -1502,13 +1502,18 @@ function getModelParameterCount(modelName) { return base * multiplier; } -function new_show_model_menu(data) { +function show_model_menu(data) { //clear out the loadmodelsettings var loadmodelsettings = document.getElementById('loadmodelsettings') while (loadmodelsettings.firstChild) { loadmodelsettings.removeChild(loadmodelsettings.firstChild); } - document.getElementById("modelplugin").classList.add("hidden"); + //Clear out plugin selector + var model_plugin = document.getElementById('modelplugin'); + while (model_plugin.firstChild) { + model_plugin.removeChild(model_plugin.firstChild); + } + model_plugin.classList.add("hidden"); var accept = document.getElementById("btn_loadmodelaccept"); accept.disabled = false; From 20b54eb9ff829526161c2822ada507b6c80bee41 Mon Sep 17 00:00:00 2001 From: Henk Date: Thu, 11 May 2023 19:06:39 +0200 Subject: [PATCH 023/102] Revert 4.29 due to unforseen consequences --- aiserver.py | 2 +- environments/huggingface.yml | 2 +- environments/rocm.yml | 2 +- requirements.txt | 2 +- requirements_mtj.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aiserver.py b/aiserver.py index b045ea71..1abdd31e 100644 --- a/aiserver.py +++ b/aiserver.py @@ -221,7 +221,7 @@ model_menu = { MenuFolder("Untuned Fairseq Dense", "fsdlist"), MenuFolder("Untuned Bloom", "bloomlist"), MenuFolder("Untuned XGLM", "xglmlist"), - MenuFolder("Official RWKV-4", "rwkvlist"), + #MenuFolder("Official RWKV-4", "rwkvlist"), MenuFolder("Untuned GPT2", "gpt2list"), MenuFolder("Online Services", "apilist"), MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER), diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 3d0ca633..af16423e 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -32,7 +32,7 @@ 
dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.29.* + - transformers==4.28.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/environments/rocm.yml b/environments/rocm.yml index eb2927bd..ffcacfb6 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -30,7 +30,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.29.* + - transformers==4.28.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/requirements.txt b/requirements.txt index 28fdb28c..c98b7252 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -transformers==4.29.* +transformers==4.28.* huggingface_hub==0.12.1 Flask==2.2.3 Flask-SocketIO==5.3.2 diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 7fc866f0..b41b7ead 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -5,7 +5,7 @@ requests dm-haiku==0.0.9 jax==0.3.25 jaxlib==0.3.25 -transformers==4.29.* +transformers==4.28.* chex == 0.1.5 huggingface_hub==0.12.1 progressbar2 From e9c845dc2a1eae4927ed2a7417c6aa6969329bb9 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 14:14:52 -0400 Subject: [PATCH 024/102] Fix for badwordIDs --- modeling/inference_models/generic_hf_torch.py | 2 +- modeling/inference_models/parents/hf.py | 1 + modeling/inference_models/parents/hf_torch.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index b542c712..d5cf6397 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -43,7 +43,7 @@ class model_loader(HFTorchInferenceModel): self.model_name = os.path.basename( os.path.normpath(utils.koboldai_vars.custmodpth) ) - utils.koboldai_vars.model = self.model_name + utils.koboldai_vars.model = self.model_name # If we specify a model and it's in the root directory, we need to move # it to the models directory (legacy folder structure to new) diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 3099feaf..1941a12e 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -17,6 +17,7 @@ class HFInferenceModel(InferenceModel): self.model = None self.tokenizer = None + self.badwordsids = koboldai_settings.badwordsids_default def is_valid(self, model_name, model_path, menu_path): try: diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index 4de13d7b..7cc16ad5 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -245,7 +245,7 @@ class HFTorchInferenceModel(HFInferenceModel): len(prompt_tokens) + max_new, utils.koboldai_vars.max_length ), repetition_penalty=1.0, - bad_words_ids=utils.koboldai_vars.badwordsids + bad_words_ids=self.badwordsids + additional_bad_words_ids, use_cache=True, num_return_sequences=batch_count, From a9c785d0f0020847e342f18f9910f1ed9c4871dd Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 14:20:14 -0400 Subject: [PATCH 025/102] Fix for Horde --- modeling/inference_models/horde.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 057669d7..bd457197 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -116,9 +116,9 @@ class 
model_loader(InferenceModel): def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer( - utils.koboldai_vars.cluster_requested_models[0] - if len(utils.koboldai_vars.cluster_requested_models) > 0 - else "gpt2", + self.model + #if len(self.model) > 0 + #else "gpt2", ) def _raw_generate( @@ -166,14 +166,14 @@ class model_loader(InferenceModel): client_agent = "KoboldAI:2.0.0:koboldai.org" cluster_headers = { - "apikey": utils.koboldai_vars.horde_api_key, + "apikey": self.key, "Client-Agent": client_agent, } try: # Create request req = requests.post( - f"{utils.koboldai_vars.horde_url}/api/v2/generate/text/async", + f"{self.url}/api/v2/generate/text/async", json=cluster_metadata, headers=cluster_headers, ) @@ -211,7 +211,7 @@ class model_loader(InferenceModel): while not finished: try: req = requests.get( - f"{utils.koboldai_vars.horde_url}/api/v2/generate/text/status/{request_id}", + f"{self.url}/api/v2/generate/text/status/{request_id}", headers=cluster_agent_headers, ) except requests.exceptions.ConnectionError: From c16336f6467fe11a8644b551d5700986d2ef4bf6 Mon Sep 17 00:00:00 2001 From: somebody Date: Thu, 11 May 2023 17:10:19 -0500 Subject: [PATCH 026/102] Add traceback to debug log on fallback --- modeling/inference_models/hf_torch.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 990fabfc..14ddd7af 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -332,10 +332,13 @@ class HFTorchInferenceModel(HFInferenceModel): raise logger.warning(f"Fell back to GPT2LMHeadModel due to {e}") + logger.debug(traceback.format_exc()) + try: return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs) except Exception as e: logger.warning(f"Fell back to GPTNeoForCausalLM due to {e}") + logger.debug(traceback.format_exc()) return GPTNeoForCausalLM.from_pretrained(location, **tf_kwargs) def get_hidden_size(self) -> int: From 3065c1b40e758993565ea212ccf9f3b0db5c7f0e Mon Sep 17 00:00:00 2001 From: somebody Date: Thu, 11 May 2023 17:10:43 -0500 Subject: [PATCH 027/102] Ignore missing keys in get_original_key --- modeling/inference_models/hf_torch.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 14ddd7af..3f7c3967 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -465,19 +465,25 @@ class HFTorchInferenceModel(HFInferenceModel): device_map: Dict[str, Union[str, int]] = {} @functools.lru_cache(maxsize=None) - def get_original_key(key): - return max( - ( - original_key - for original_key in utils.module_names - if original_key.endswith(key) - ), - key=len, - ) + def get_original_key(key) -> Optional[str]: + key_candidates = [ + original_key + for original_key in utils.module_names + if original_key.endswith(key) + ] + + if not key_candidates: + logger.debug(f"!!! 
No key candidates for {key}") + return None + + return max(key_candidates, key=len) for key, value in model_dict.items(): original_key = get_original_key(key) + if not original_key: + continue + if isinstance(value, lazy_loader.LazyTensor) and not any( original_key.startswith(n) for n in utils.layers_module_names ): From 69d942c00cfd16708f82826fcc0d50355e322c0f Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 20:22:30 -0400 Subject: [PATCH 028/102] Kind of working breakmodel --- aiserver.py | 256 +----------------- koboldai_settings.py | 3 +- modeling/inference_models/generic_hf_torch.py | 7 +- modeling/inference_models/gooseai.py | 31 +++ modeling/inference_models/hf_mtj.py | 2 +- modeling/inference_models/openai.py | 168 +----------- modeling/inference_models/parents/hf.py | 35 ++- modeling/inference_models/parents/hf_torch.py | 27 +- .../parents/openai_gooseai.py | 189 +++++++++++++ static/koboldai.js | 6 + 10 files changed, 281 insertions(+), 443 deletions(-) create mode 100644 modeling/inference_models/gooseai.py create mode 100644 modeling/inference_models/parents/openai_gooseai.py diff --git a/aiserver.py b/aiserver.py index f9e60641..158a6699 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1473,7 +1473,7 @@ def general_startup(override_args=None): koboldai_vars.quiet = True if args.nobreakmodel: - koboldai_vars.nobreakmodel = True + model_loaders['generic_hf_torch'].nobreakmodel = True if args.remote: koboldai_vars.host = True; @@ -1484,6 +1484,9 @@ def general_startup(override_args=None): if args.localtunnel: koboldai_vars.host = True; + if args.lowmem: + model_loaders['generic_hf_torch'].low_mem = True + if args.host != "Disabled": # This means --host option was submitted without an argument # Enable all LAN IPs (0.0.0.0/0) @@ -1516,6 +1519,9 @@ def general_startup(override_args=None): koboldai_vars.trust_remote_code = True if args.cpu: koboldai_vars.use_colab_tpu = False + koboldai_vars.hascuda = False + koboldai_vars.usegpu = False + model_loaders['generic_hf_torch'].nobreakmodel = True koboldai_vars.smandelete = koboldai_vars.host == args.override_delete koboldai_vars.smanrename = koboldai_vars.host == args.override_rename @@ -1545,245 +1551,6 @@ def general_startup(override_args=None): socketio.start_background_task(socket_io_relay, koboldai_settings.queue, socketio) -#==================================================================# -# Load Model -#==================================================================# - -@socketio.on("get_model_info") -def get_model_info(model, directory=""): - logger.info("Selected: {}, {}".format(model, directory)) - # if the model is in the api list - disk_blocks = 0 - key = False - breakmodel = False - gpu = False - layer_count = None - key_value = "" - break_values = [] - url = False - default_url = None - models_on_url = False - multi_online_models = False - show_online_model_select=False - gpu_count = torch.cuda.device_count() - gpu_names = [] - send_horde_models = False - show_custom_model_box = False - for i in range(gpu_count): - gpu_names.append(torch.cuda.get_device_name(i)) - if model in ['Colab', 'API']: - url = True - elif model == 'CLUSTER': - models_on_url = True - show_online_model_select=True - url = True - key = True - default_url = koboldai_vars.horde_url - multi_online_models = True - key_value = koboldai_vars.horde_api_key - url = koboldai_vars.horde_url - if key_value: - send_horde_models = True - elif model in [x.name for x in model_menu['apilist']]: - show_online_model_select=True - if 
path.exists("settings/{}.v2_settings".format(model)): - with open("settings/{}.v2_settings".format(model), "r") as file: - # Check if API key exists - try: - js = json.load(file) - - if("apikey" in js and js["apikey"] != ""): - # API key exists, grab it and close the file - key_value = js["apikey"] - elif 'oaiapikey' in js and js['oaiapikey'] != "": - key_value = js["oaiapikey"] - if model in ('GooseAI', 'OAI'): - get_oai_models({'model': model, 'key': key_value}) - except json.decoder.JSONDecodeError: - print(":(") - pass - key = True - elif "rwkv" in model.lower(): - pass - elif model == 'ReadOnly': - pass - #elif model == 'customhuggingface': - # show_custom_model_box = True - elif args.cpu: - pass - else: - layer_count = get_layer_count(model, directory=directory) - if layer_count is None: - breakmodel = False - gpu = True - else: - breakmodel = True - if model in ["NeoCustom", "GPT2Custom", "customhuggingface"]: - filename = "settings/{}.breakmodel".format(os.path.basename(os.path.normpath(directory))) - else: - filename = "settings/{}.breakmodel".format(model.replace("/", "_")) - if path.exists(filename): - with open(filename, "r") as file: - data = [x for x in file.read().split("\n")[:2] if x != ''] - if len(data) < 2: - data.append("0") - break_values, disk_blocks = data - break_values = break_values.split(",") - else: - break_values = [layer_count] - break_values = [int(x) for x in break_values if x != ''] - break_values += [0] * (gpu_count - len(break_values)) - emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key, 'multi_online_models': multi_online_models, 'default_url': default_url, - 'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, - 'disk_break_value': disk_blocks, 'accelerate': True, - 'break_values': break_values, 'gpu_count': gpu_count, - 'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, - 'show_custom_model_box': show_custom_model_box}, broadcast=True, room="UI_1") - emit('selected_model_info', {'key_value': key_value, 'key':key, - 'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, 'multi_online_models': multi_online_models, 'default_url': default_url, - 'disk_break_value': disk_blocks, 'disk_break': True, - 'break_values': break_values, 'gpu_count': gpu_count, - 'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, 'show_online_model_select': show_online_model_select, - 'bit_8_available': koboldai_vars.bit_8_available if koboldai_vars.experimental_features else False, - 'show_custom_model_box': show_custom_model_box}) - if send_horde_models: - get_cluster_models({'key': key_value, 'url': default_url}) - elif key_value != "" and model in [x.name for x in model_menu['apilist']] and model != 'CLUSTER': - get_oai_models(key_value) - - - -def get_layer_count(model, directory=""): - if(model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]): - if(model == "GPT2Custom"): - with open(os.path.join(directory, "config.json"), "r") as f: - model_config = json.load(f) - # Get the model_type from the config or assume a model type if it isn't present - else: - if(directory): - model = directory - from transformers import AutoConfig - if(os.path.isdir(model.replace('/', '_'))): - model_config = AutoConfig.from_pretrained(model.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache") - elif(is_model_downloaded(model)): - model_config = AutoConfig.from_pretrained("models/{}".format(model.replace('/', '_')), 
revision=koboldai_vars.revision, cache_dir="cache") - elif(os.path.isdir(directory)): - model_config = AutoConfig.from_pretrained(directory, revision=koboldai_vars.revision, cache_dir="cache") - elif(os.path.isdir(koboldai_vars.custmodpth.replace('/', '_'))): - model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache") - else: - model_config = AutoConfig.from_pretrained(model, revision=koboldai_vars.revision, cache_dir="cache") - try: - if (model_config.model_type != 'gpt2' or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel: - return utils.num_layers(model_config) - else: - return None - except: - return None - else: - return None - -@socketio.on('OAI_Key_Update') -def get_oai_models(data): - key = data['key'] - model = data['model'] - koboldai_vars.oaiapikey = key - if model == 'OAI': - url = "https://api.openai.com/v1/engines" - elif model == 'GooseAI': - url = "https://api.goose.ai/v1/engines" - else: - return - - # Get list of models from OAI - logger.init("OAI Engines", status="Retrieving") - req = requests.get( - url, - headers = { - 'Authorization': 'Bearer '+key - } - ) - if(req.status_code == 200): - r = req.json() - engines = r["data"] - try: - engines = [[en["id"], "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")] for en in engines] - except: - logger.error(engines) - raise - - online_model = "" - changed=False - - #Save the key - if not path.exists("settings"): - # If the client settings file doesn't exist, create it - # Write API key to file - os.makedirs('settings', exist_ok=True) - if path.exists("settings/{}.v2_settings".format(model)): - with open("settings/{}.v2_settings".format(model), "r") as file: - js = json.load(file) - if 'online_model' in js: - online_model = js['online_model'] - if "apikey" in js: - if js['apikey'] != key: - changed=True - else: - js = {} - changed=True - - if changed: - with open("settings/{}.v2_settings".format(model), "w") as file: - js["apikey"] = key - file.write(json.dumps(js, indent=3)) - - logger.init_ok("OAI Engines", status="OK") - emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1") - emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2") - else: - # Something went wrong, print the message and quit since we can't initialize an engine - logger.init_err("OAI Engines", status="Failed") - logger.error(req.json()) - emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) - -@socketio.on("get_cluster_models") -def get_cluster_models(msg): - koboldai_vars.horde_api_key = msg['key'] or koboldai_vars.horde_api_key - url = msg['url'] or koboldai_vars.horde_url - koboldai_vars.horde_url = url - # Get list of models from public cluster - print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="") - try: - req = requests.get(f"{url}/api/v2/status/models?type=text") - except: - logger.init_err("KAI Horde Models", status="Failed") - logger.error("Provided KoboldAI Horde URL unreachable") - emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"}) - return - if not req.ok: - # Something went wrong, print the message and quit since we can't initialize an engine - logger.init_err("KAI Horde Models", status="Failed") - logger.error(req.json()) - emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1") - return - - engines 
= req.json() - logger.debug(engines) - try: - engines = [[en["name"], en["name"]] for en in engines] - except: - logger.error(engines) - raise - logger.debug(engines) - - online_model = "" - savesettings() - - logger.init_ok("KAI Horde Models", status="OK") - - emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1") - emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2") - def unload_model(): global model @@ -1845,7 +1612,6 @@ def load_model(plugin, initial_load=False): # loadmodelsettings() # loadsettings() logger.init("GPU support", status="Searching") - koboldai_vars.hascuda = torch.cuda.is_available() and not args.cpu koboldai_vars.bmsupported = ((koboldai_vars.model_type != 'gpt2') or koboldai_vars.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel if(args.breakmodel is not None and args.breakmodel): logger.warning("--breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).") @@ -1861,12 +1627,7 @@ def load_model(plugin, initial_load=False): else: logger.init_warn("GPU support", status="Not Found") - if args.cpu: - koboldai_vars.usegpu = False - gpu_layers = None - disk_layers = None - koboldai_vars.breakmodel = False - elif koboldai_vars.hascuda: + if koboldai_vars.hascuda: if(koboldai_vars.bmsupported): koboldai_vars.usegpu = False koboldai_vars.breakmodel = True @@ -1879,6 +1640,7 @@ def load_model(plugin, initial_load=False): model = model_loaders[plugin] model.load(initial_load=initial_load) + logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer if model: diff --git a/koboldai_settings.py b/koboldai_settings.py index d8416df2..e9562ffc 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -710,7 +710,6 @@ class model_settings(settings): self.modeldim = -1 # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B) self.sampler_order = [6, 0, 1, 2, 3, 4, 5] self.newlinemode = "n" - self.lazy_load = True # Whether or not to use torch_lazy_loader.py for transformers models in order to reduce CPU memory usage self.presets = [] # Holder for presets self.selected_preset = "" self.uid_presets = [] @@ -1236,7 +1235,7 @@ class system_settings(settings): self.corescript = "default.lua" # Filename of corescript to load self.gpu_device = 0 # Which PyTorch device to use when using pure GPU generation self.savedir = os.getcwd()+"\\stories" - self.hascuda = False # Whether torch has detected CUDA on the system + self.hascuda = torch.cuda.is_available() # Whether torch has detected CUDA on the system self.usegpu = False # Whether to launch pipeline with GPU support self.splist = [] self.spselect = "" # Temporary storage for soft prompt filename to load diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index d5cf6397..c228e2ee 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -30,7 +30,6 @@ class model_loader(HFTorchInferenceModel): def _load(self, save_model: bool, initial_load: bool) -> None: utils.koboldai_vars.allowsp = True - self.lazy_load = utils.koboldai_vars.lazy_load # Make model path the same as the model name to make this consistent # with the other loading method if it isn't a known model type. 
This @@ -69,12 +68,14 @@ class model_loader(HFTorchInferenceModel): # If we're using torch_lazy_loader, we need to get breakmodel config # early so that it knows where to load the individual model tensors + logger.debug("lazy_load: {} hascuda: {} breakmodel: {} nobreakmode: {}".format(self.lazy_load, utils.koboldai_vars.hascuda, self.breakmodel, self.nobreakmodel)) if ( self.lazy_load and utils.koboldai_vars.hascuda - and utils.koboldai_vars.breakmodel - and not utils.koboldai_vars.nobreakmodel + and self.breakmodel + and not self.nobreakmodel ): + logger.debug("loading breakmodel") self.breakmodel_device_config(self.model_config) if self.lazy_load: diff --git a/modeling/inference_models/gooseai.py b/modeling/inference_models/gooseai.py new file mode 100644 index 00000000..08d8ea06 --- /dev/null +++ b/modeling/inference_models/gooseai.py @@ -0,0 +1,31 @@ +import torch +import requests +import numpy as np +from typing import List, Optional, Union + +import utils +from logger import logger +from modeling.inference_model import ( + GenerationResult, + GenerationSettings, + InferenceModel, +) + +from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader + + + +class OpenAIAPIError(Exception): + def __init__(self, error_type: str, error_message) -> None: + super().__init__(f"{error_type}: {error_message}") + + +class model_loader(openai_gooseai_model_loader): + """InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.url = "https://api.goose.ai/v1/engines" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "GooseAI" \ No newline at end of file diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py index c99e9a05..759feb65 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj.py @@ -27,7 +27,7 @@ class model_loader(HFInferenceModel): #model_name: str, ) -> None: super().__init__() - + self.hf_torch = False self.model_config = None self.capabilties = ModelCapabilities( embedding_manipulation=False, diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index efbb01d3..cad2a7f2 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -11,6 +11,8 @@ from modeling.inference_model import ( InferenceModel, ) +from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader + class OpenAIAPIError(Exception): @@ -18,172 +20,12 @@ class OpenAIAPIError(Exception): super().__init__(f"{error_type}: {error_message}") -class model_loader(InferenceModel): +class model_loader(openai_gooseai_model_loader): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): super().__init__() - self.key = "" + self.url = "https://api.openai.com/v1/engines" def is_valid(self, model_name, model_path, menu_path): - return model_name == "OAI" or model_name == "GooseAI" - - def get_requested_parameters(self, model_name, model_path, menu_path): - self.source = model_name - requested_parameters = [] - requested_parameters.extend([{ - "uitype": "text", - "unit": "text", - "label": "Key", - "id": "key", - "default": "", - "check": {"value": "", 'check': "!="}, - "tooltip": "User Key to use when connecting to OpenAI/GooseAI.", - "menu_path": "", - "refresh_model_inputs": True, - "extra_classes": "" - }, - { - "uitype": "dropdown", - "unit": "text", - "label": "Model", - "id": "model", - 
"default": "", - "check": {"value": "", 'check': "!="}, - "tooltip": "Which model to use when running OpenAI/GooseAI.", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "", - 'children': self.get_oai_models(), - - }]) - return requested_parameters - - def set_input_parameters(self, parameters): - self.key = parameters['key'].strip() - self.model = parameters['model'] - - def get_oai_models(self): - if self.key == "": - return [] - if self.source == 'OAI': - url = "https://api.openai.com/v1/engines" - elif self.source == 'GooseAI': - url = "https://api.goose.ai/v1/engines" - else: - return - - # Get list of models from OAI - logger.init("OAI Engines", status="Retrieving") - req = requests.get( - url, - headers = { - 'Authorization': 'Bearer '+self.key - } - ) - if(req.status_code == 200): - r = req.json() - engines = r["data"] - try: - engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines] - except: - logger.error(engines) - raise - - online_model = "" - - - logger.init_ok("OAI Engines", status="OK") - return engines - else: - # Something went wrong, print the message and quit since we can't initialize an engine - logger.init_err("OAI Engines", status="Failed") - logger.error(req.json()) - emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) - return [] - - - def _load(self, save_model: bool, initial_load: bool) -> None: - self.tokenizer = self._get_tokenizer("gpt2") - - def _raw_generate( - self, - prompt_tokens: Union[List[int], torch.Tensor], - max_new: int, - gen_settings: GenerationSettings, - single_line: bool = False, - batch_count: int = 1, - seed: Optional[int] = None, - **kwargs, - ) -> GenerationResult: - - if seed is not None: - logger.warning( - "Seed is unsupported on the OpenAIAPIInferenceModel. Seed will be ignored." - ) - - decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens)) - - # Store context in memory to use it for comparison with generated content - utils.koboldai_vars.lastctx = decoded_prompt - - # Build request JSON data - # GooseAI is a subntype of OAI. 
So to check if it's this type, we check the configname as a workaround - # as the koboldai_vars.model will always be OAI - if "GooseAI" in utils.koboldai_vars.configname: - reqdata = { - "prompt": decoded_prompt, - "max_tokens": max_new, - "temperature": gen_settings.temp, - "top_a": gen_settings.top_a, - "top_p": gen_settings.top_p, - "top_k": gen_settings.top_k, - "tfs": gen_settings.tfs, - "typical_p": gen_settings.typical, - "repetition_penalty": gen_settings.rep_pen, - "repetition_penalty_slope": gen_settings.rep_pen_slope, - "repetition_penalty_range": gen_settings.rep_pen_range, - "n": batch_count, - # TODO: Implement streaming - "stream": False, - } - else: - reqdata = { - "prompt": decoded_prompt, - "max_tokens": max_new, - "temperature": gen_settings.temp, - "top_p": gen_settings.top_p, - "frequency_penalty": gen_settings.rep_pen, - "n": batch_count, - "stream": False, - } - - req = requests.post( - utils.koboldai_vars.oaiurl, - json=reqdata, - headers={ - "Authorization": "Bearer " + utils.koboldai_vars.oaiapikey, - "Content-Type": "application/json", - }, - ) - - j = req.json() - - if not req.ok: - # Send error message to web client - if "error" in j: - error_type = j["error"]["type"] - error_message = j["error"]["message"] - else: - error_type = "Unknown" - error_message = "Unknown" - raise OpenAIAPIError(error_type, error_message) - - outputs = [out["text"] for out in j["choices"]] - return GenerationResult( - model=self, - out_batches=np.array([self.tokenizer.encode(x) for x in outputs]), - prompt=prompt_tokens, - is_whole_generation=True, - single_line=single_line, - ) + return model_name == "OAI" \ No newline at end of file diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 1941a12e..c7a781d7 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -22,18 +22,19 @@ class HFInferenceModel(InferenceModel): def is_valid(self, model_name, model_path, menu_path): try: if model_path is not None and os.path.exists(model_path): - model_config = AutoConfig.from_pretrained(model_path) + self.model_config = AutoConfig.from_pretrained(model_path) elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): - model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") else: - model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") return True except: return False def get_requested_parameters(self, model_name, model_path, menu_path): requested_parameters = [] - + if not self.hf_torch: + return [] if model_path is not None and os.path.exists(model_path): self.model_config = AutoConfig.from_pretrained(model_path) elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): @@ -124,14 +125,20 @@ class HFInferenceModel(InferenceModel): return requested_parameters def set_input_parameters(self, parameters): - gpu_count = torch.cuda.device_count() - layers = [] - for i in range(gpu_count): - layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) - self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' 
in parameters else None - self.layers = layers - self.disk_layers = parameters['disk_layers'] if 'disk_layers' in parameters else None - self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + if self.hf_torch: + import breakmodel + gpu_count = torch.cuda.device_count() + layers = [] + for i in range(gpu_count): + layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + self.layers = layers + self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0 + breakmodel.gpu_blocks = layers + breakmodel.disk_blocks = self.disk_layers + self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.model_type = self.get_model_type() + self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel self.model_name = parameters['id'] self.path = parameters['path'] if 'path' in parameters else None @@ -157,6 +164,10 @@ class HFInferenceModel(InferenceModel): torch.cuda.empty_cache() except: pass + if self.hf_torch: + breakmodel.breakmodel = True + breakmodel.gpu_blocks = [] + breakmodel.disk_blocks = 0 def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index 7cc16ad5..84c60a6c 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -53,15 +53,12 @@ LOG_SAMPLER_NO_EFFECT = False class HFTorchInferenceModel(HFInferenceModel): - def __init__( - self, - #model_name: str, - #lazy_load: bool, - #low_mem: bool, - ) -> None: + def __init__(self) -> None: super().__init__() - #self.lazy_load = lazy_load - #self.low_mem = low_mem + self.hf_torch = True + self.lazy_load = True + self.low_mem = False + self.nobreakmodel = False self.post_token_hooks = [ PostTokenHooks.stream_tokens, @@ -398,7 +395,7 @@ class HFTorchInferenceModel(HFInferenceModel): Embedding._koboldai_patch_causallm_model = self.model def _get_lazy_load_callback(self, n_layers: int, convert_to_float16: bool = True): - if not utils.koboldai_vars.lazy_load: + if not self.lazy_load: return if utils.args.breakmodel_disklayers is not None: @@ -819,14 +816,14 @@ class HFTorchInferenceModel(HFInferenceModel): elif ( utils.args.breakmodel_gpulayers is not None or utils.args.breakmodel_disklayers is not None + or breakmodel.gpu_blocks != [] ): try: - if not utils.args.breakmodel_gpulayers: - breakmodel.gpu_blocks = [] - else: - breakmodel.gpu_blocks = list( - map(int, utils.args.breakmodel_gpulayers.split(",")) - ) + if breakmodel.gpu_blocks == []: + if utils.args.breakmodel_gpulayers: + breakmodel.gpu_blocks = list( + map(int, utils.args.breakmodel_gpulayers.split(",")) + ) assert len(breakmodel.gpu_blocks) <= torch.cuda.device_count() s = n_layers for i in range(len(breakmodel.gpu_blocks)): diff --git a/modeling/inference_models/parents/openai_gooseai.py b/modeling/inference_models/parents/openai_gooseai.py new file mode 100644 index 00000000..621ccbad --- /dev/null +++ b/modeling/inference_models/parents/openai_gooseai.py @@ -0,0 +1,189 @@ +import torch +import requests +import numpy as np +from typing import List, Optional, Union + +import utils +from logger import logger +from modeling.inference_model import ( + 
GenerationResult, + GenerationSettings, + InferenceModel, +) + + + +class OpenAIAPIError(Exception): + def __init__(self, error_type: str, error_message) -> None: + super().__init__(f"{error_type}: {error_message}") + + +class model_loader(InferenceModel): + """InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.key = "" + self.url = "https://api.goose.ai/v1/engines" + #if self.source == 'OAI': + # url = "https://api.openai.com/v1/engines" + #elif self.source == 'GooseAI': + # url = "https://api.goose.ai/v1/engines" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "OAI" or model_name == "GooseAI" + + def get_requested_parameters(self, model_name, model_path, menu_path): + self.source = model_name + requested_parameters = [] + requested_parameters.extend([{ + "uitype": "text", + "unit": "text", + "label": "Key", + "id": "key", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "User Key to use when connecting to OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "dropdown", + "unit": "text", + "label": "Model", + "id": "model", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Which model to use when running OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": False, + "extra_classes": "", + 'children': self.get_oai_models(), + + }]) + return requested_parameters + + def set_input_parameters(self, parameters): + self.key = parameters['key'].strip() + self.model = parameters['model'] + + def get_oai_models(self): + if self.key == "": + return [] + + + # Get list of models from OAI + logger.init("OAI Engines", status="Retrieving") + req = requests.get( + self.url, + headers = { + 'Authorization': 'Bearer '+self.key + } + ) + if(req.status_code == 200): + r = req.json() + engines = r["data"] + try: + engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines] + except: + logger.error(engines) + raise + + online_model = "" + + + logger.init_ok("OAI Engines", status="OK") + return engines + else: + # Something went wrong, print the message and quit since we can't initialize an engine + logger.init_err("OAI Engines", status="Failed") + logger.error(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) + return [] + + + def _load(self, save_model: bool, initial_load: bool) -> None: + self.tokenizer = self._get_tokenizer("gpt2") + + def _raw_generate( + self, + prompt_tokens: Union[List[int], torch.Tensor], + max_new: int, + gen_settings: GenerationSettings, + single_line: bool = False, + batch_count: int = 1, + seed: Optional[int] = None, + **kwargs, + ) -> GenerationResult: + + if seed is not None: + logger.warning( + "Seed is unsupported on the OpenAIAPIInferenceModel. Seed will be ignored." + ) + + decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens)) + + # Store context in memory to use it for comparison with generated content + utils.koboldai_vars.lastctx = decoded_prompt + + # Build request JSON data + # GooseAI is a subntype of OAI. 
So to check if it's this type, we check the configname as a workaround + # as the koboldai_vars.model will always be OAI + if "GooseAI" in utils.koboldai_vars.configname: + reqdata = { + "prompt": decoded_prompt, + "max_tokens": max_new, + "temperature": gen_settings.temp, + "top_a": gen_settings.top_a, + "top_p": gen_settings.top_p, + "top_k": gen_settings.top_k, + "tfs": gen_settings.tfs, + "typical_p": gen_settings.typical, + "repetition_penalty": gen_settings.rep_pen, + "repetition_penalty_slope": gen_settings.rep_pen_slope, + "repetition_penalty_range": gen_settings.rep_pen_range, + "n": batch_count, + # TODO: Implement streaming + "stream": False, + } + else: + reqdata = { + "prompt": decoded_prompt, + "max_tokens": max_new, + "temperature": gen_settings.temp, + "top_p": gen_settings.top_p, + "frequency_penalty": gen_settings.rep_pen, + "n": batch_count, + "stream": False, + } + + req = requests.post( + self.url, + json=reqdata, + headers={ + "Authorization": "Bearer " + self.key, + "Content-Type": "application/json", + }, + ) + + j = req.json() + + if not req.ok: + # Send error message to web client + if "error" in j: + error_type = j["error"]["type"] + error_message = j["error"]["message"] + else: + error_type = "Unknown" + error_message = "Unknown" + raise OpenAIAPIError(error_type, error_message) + + outputs = [out["text"] for out in j["choices"]] + return GenerationResult( + model=self, + out_batches=np.array([self.tokenizer.encode(x) for x in outputs]), + prompt=prompt_tokens, + is_whole_generation=True, + single_line=single_line, + ) diff --git a/static/koboldai.js b/static/koboldai.js index 7f004ff2..ab7f7832 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1652,6 +1652,12 @@ function selected_model_info(data) { while (loadmodelsettings.firstChild) { loadmodelsettings.removeChild(loadmodelsettings.firstChild); } + //Clear out plugin selector + var model_plugin = document.getElementById('modelplugin'); + while (model_plugin.firstChild) { + model_plugin.removeChild(model_plugin.firstChild); + } + var accept = document.getElementById("btn_loadmodelaccept"); accept.disabled = false; From a6f0e97ba0ecf17b558e7577834ed9cff964be00 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 20:40:05 -0400 Subject: [PATCH 029/102] Working(?) 
breakmodel --- modeling/inference_models/parents/hf.py | 3 +- modeling/inference_models/parents/hf_torch.py | 52 ++++++++++--------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index c7a781d7..67fd8b15 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -18,6 +18,7 @@ class HFInferenceModel(InferenceModel): self.model = None self.tokenizer = None self.badwordsids = koboldai_settings.badwordsids_default + self.usegpu = False def is_valid(self, model_name, model_path, menu_path): try: @@ -136,7 +137,7 @@ class HFInferenceModel(InferenceModel): self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0 breakmodel.gpu_blocks = layers breakmodel.disk_blocks = self.disk_layers - self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel self.model_name = parameters['id'] diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index 84c60a6c..d942a572 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -289,6 +289,7 @@ class HFTorchInferenceModel(HFInferenceModel): raise logger.warning(f"Fell back to GPT2LMHeadModel due to {e}") + logger.debug(traceback_string) try: return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs) except Exception as e: @@ -437,10 +438,10 @@ class HFTorchInferenceModel(HFInferenceModel): ): device_map[key] = ( utils.koboldai_vars.gpu_device - if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu + if utils.koboldai_vars.hascuda and self.usegpu else "cpu" if not utils.koboldai_vars.hascuda - or not utils.koboldai_vars.breakmodel + or not self.breakmodel else breakmodel.primary_device ) else: @@ -456,12 +457,12 @@ class HFTorchInferenceModel(HFInferenceModel): ) device = ( utils.koboldai_vars.gpu_device - if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu + if utils.koboldai_vars.hascuda and self.usegpu else "disk" if layer < disk_blocks and layer < ram_blocks else "cpu" if not utils.koboldai_vars.hascuda - or not utils.koboldai_vars.breakmodel + or not self.breakmodel else "shared" if layer < ram_blocks else bisect.bisect_right( @@ -566,15 +567,15 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) and model_dict[key].dtype is torch.float32 ): model_dict[key] = model_dict[key].to(torch.float16) if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel and model_dict[key].dtype is torch.float16 ): model_dict[key] = model_dict[key].to(torch.float32) @@ -612,14 +613,14 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) ): dtype = torch.float16 if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu 
- and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel ): dtype = torch.float32 if ( @@ -675,16 +676,16 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) and model_dict[key].dtype is torch.float32 ): model_dict[key] = model_dict[key].to(torch.float16) if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel and model_dict[key].dtype is torch.float16 ): model_dict[key] = model_dict[key].to(torch.float32) @@ -723,14 +724,14 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) ): dtype = torch.float16 if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel ): dtype = torch.float32 if ( @@ -764,7 +765,7 @@ class HFTorchInferenceModel(HFInferenceModel): if always_use or ( utils.koboldai_vars.hascuda and self.low_mem - and (utils.koboldai_vars.usegpu or utils.koboldai_vars.breakmodel) + and (self.usegpu or self.breakmodel) ): original_dtype = torch.get_default_dtype() torch.set_default_dtype(torch.float16) @@ -956,8 +957,9 @@ class HFTorchInferenceModel(HFInferenceModel): -1, utils.num_layers(config), ): - utils.koboldai_vars.breakmodel = False - utils.koboldai_vars.usegpu = True + logger.debug("All layers on same GPU. Breakmodel disabled") + self.breakmodel = False + self.usegpu = True utils.koboldai_vars.gpu_device = len(breakmodel.gpu_blocks) - 1 return @@ -966,6 +968,6 @@ class HFTorchInferenceModel(HFInferenceModel): import breakmodel breakmodel.primary_device = "cpu" - utils.koboldai_vars.breakmodel = False - utils.koboldai_vars.usegpu = False + self.breakmodel = False + self.usegpu = False return From aaa91338996a652960bfa8b9461c2f0de8d82bee Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 21:22:33 -0400 Subject: [PATCH 030/102] Disk Cache working UI valid marker broken for disk cache --- aiserver.py | 4 +--- modeling/inference_models/parents/hf.py | 6 +++--- modeling/inference_models/parents/hf_torch.py | 8 ++++---- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/aiserver.py b/aiserver.py index 158a6699..a306449e 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1639,7 +1639,7 @@ def load_model(plugin, initial_load=False): model = model_loaders[plugin] - model.load(initial_load=initial_load) + model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer @@ -6156,8 +6156,6 @@ def UI_2_select_model(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): - logger.info("loading Model") - logger.info(data) model_loaders[data['plugin']].set_input_parameters(data) load_model(data['plugin']) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 67fd8b15..03955d88 100644 --- 
a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -53,12 +53,12 @@ class HFInferenceModel(InferenceModel): break_values = break_values.split(",") else: break_values = [layer_count] - disk_blocks = None + disk_blocks = 0 break_values = [int(x) for x in break_values if x != '' and x is not None] gpu_count = torch.cuda.device_count() break_values += [0] * (gpu_count - len(break_values)) if disk_blocks is not None: - break_values += [disk_blocks] + break_values += [int(disk_blocks)] for i in range(gpu_count): requested_parameters.append({ "uitype": "slider", @@ -134,7 +134,7 @@ class HFInferenceModel(InferenceModel): layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None self.layers = layers - self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0 + self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 breakmodel.gpu_blocks = layers breakmodel.disk_blocks = self.disk_layers self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index d942a572..aae3ada3 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -780,6 +780,7 @@ class HFTorchInferenceModel(HFInferenceModel): device_count = torch.cuda.device_count() if device_count < 2: primary = None + logger.debug("n_layers: {}".format(n_layers)) gpu_blocks = breakmodel.gpu_blocks + ( device_count - len(breakmodel.gpu_blocks) ) * [0] @@ -835,10 +836,7 @@ class HFTorchInferenceModel(HFInferenceModel): s -= breakmodel.gpu_blocks[i] assert sum(breakmodel.gpu_blocks) <= n_layers n_layers -= sum(breakmodel.gpu_blocks) - if utils.args.breakmodel_disklayers is not None: - assert utils.args.breakmodel_disklayers <= n_layers - breakmodel.disk_blocks = utils.args.breakmodel_disklayers - n_layers -= utils.args.breakmodel_disklayers + n_layers -= breakmodel.disk_blocks except: logger.warning( "--breakmodel_gpulayers is malformatted. Please use the --help option to see correct usage of --breakmodel_gpulayers. Defaulting to all layers on device 0." 
@@ -949,6 +947,8 @@ class HFTorchInferenceModel(HFInferenceModel): logger.init_ok("Final device configuration:", status="Info") self.breakmodel_device_list(n_layers, primary=breakmodel.primary_device) + with open("settings/{}.breakmodel".format(self.model_name.replace("/", "_")), "w") as file: + file.write("{}\n{}".format(",".join(map(str, breakmodel.gpu_blocks)), breakmodel.disk_blocks)) # If all layers are on the same device, use the old GPU generation mode while len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0: From 67df9b917f6a84445520e89a04080e8553356b15 Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 12 May 2023 09:08:07 +0200 Subject: [PATCH 031/102] Reintroduce 4.29 Transformers --- aiserver.py | 2 +- environments/huggingface.yml | 2 +- environments/rocm.yml | 2 +- requirements.txt | 2 +- requirements_mtj.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aiserver.py b/aiserver.py index 1abdd31e..b045ea71 100644 --- a/aiserver.py +++ b/aiserver.py @@ -221,7 +221,7 @@ model_menu = { MenuFolder("Untuned Fairseq Dense", "fsdlist"), MenuFolder("Untuned Bloom", "bloomlist"), MenuFolder("Untuned XGLM", "xglmlist"), - #MenuFolder("Official RWKV-4", "rwkvlist"), + MenuFolder("Official RWKV-4", "rwkvlist"), MenuFolder("Untuned GPT2", "gpt2list"), MenuFolder("Online Services", "apilist"), MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER), diff --git a/environments/huggingface.yml b/environments/huggingface.yml index af16423e..3d0ca633 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -32,7 +32,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.28.* + - transformers==4.29.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/environments/rocm.yml b/environments/rocm.yml index ffcacfb6..eb2927bd 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -30,7 +30,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.28.* + - transformers==4.29.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/requirements.txt b/requirements.txt index c98b7252..28fdb28c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -transformers==4.28.* +transformers==4.29.* huggingface_hub==0.12.1 Flask==2.2.3 Flask-SocketIO==5.3.2 diff --git a/requirements_mtj.txt b/requirements_mtj.txt index b41b7ead..7fc866f0 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -5,7 +5,7 @@ requests dm-haiku==0.0.9 jax==0.3.25 jaxlib==0.3.25 -transformers==4.28.* +transformers==4.29.* chex == 0.1.5 huggingface_hub==0.12.1 progressbar2 From 205c64f1ea161ec2d0b3929efb73db429fa0f798 Mon Sep 17 00:00:00 2001 From: Henk Date: Sat, 13 May 2023 20:26:55 +0200 Subject: [PATCH 032/102] More universal pytorch folder detection --- modeling/inference_models/hf_torch.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 3f7c3967..cc7af713 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -581,10 +581,9 @@ class HFTorchInferenceModel(HFInferenceModel): last_storage_key = storage_key if isinstance(f, zipfile.ZipExtFile): f.close() - try: - f = z.open(f"archive/data/{storage_key}") - except: - f = z.open(f"{zipfolder}/data/{storage_key}") + ziproot = z.namelist()[0].split("/")[0] + f = z.open(f"{ziproot}/data/{storage_key}") + current_offset = 0 if current_offset 
!= model_dict[key].seek_offset: f.read(model_dict[key].seek_offset - current_offset) From 56443bc7eaf4404c153368582baff107a3137bcb Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 15 May 2023 21:44:01 +0200 Subject: [PATCH 033/102] Unban torch._tensor._rebuild_tensor_v2 --- modeling/lazy_loader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modeling/lazy_loader.py b/modeling/lazy_loader.py index 3dee5bae..e7acc784 100644 --- a/modeling/lazy_loader.py +++ b/modeling/lazy_loader.py @@ -196,6 +196,8 @@ class RestrictedUnpickler(pickle.Unpickler): return collections.OrderedDict elif module == "torch._utils" and name == "_rebuild_tensor_v2": return torch._utils._rebuild_tensor_v2 + elif module == "torch._tensor" and name == "_rebuild_tensor_v2": + return torch._tensor._rebuild_tensor_v2 elif module == "torch" and name in ( "DoubleStorage", "FloatStorage", From c5100b4eab5b37e0d575869283a6f837f5398f00 Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 15 May 2023 22:21:22 +0200 Subject: [PATCH 034/102] Unban Tensor --- modeling/lazy_loader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modeling/lazy_loader.py b/modeling/lazy_loader.py index e7acc784..a948821e 100644 --- a/modeling/lazy_loader.py +++ b/modeling/lazy_loader.py @@ -209,6 +209,7 @@ class RestrictedUnpickler(pickle.Unpickler): "ByteStorage", "BoolStorage", "BFloat16Storage", + "Tensor", ): return getattr(torch, name) elif module == "numpy.core.multiarray" and name == "scalar": @@ -221,7 +222,7 @@ class RestrictedUnpickler(pickle.Unpickler): # Forbid everything else. qualified_name = name if module == "__builtin__" else f"{module}.{name}" raise pickle.UnpicklingError( - f"`{qualified_name}` is forbidden; the model you are loading probably contains malicious code" + f"`{qualified_name}` is forbidden; the model you are loading probably contains malicious code. 
If you think this is incorrect ask the developer to unban the ability for {module} to execute {name}" ) def load(self, *args, **kwargs): From 59c96b5b7aea2eaf4ad8bab70794c8f3d41edccf Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 15 May 2023 22:38:12 +0200 Subject: [PATCH 035/102] Unban fix --- modeling/lazy_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modeling/lazy_loader.py b/modeling/lazy_loader.py index a948821e..5a27d549 100644 --- a/modeling/lazy_loader.py +++ b/modeling/lazy_loader.py @@ -196,8 +196,8 @@ class RestrictedUnpickler(pickle.Unpickler): return collections.OrderedDict elif module == "torch._utils" and name == "_rebuild_tensor_v2": return torch._utils._rebuild_tensor_v2 - elif module == "torch._tensor" and name == "_rebuild_tensor_v2": - return torch._tensor._rebuild_tensor_v2 + elif module == "torch._tensor" and name == "_rebuild_from_type_v2": + return torch._tensor._rebuild_from_type_v2 elif module == "torch" and name in ( "DoubleStorage", "FloatStorage", From b2501e469381eb42530fdf74d7d7322e5dd1f6f7 Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 16 May 2023 22:15:59 +0200 Subject: [PATCH 036/102] 4.29 was still to buggy --- environments/huggingface.yml | 2 +- environments/rocm.yml | 2 +- requirements.txt | 2 +- requirements_mtj.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 3d0ca633..af16423e 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -32,7 +32,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.29.* + - transformers==4.28.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/environments/rocm.yml b/environments/rocm.yml index eb2927bd..ffcacfb6 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -30,7 +30,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.29.* + - transformers==4.28.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/requirements.txt b/requirements.txt index 28fdb28c..c98b7252 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -transformers==4.29.* +transformers==4.28.* huggingface_hub==0.12.1 Flask==2.2.3 Flask-SocketIO==5.3.2 diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 7fc866f0..b41b7ead 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -5,7 +5,7 @@ requests dm-haiku==0.0.9 jax==0.3.25 jaxlib==0.3.25 -transformers==4.29.* +transformers==4.28.* chex == 0.1.5 huggingface_hub==0.12.1 progressbar2 From f027d8b6e56393c12b8cd1611a3c0b7cc90802c9 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 17 May 2023 21:15:31 -0400 Subject: [PATCH 037/102] Better working valid detection and named model backends for UI --- aiserver.py | 53 +++-- modeling/inference_models/api.py | 3 +- modeling/inference_models/basic_api.py | 4 +- modeling/inference_models/generic_hf_torch.py | 3 +- modeling/inference_models/gooseai.py | 5 +- modeling/inference_models/hf_mtj.py | 4 +- modeling/inference_models/horde.py | 3 +- modeling/inference_models/openai.py | 6 +- modeling/inference_models/parents/hf.py | 24 +- .../parents/openai_gooseai.py | 2 +- modeling/inference_models/readonly.py | 3 +- modeling/inference_models/rwkv.py | 5 +- static/koboldai.js | 206 +++++++++++------- templates/templates.html | 5 +- 14 files changed, 191 insertions(+), 135 deletions(-) diff --git a/aiserver.py b/aiserver.py index 92dde7f4..314fb512 100644 --- 
a/aiserver.py +++ b/aiserver.py @@ -622,12 +622,12 @@ from modeling.patches import patch_transformers #Load all of the model importers import importlib -model_loader_code = {} -model_loaders = {} +model_backend_code = {} +model_backends = {} for module in os.listdir("./modeling/inference_models"): if os.path.isfile(os.path.join("./modeling/inference_models",module)) and module[-3:] == '.py': - model_loader_code[module[:-3]] = importlib.import_module('modeling.inference_models.{}'.format(module[:-3])) - model_loaders[module[:-3]] = model_loader_code[module[:-3]].model_loader() + model_backend_code[module[:-3]] = importlib.import_module('modeling.inference_models.{}'.format(module[:-3])) + model_backends[model_backend_code[module[:-3]].model_backend_name] = model_backend_code[module[:-3]].model_backend() old_socketio_on = socketio.on @@ -1354,6 +1354,7 @@ def general_startup(override_args=None): parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable") parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") + parser.add_argument("--model_backend", help="Specify the model backend you want to use") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") parser.add_argument("--sh_apikey", help="Specify the API key to use for txt2img from the Stable Horde. Get a key from https://horde.koboldai.net/register") @@ -1447,6 +1448,12 @@ def general_startup(override_args=None): args.max_summary_length = int(args.max_summary_length) if args.model: + # At this point we have to try to load the model through the selected backend + if not args.model_backend: + logger.error("Didn't select a model backend. 
Please enter one through the --model_backend or remove the --model from the run command") + exit() + #if + koboldai_vars.model = args.model; koboldai_vars.revision = args.revision koboldai_settings.multi_story = args.multi_story @@ -1472,7 +1479,7 @@ def general_startup(override_args=None): koboldai_vars.quiet = True if args.nobreakmodel: - model_loaders['generic_hf_torch'].nobreakmodel = True + model_backends['Huggingface'].nobreakmodel = True if args.remote: koboldai_vars.host = True; @@ -1484,7 +1491,7 @@ def general_startup(override_args=None): koboldai_vars.host = True; if args.lowmem: - model_loaders['generic_hf_torch'].low_mem = True + model_backends['Huggingface'].low_mem = True if args.host != "Disabled": # This means --host option was submitted without an argument @@ -1520,7 +1527,7 @@ def general_startup(override_args=None): koboldai_vars.use_colab_tpu = False koboldai_vars.hascuda = False koboldai_vars.usegpu = False - model_loaders['generic_hf_torch'].nobreakmodel = True + model_backends['Huggingface'].nobreakmodel = True koboldai_vars.smandelete = koboldai_vars.host == args.override_delete koboldai_vars.smanrename = koboldai_vars.host == args.override_rename @@ -1582,7 +1589,7 @@ def unload_model(): koboldai_vars.badwordsids = koboldai_settings.badwordsids_default -def load_model(plugin, initial_load=False): +def load_model(model_backend, initial_load=False): global model global tokenizer global model_config @@ -1637,7 +1644,7 @@ def load_model(plugin, initial_load=False): koboldai_vars.default_preset = koboldai_settings.default_preset - model = model_loaders[plugin] + model = model_backends[model_backend] model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) logger.debug("Model Type: {}".format(koboldai_vars.model_type)) @@ -6103,33 +6110,23 @@ def UI_2_select_model(data): emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) else: #Get load methods - logger.debug("Asking for model info on potential model: {}".format(data)) - valid = False if 'path' not in data or data['path'] == "": valid_loaders = {} - for model_loader in model_loaders: - logger.debug("Testing Loader {} for model {}: {}".format(model_loader, data["name"], model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]))) - if model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]): - valid_loaders[model_loader] = model_loaders[model_loader].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) - valid = True - if valid: - logger.debug("Valid Loaders: {}".format(valid_loaders)) - emit("selected_model_info", valid_loaders) - if not valid and 'path' in data: + for model_backend in model_backends: + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + emit("selected_model_info", {"model_backends": valid_loaders, "preselected": "Huggingface"}) + else: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) output = [] for path in paths: valid=False - for model_loader in model_loaders: - if model_loaders[model_loader].is_valid(path[1], path[0], "Custom"): + for model_backend in model_backends: + if model_backends[model_backend].is_valid(path[1], path[0], "Custom"): valid=True break output.append({'label': path[1], 'name': 
path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) - emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) - elif not valid: - logger.error("Nothing can load the model: {}".format(valid_loaders)) - + emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return @@ -6156,7 +6153,7 @@ def UI_2_select_model(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): - model_loaders[data['plugin']].set_input_parameters(data) + model_backends[data['plugin']].set_input_parameters(data) load_model(data['plugin']) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) @@ -10671,7 +10668,7 @@ for schema in config_endpoint_schemas: def startup(): if koboldai_vars.model == "" or koboldai_vars.model is None: koboldai_vars.model = "ReadOnly" - socketio.start_background_task(load_model, *('readonly',), **{'initial_load':True}) + socketio.start_background_task(load_model, *('Read Only',), **{'initial_load':True}) print("", end="", flush=True) diff --git a/modeling/inference_models/api.py b/modeling/inference_models/api.py index 5bddd714..409158f5 100644 --- a/modeling/inference_models/api.py +++ b/modeling/inference_models/api.py @@ -17,12 +17,13 @@ from modeling.inference_model import ( ModelCapabilities, ) +model_backend_name = "KoboldAI API" class APIException(Exception): """To be used for errors when using the Kobold API as an interface.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() #self.base_url = "" diff --git a/modeling/inference_models/basic_api.py b/modeling/inference_models/basic_api.py index 5666ba8e..cca9652b 100644 --- a/modeling/inference_models/basic_api.py +++ b/modeling/inference_models/basic_api.py @@ -15,11 +15,13 @@ from modeling.inference_model import ( ) +model_backend_name = "KoboldAI Old Colab Method" + class BasicAPIException(Exception): """To be used for errors when using the Basic API as an interface.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index c228e2ee..f7a00f45 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -22,8 +22,9 @@ except ModuleNotFoundError as e: from modeling.inference_models.parents.hf_torch import HFTorchInferenceModel +model_backend_name = "Huggingface" -class model_loader(HFTorchInferenceModel): +class model_backend(HFTorchInferenceModel): def _initialize_model(self): return diff --git a/modeling/inference_models/gooseai.py b/modeling/inference_models/gooseai.py index 08d8ea06..9d6e8771 100644 --- a/modeling/inference_models/gooseai.py +++ b/modeling/inference_models/gooseai.py @@ -11,16 +11,17 @@ from modeling.inference_model import ( InferenceModel, ) -from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader +from modeling.inference_models.parents.openai_gooseai import model_backend as openai_gooseai_model_backend +model_backend_name = "GooseAI" class 
OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") -class model_loader(openai_gooseai_model_loader): +class model_backend(openai_gooseai_model_backend): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py index 4e82d348..6351eca2 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj.py @@ -19,10 +19,10 @@ from modeling.inference_model import ( from modeling.inference_models.parents.hf import HFInferenceModel from modeling.tokenizer import GenericTokenizer +model_backend_name = "Huggingface MTJ" - -class model_loader(HFInferenceModel): +class model_backend(HFInferenceModel): def __init__( self, #model_name: str, diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index bd457197..6c880bbe 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -16,12 +16,13 @@ from modeling.inference_model import ( ModelCapabilities, ) +model_backend_name = "Horde" class HordeException(Exception): """To be used for errors on server side of the Horde.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() self.url = "https://horde.koboldai.net" diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index cad2a7f2..19a7d1e6 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -11,16 +11,16 @@ from modeling.inference_model import ( InferenceModel, ) -from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader - +from modeling.inference_models.parents.openai_gooseai import model_backend as openai_gooseai_model_backend +model_backend_name = "OpenAI" class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") -class model_loader(openai_gooseai_model_loader): +class model_backend(openai_gooseai_model_backend): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index ba291c3f..69549bd5 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -1,7 +1,7 @@ import os from typing import Optional from transformers import AutoConfig - +import warnings import utils import koboldai_settings from logger import logger @@ -43,7 +43,7 @@ class HFInferenceModel(InferenceModel): else: self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None - if layer_count is not None and layer_count >= 0: + if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: if os.path.exists("settings/{}.breakmodel".format(model_name.replace("/", "_"))): with open("settings/{}.breakmodel".format(model_name.replace("/", "_")), "r") as file: data = [x for x in file.read().split("\n")[:2] if x != ''] @@ 
-128,15 +128,17 @@ class HFInferenceModel(InferenceModel): def set_input_parameters(self, parameters): if self.hf_torch: import breakmodel - gpu_count = torch.cuda.device_count() - layers = [] - for i in range(gpu_count): - layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) - self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None - self.layers = layers - self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 - breakmodel.gpu_blocks = layers - breakmodel.disk_blocks = self.disk_layers + layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None + if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: + gpu_count = torch.cuda.device_count() + layers = [] + for i in range(gpu_count): + layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + self.layers = layers + self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 + breakmodel.gpu_blocks = layers + breakmodel.disk_blocks = self.disk_layers self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel diff --git a/modeling/inference_models/parents/openai_gooseai.py b/modeling/inference_models/parents/openai_gooseai.py index 621ccbad..871ea5ce 100644 --- a/modeling/inference_models/parents/openai_gooseai.py +++ b/modeling/inference_models/parents/openai_gooseai.py @@ -18,7 +18,7 @@ class OpenAIAPIError(Exception): super().__init__(f"{error_type}: {error_message}") -class model_loader(InferenceModel): +class model_backend(InferenceModel): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): diff --git a/modeling/inference_models/readonly.py b/modeling/inference_models/readonly.py index c642c05a..92531af4 100644 --- a/modeling/inference_models/readonly.py +++ b/modeling/inference_models/readonly.py @@ -14,12 +14,13 @@ from modeling.inference_model import ( ModelCapabilities, ) +model_backend_name = "Read Only" class BasicAPIException(Exception): """To be used for errors when using the Basic API as an interface.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() diff --git a/modeling/inference_models/rwkv.py b/modeling/inference_models/rwkv.py index d14d8c81..fa6497b7 100644 --- a/modeling/inference_models/rwkv.py +++ b/modeling/inference_models/rwkv.py @@ -55,7 +55,10 @@ MODEL_FILES = { } -class model_loader(InferenceModel): +model_backend_name = "RWKV" + + +class model_backend(InferenceModel): def __init__( self, #model_name: str, diff --git a/static/koboldai.js b/static/koboldai.js index de3ab324..905403c1 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1645,8 +1645,85 @@ function show_model_menu(data) { } +function model_settings_checker() { + //get check value: + 
missing_element = false; + if (this.check_data != null) { + if ('sum' in this.check_data) { + check_value = 0 + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value); + } else { + missing_element = true; + } + } + } else { + check_value = this.value + } + if (this.check_data['check'] == "=") { + valid = (check_value == this.check_data['value']); + } else if (this.check_data['check'] == "!=") { + valid = (check_value != this.check_data['value']); + } else if (this.check_data['check'] == ">=") { + valid = (check_value >= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value <= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value > this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value < this.check_data['value']); + } + if (valid || missing_element) { + //if we are supposed to refresh when this value changes we'll resubmit + if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) { + console.log("resubmit"); + } + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } + } + } else { + this.closest(".setting_container_model").classList.remove('input_error'); + this.closest(".setting_container_model").removeAttribute("tooltip"); + } + } else { + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + } else { + this.closest(".setting_container_model").classList.add('input_error'); + this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + } + var accept = document.getElementById("btn_loadmodelaccept"); + ok_to_load = true; + for (const item of document.getElementsByClassName("input_error")) { + if (item.classList.contains("input_error") && !item.closest(".model_plugin_settings_area").classList.contains("hidden")) { + ok_to_load = false; + break; + } + } + + if (ok_to_load) { + accept.classList.remove("disabled"); + accept.disabled = false; + } else { + accept.classList.add("disabled"); + accept.disabled = true; + } +} -function selected_model_info(data) { +function selected_model_info(sent_data) { + const data = sent_data['model_backends']; //clear out the loadmodelsettings var loadmodelsettings = document.getElementById('loadmodelsettings') while (loadmodelsettings.firstChild) { @@ -1667,7 +1744,10 @@ function selected_model_info(data) { for (const area of document.getElementsByClassName("model_plugin_settings_area")) { area.classList.add("hidden"); } - document.getElementById(this.value + 
"_settings_area").classList.remove("hidden"); + if (document.getElementById(this.value + "_settings_area")) { + document.getElementById(this.value + "_settings_area").classList.remove("hidden"); + } + model_settings_checker() } //create the content for (const [loader, items] of Object.entries(data)) { @@ -1679,7 +1759,11 @@ function selected_model_info(data) { modelpluginoption.innerText = loader; modelpluginoption.value = loader; modelplugin.append(modelpluginoption); + if (loader == sent_data['preselected']) { + modelplugin.value = sent_data['preselected']; + } + //create the user input for each requested input for (item of items) { let new_setting = document.getElementById('blank_model_settings').cloneNode(true); new_setting.id = loader; @@ -1687,73 +1771,7 @@ function selected_model_info(data) { new_setting.querySelector('#blank_model_settings_label').innerText = item['label']; new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']); - onchange_event = function () { - //get check value: - if ('sum' in this.check_data) { - check_value = 0 - for (const temp of this.check_data['sum']) { - if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { - check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value); - } - } - } else { - check_value = this.value - } - if (this.check_data['check'] == "=") { - valid = (check_value == this.check_data['value']); - } else if (this.check_data['check'] == "!=") { - valid = (check_value != this.check_data['value']); - } else if (this.check_data['check'] == ">=") { - valid = (check_value >= this.check_data['value']); - } else if (this.check_data['check'] == "<=") { - valid = (check_value <= this.check_data['value']); - } else if (this.check_data['check'] == "<=") { - valid = (check_value > this.check_data['value']); - } else if (this.check_data['check'] == "<=") { - valid = (check_value < this.check_data['value']); - } - if (valid) { - //if we are supposed to refresh when this value changes we'll resubmit - if (this.getAttribute("refresh_model_inputs") == "true") { - console.log("resubmit"); - } - if ('sum' in this.check_data) { - for (const temp of this.check_data['sum']) { - if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error'); - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); - } - } - } else { - this.closest(".setting_container_model").classList.remove('input_error'); - this.closest(".setting_container_model").removeAttribute("tooltip"); - } - var accept = document.getElementById("btn_loadmodelaccept"); - if (document.getElementsByClassName("input_error").length) - accept.disabled = true; - } else { - if ('sum' in this.check_data) { - for (const temp of this.check_data['sum']) { - if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); - } - } - } else { - this.closest(".setting_container_model").classList.add('input_error'); - 
this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); - } - } - var accept = document.getElementById("btn_loadmodelaccept"); - if (document.getElementsByClassName("input_error").length > 0) { - accept.classList.add("disabled"); - accept.disabled = true; - } else { - accept.classList.remove("disabled"); - accept.disabled = false; - } - - } + onchange_event = model_settings_checker; if (item['uitype'] == "slider") { var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number'); slider_number.value = item['default']; @@ -1764,6 +1782,7 @@ function selected_model_info(data) { slider.value = item['default']; slider.min = item['min']; slider.max = item['max']; + slider.setAttribute("data_type", item['unit']); slider.id = loader + "|" + item['id'] + "_value"; if ('check' in item) { slider.check_data = item['check']; @@ -1777,25 +1796,37 @@ function selected_model_info(data) { slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min']; new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max']; + slider.noresubmit = true; slider.onchange(); + slider.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_slider').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_slider').remove(); } if (item['uitype'] == "toggle") { - var toggle = new_setting.querySelector('#blank_model_settings_toggle'); + toggle = document.createElement("input"); + toggle.type='checkbox'; + toggle.classList.add("setting_item_input"); + toggle.classList.add("blank_model_settings_input"); + toggle.classList.add("model_settings_input"); toggle.id = loader + "|" + item['id'] + "_value"; toggle.checked = item['default']; - toggle.onchange = onchange_event; + toggle.onclick = onchange_event; + toggle.setAttribute("data_type", item['unit']); toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { toggle.check_data = item['check']; } else { toggle.check_data = null; } - toggle.onchange(); + new_setting.querySelector('#blank_model_settings_toggle').append(toggle); + setTimeout(function() { + $('#'+loader + "\\|" + item['id'] + "_value").bootstrapToggle({size: "mini", onstyle: "success", toggle: "toggle"}); + }, 200); + toggle.noresubmit = true; + toggle.onclick(); + toggle.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_checkbox_container').classList.add("hidden"); - new_setting.querySelector('#blank_model_settings_toggle').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_toggle').remove(); } if (item['uitype'] == "dropdown") { var select_element = new_setting.querySelector('#blank_model_settings_dropdown'); @@ -1807,6 +1838,7 @@ function selected_model_info(data) { select_element.append(new_option); } select_element.value = item['default']; + select_element.setAttribute("data_type", item['unit']); select_element.onchange = onchange_event; select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { @@ -1814,14 +1846,17 @@ function selected_model_info(data) { } else { select_element.check_data = null; } + select_element.noresubmit = true; select_element.onchange(); + select_element.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_dropdown').classList.add("hidden"); + 
new_setting.querySelector('#blank_model_settings_dropdown').remove(); } if (item['uitype'] == "password") { var password_item = new_setting.querySelector('#blank_model_settings_password'); password_item.id = loader + "|" + item['id'] + "_value"; password_item.value = item['default']; + password_item.setAttribute("data_type", item['unit']); password_item.onchange = onchange_event; password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { @@ -1829,24 +1864,29 @@ function selected_model_info(data) { } else { password_item.check_data = null; } + password_item.noresubmit = true; password_item.onchange(); + password_item.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_password').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_password').remove(); } if (item['uitype'] == "text") { var text_item = new_setting.querySelector('#blank_model_settings_text'); text_item.id = loader + "|" + item['id'] + "_value"; text_item.value = item['default']; text_item.onchange = onchange_event; + text_item.setAttribute("data_type", item['unit']); text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { text_item.check_data = item['check']; } else { text_item.check_data = null; } + text_item.noresubmit = true; text_item.onchange(); + text_item.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_text').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_text').remove(); } model_area.append(new_setting); @@ -1891,7 +1931,15 @@ function load_model() { //get an object of all the input settings from the user data = {} for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - data[element.id.split("|")[1].replace("_value", "")] = element.value; + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; diff --git a/templates/templates.html b/templates/templates.html index 49cd3e5b..49fa99f6 100644 --- a/templates/templates.html +++ b/templates/templates.html @@ -162,9 +162,8 @@ - - - + + From 182ecff20273b4921f4cefa04f7a845d22fc58ac Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 18 May 2023 16:01:17 -0400 Subject: [PATCH 038/102] Added in model backend to the command line arguments --- aiserver.py | 70 ++++++++++++++++--------- modeling/inference_model.py | 2 + modeling/inference_models/horde.py | 1 - modeling/inference_models/parents/hf.py | 8 ++- 4 files changed, 54 insertions(+), 27 deletions(-) diff --git a/aiserver.py b/aiserver.py index 314fb512..235732ec 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1355,6 +1355,7 @@ def general_startup(override_args=None): parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") parser.add_argument("--model_backend", help="Specify the model backend you want to use") + parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input 
to the model loading process (leave blank to get required parameters)") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") parser.add_argument("--sh_apikey", help="Specify the API key to use for txt2img from the Stable Horde. Get a key from https://horde.koboldai.net/register") @@ -1447,14 +1448,6 @@ def general_startup(override_args=None): args.max_summary_length = int(args.max_summary_length) - if args.model: - # At this point we have to try to load the model through the selected backend - if not args.model_backend: - logger.error("Didn't select a model backend. Please enter one through the --model_backend or remove the --model from the run command") - exit() - #if - - koboldai_vars.model = args.model; koboldai_vars.revision = args.revision koboldai_settings.multi_story = args.multi_story @@ -1556,6 +1549,37 @@ def general_startup(override_args=None): socketio.start_background_task(socket_io_relay, koboldai_settings.queue, socketio) + if args.model: + # At this point we have to try to load the model through the selected backend + if not args.model_backend: + logger.error("Didn't select a model backend. Please enter one through the --model_backend or remove the --model from the run command") + logger.error("Possible model backends are: {}".format(", ".join([x for x in model_backends]))) + exit() + if args.model_backend not in model_backends: + logger.error("Your selected model backend ({}) isn't in the model backends we know about ({})".format(args.model_backend, ", ".join([x for x in model_backends]))) + exit() + #OK, we've been given a model to load and a backend to load it through. Now we need to get a list of parameters and make sure we get what we need to actually load it + parameters = model_backends[args.model_backend].get_requested_parameters(args.model, args.path, "") + ok_to_load = True + arg_parameters = json.loads(args.model_parameters.replace("'", "\"")) if args.model_parameters != "" else {} + for parameter in parameters: + if parameter['default'] == "" or parameter['id'] not in arg_parameters: + ok_to_load = False + elif parameter['id'] not in arg_parameters: + arg_parameters[parameter] = parameter['default'] + if not ok_to_load: + logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} (required parameters shown below)") + logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) + exit() + arg_parameters['id'] = args.model + arg_parameters['model_path'] = args.path + arg_parameters['menu_path'] = "" + model_backends[args.model_backend].set_input_parameters(arg_parameters) + koboldai_vars.model = args.model + return args.model_backend + else: + return "Read Only" + def unload_model(): @@ -1633,13 +1657,13 @@ def load_model(model_backend, initial_load=False): else: logger.init_warn("GPU support", status="Not Found") - if koboldai_vars.hascuda: - if(koboldai_vars.bmsupported): - koboldai_vars.usegpu = False - koboldai_vars.breakmodel = True - else: - koboldai_vars.breakmodel = False - koboldai_vars.usegpu = use_gpu + #if koboldai_vars.hascuda: + # if(koboldai_vars.bmsupported): + # koboldai_vars.usegpu = False + # koboldai_vars.breakmodel = True + # else: + # koboldai_vars.breakmodel = False + # koboldai_vars.usegpu = use_gpu else: koboldai_vars.default_preset = koboldai_settings.default_preset @@ -10665,10 +10689,8 @@ for schema in config_endpoint_schemas: #==================================================================# # Final startup commands to launch Flask app #==================================================================# -def startup(): - if koboldai_vars.model == "" or koboldai_vars.model is None: - koboldai_vars.model = "ReadOnly" - socketio.start_background_task(load_model, *('Read Only',), **{'initial_load':True}) +def startup(command_line_backend): + socketio.start_background_task(load_model, *(command_line_backend,), **{'initial_load':True}) print("", end="", flush=True) @@ -10677,7 +10699,7 @@ def run(): global app global tpu_mtj_backend - general_startup() + command_line_backend = general_startup() # Start flask & SocketIO logger.init("Flask", status="Starting") if koboldai_vars.host: @@ -10725,7 +10747,7 @@ def run(): cloudflare = _run_cloudflared(port) koboldai_vars.cloudflare_link = cloudflare - startup() + startup(command_line_backend) if(args.localtunnel or args.ngrok or args.remote): with open('cloudflare.log', 'w') as cloudflarelog: @@ -10745,7 +10767,7 @@ def run(): else: socketio.run(app, port=port) else: - startup() + startup(command_line_backend) if args.unblock: if not args.no_ui: try: @@ -10773,13 +10795,13 @@ def run(): if __name__ == "__main__": run() else: - general_startup() + command_line_backend = general_startup() # Start flask & SocketIO logger.init("Flask", status="Starting") Session(app) logger.init_ok("Flask", status="OK") patch_transformers() - startup() + startup(command_line_backend) koboldai_settings.port = args.port if "port" in args and args.port is not None else 5000 print("{0}\nServer started in WSGI mode!{1}".format(colors.GREEN, colors.END), flush=True) diff --git a/modeling/inference_model.py b/modeling/inference_model.py index 4a29a027..c3fff46f 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -178,6 +178,8 @@ class InferenceModel: return {} def set_input_parameters(self, parameters): + for parameter in parameters: + setattr(self, parameter, parameters[parameter]) return def load(self, save_model: bool = False, initial_load: bool = False) -> None: diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 6c880bbe..5d8552fb 100644 --- a/modeling/inference_models/horde.py +++ 
b/modeling/inference_models/horde.py @@ -86,7 +86,6 @@ class model_backend(InferenceModel): def get_cluster_models(self): # Get list of models from public cluster - logger.info("Retrieving engine list...") try: req = requests.get(f"{self.url}/api/v2/status/models?type=text") except: diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 69549bd5..70143b69 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -133,10 +133,14 @@ class HFInferenceModel(InferenceModel): gpu_count = torch.cuda.device_count() layers = [] for i in range(gpu_count): - layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + layers.append(int(parameters["{}_Layers".format(i)]) if isinstance(parameters["{}_Layers".format(i)], str) and parameters["{}_Layers".format(i)].isnumeric() else None) self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + if isinstance(self.cpu_layers, str): + self.cpu_layers = int(self.cpu_layers) if self.cpu_layers.isnumeric() else 0 self.layers = layers - self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 + self.disk_layers = parameters['Disk_Layers'] if 'Disk_Layers' in parameters else 0 + if isinstance(self.disk_layers, str): + self.disk_layers = int(self.disk_layers) if self.disk_layers.isnumeric() else 0 breakmodel.gpu_blocks = layers breakmodel.disk_blocks = self.disk_layers self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None From 4040538d3438acd56e4a9121708a79b6d0d5da83 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 18 May 2023 18:34:00 -0400 Subject: [PATCH 039/102] Model Backends now defined in the menu --- aiserver.py | 38 ++++++++++++++++-------------- modeling/inference_models/horde.py | 3 ++- static/koboldai.css | 11 +++++++-- static/koboldai.js | 16 +++++++++++-- templates/popups.html | 2 +- 5 files changed, 46 insertions(+), 24 deletions(-) diff --git a/aiserver.py b/aiserver.py index 235732ec..aeebdbc1 100644 --- a/aiserver.py +++ b/aiserver.py @@ -178,11 +178,13 @@ class MenuModel(MenuItem): vram_requirements: str = "", model_type: MenuModelType = MenuModelType.HUGGINGFACE, experimental: bool = False, + model_backend: str = "Huggingface", ) -> None: super().__init__(label, name, experimental) self.model_type = model_type self.vram_requirements = vram_requirements self.is_downloaded = is_model_downloaded(self.name) + self.model_backend = model_backend def to_ui1(self) -> list: return [ @@ -245,7 +247,7 @@ model_menu = { MenuFolder("Official RWKV-4", "rwkvlist"), MenuFolder("Untuned GPT2", "gpt2list"), MenuFolder("Online Services", "apilist"), - MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER), + MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER, model_backend="Read Only"), ], 'adventurelist': [ MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "64GB"), @@ -369,25 +371,24 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'rwkvlist': [ - MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", ""), - MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", ""), - MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", ""), - MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", ""), - MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", ""), - MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", ""), - MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", ""), - 
MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", ""), - MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", ""), - MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", ""), + MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", "", model_backend="RWKV"), + MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", "", model_backend="RWKV"), + MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", "", model_backend="RWKV"), + MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", "", model_backend="RWKV"), + MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", "", model_backend="RWKV"), + MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", "", model_backend="RWKV"), + MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", "", model_backend="RWKV"), MenuFolder("Return to Main Menu", "mainmenu"), ], 'apilist': [ - MenuModel("GooseAI API (requires API key)", "GooseAI", model_type=MenuModelType.ONLINE_API), - MenuModel("OpenAI API (requires API key)", "OAI", model_type=MenuModelType.ONLINE_API), - MenuModel("InferKit API (requires API key)", "InferKit", model_type=MenuModelType.ONLINE_API), - MenuModel("KoboldAI API", "API", model_type=MenuModelType.ONLINE_API), - MenuModel("Basic Model API", "Colab", model_type=MenuModelType.ONLINE_API), - MenuModel("KoboldAI Horde", "CLUSTER", model_type=MenuModelType.ONLINE_API), + MenuModel("GooseAI API (requires API key)", "GooseAI", model_type=MenuModelType.ONLINE_API, model_backend="GooseAI"), + MenuModel("OpenAI API (requires API key)", "OAI", model_type=MenuModelType.ONLINE_API, model_backend="OpenAI"), + MenuModel("KoboldAI API", "API", model_type=MenuModelType.ONLINE_API, model_backend="KoboldAI API"), + MenuModel("Basic Model API", "Colab", model_type=MenuModelType.ONLINE_API, model_backend="KoboldAI Old Colab Method"), + MenuModel("KoboldAI Horde", "CLUSTER", model_type=MenuModelType.ONLINE_API, model_backend="Horde"), MenuFolder("Return to Main Menu", "mainmenu"), ] } @@ -1670,6 +1671,7 @@ def load_model(model_backend, initial_load=False): model = model_backends[model_backend] model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) + koboldai_vars.model = model.model_name if "model_name" in vars(model) else model.id #Should have model_name, but it could be set to id depending on how it's setup logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer @@ -6136,7 +6138,7 @@ def UI_2_select_model(data): #Get load methods if 'path' not in data or data['path'] == "": valid_loaders = {} - for model_backend in model_backends: + for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) emit("selected_model_info", {"model_backends": valid_loaders, "preselected": "Huggingface"}) else: diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 5d8552fb..8e05fbbd 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -70,6 +70,7 @@ class model_backend(InferenceModel): "id": "model", "default": model_name, "check": {"value": "", 'check': "!="}, + 
'multiple': True, "tooltip": "Which model to use when running OpenAI/GooseAI.", "menu_path": "", "refresh_model_inputs": False, @@ -102,7 +103,7 @@ class model_backend(InferenceModel): engines = req.json() try: - engines = [{"text": en["name"], "value": en["name"]} for en in engines] + engines = [{"text": "all", "value": "all"}] + [{"text": en["name"], "value": en["name"]} for en in engines] except: logger.error(engines) raise diff --git a/static/koboldai.css b/static/koboldai.css index f3dde4b7..b70c6877 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -352,7 +352,7 @@ border-top-right-radius: var(--tabs_rounding); grid-template-areas: "label value" "item item" "minlabel maxlabel"; - grid-template-rows: 20px 23px 20px; + grid-template-rows: 20px auto 20px; grid-template-columns: auto 30px; row-gap: 0.2em; background-color: var(--setting_background); @@ -2124,6 +2124,13 @@ body { cursor: pointer; background-color: #688f1f; } + +.loadmodelsettings { + overflow-y: auto; + max-height: 50%; +} + + /*----------------------------- Model Load Popup ------------------------------------------*/ #specspan, .popup_list_area .model_item .model { @@ -3539,7 +3546,7 @@ h2 .material-icons-outlined { } -.horde_trigger[model_model="ReadOnly"], +.horde_trigger[model_model="Read Only"], .horde_trigger[model_model="CLUSTER"] { display: none; } diff --git a/static/koboldai.js b/static/koboldai.js index 905403c1..399e52cf 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1695,12 +1695,20 @@ function model_settings_checker() { for (const temp of this.check_data['sum']) { if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + if (this.check_data['check_message']) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } else { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } } } } else { this.closest(".setting_container_model").classList.add('input_error'); - this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + if (this.check_data['check_message']) { + this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } else { + this.closest(".setting_container_model").removeAttribute("tooltip"); + } } } } @@ -1841,6 +1849,10 @@ function selected_model_info(sent_data) { select_element.setAttribute("data_type", item['unit']); select_element.onchange = onchange_event; select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if (('multiple' in item) && (item['multiple'])) { + select_element.multiple = true; + select_element.size = 10; + } if ('check' in item) { select_element.check_data = item['check']; } else { diff --git a/templates/popups.html b/templates/popups.html index 59f07e70..9c6b4a9e 100644 --- a/templates/popups.html +++ b/templates/popups.html @@ -48,7 +48,7 @@
- + - + + + {% include 'popups.html' %} + + From a1036465af02cefda32af06d4d3a04b0161aa118 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 12:46:02 -0400 Subject: [PATCH 045/102] Add warning about command line changes and new modular backend --- data/one_time_messages.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/data/one_time_messages.json b/data/one_time_messages.json index 3062827b..7485fd15 100644 --- a/data/one_time_messages.json +++ b/data/one_time_messages.json @@ -8,5 +8,10 @@ "id": 2, "title": "Changes since last version", "message": "
New Features
\n
Phrase Biasing
\nThere is now a Phrase Biasing implementation under Settings -> Biasing. You can now encourage or discourage the AI to generate words or phrases (without needing to use a userscript)\n
Context viewer
\nThe Context Viewer allows you to see what is sent to the AI. Given that only so much text can be read by the AI at a time, it's useful to know exactly what it's looking at.\n
Story Commentary
\nStory Commentary allows custom characters to speak their mind on your story. This can be configured under Settings -> Story Commentary. Characters can be added as World Info entries with a type of \"Commentator\".\n
New Chat UI (Experimental)
\nThis new interface for Chat Mode is more visually \"chat like\" than the old text-based mode. To activate it, ensure both Chat Mode (Home -> Game Mode) and Experimental UI (Interface -> Experimental UI) are enabled, then change the Chat Style (Interface -> Chat Style) to \"Messages\". Similarly to the story commentators, characters can be defined in the World Info menu; if a character's name matches a chat character defined there, the image on the character entry will be used as an icon.\n
Tweaks
\nTweaks allow small UI changes to be mixed and matched to create a more personalized interface.\n
Attention Bias (Experimental)
\nAttention Bias aims to make some parts of the context be weighted more heavily than others in self-attention. This is very experimental, and only works on OPT-based models for now.\n
Genre
\nThe genre menu (Author's Note -> Genre) prepends genre information to the context. You can either choose from preset genres or write your own. Works better on models trained with genre/tag information, including most new models in the model picker.\n
World Info generation
\nWorld Info entries can now have their text generated automatically from a title and type. Powered by whatever model you have active, so effectiveness will vary with model.\n
Drag and drop import
\nImportable files can now be dragged into the UI to load them.\n
NovelAI lorebook/card support
\nNovelAI lorebooks and cards can now be imported as World Info. If a card is uploaded, the PNG will be used as the World Info image.\n
Finder (Ctrl+K)
\nAllows jumping to various UI elements and performing actions quickly. The mode can be changed by clicking the mode icon or with hotkeys on an empty search box (Search: '#', World Info: '>', Inference Scratchpad: '!', Image Prompting: '?').\n
Club import wizard
\nPrompts imported from aetherroom.club with placeholders will now show a setup prompt where you can input the value of these placeholders.\n
Context menu
\nA context menu has been added and is available in several areas. Give it a try by right-clicking on the main text area.\n
Substitutions
\nSubstitutions allow phrases to be replaced if you or the AI input them into the story. The default Substitutions are disabled and can be enabled with the pencil icon to the right of the entry.\n
Inference scratchpad
\nThe Inference Scratchpad is a way of prompting the AI in isolation, outside of your story; the AI will not see anything in your story, and nothing the AI responds with will be added to the story. This can be useful in scenarios where you wish to use the AI in a more generic way. For example, you could prompt it with something like \"List of fantasy names:\" to receive such a list.\n
Error notifications
\nErrors are generally less opaque to the user. Client sided errors and many server errors will show a notification detailing the error.\n
Ctrl+Click to jump to World Info entry
\nHolding Ctrl while clicking on a mention of a World Info tag will bring you to the entry.\n
Model picker indicators
\nThe model picker now has indicators showing if a model is downloaded, may achieve poor quality, or may not load on your system.\n
More shortcuts
\nPress Ctrl+? to view them.\n
Image Generation
\nYou can now generate flavor images based on the game text at each action. In the settings menu in the home tab, you can click generate image to create an image based on the current text. The image will appear below the generate button. Hovering on the image will show the prompt used to generate it. You can click on the text of previous actions to see the image associated with that action, and can right click on the image and hit retry to generate a new image based on that action.\nSettings for how/where the image is generated are in the left flyout menu under interface, image.\n
Text to speech (Experimental)
\nText to speech is now available. To enable it go to the settings menu, enable experimental ui, then enable generate audio. Audio will be generated for your actions. Play buttons will appear next to the submit button, and right clicking an action will give you a new speak option to start reading from that point.\n
UI Mode
\nIn response to feedback, we've added different UI mode levels from simple to power user. Advanced hides some of the less used options, while Power User shows everything. Simple is very much a work in progress, but it intends to simplify the majority of settings to 3 sliders. Feel free to play with it but don't expect good results yet.\n
Presets
\nPresets are now here. Community presets are pre-loaded in KoboldAI and can be selected from the settings tab in the settings menu, or from the home screen. In addition, you can save your own presets and share them with others (or send them to us for future inclusion). Presets are saved in the presets folder.\n
Alt Text Gen
\nWith this setting on, the system will insert World Info text one sentence before the word that triggers it in the AI text. This should make the AI pay more attention to it and make it more likely to influence the output.\n
Alt Multi Gen
\nIf set, multiple generations will be produced sequentially rather than all at once. This reduces the amount of VRAM required and lets you generate multiple story options with more demanding models, at the potential expense of speed.\n
Beep on Complete
\nThere is now an option in the settings menu, interface tab, called Beep on Complete. If set, the browser will beep when generation is complete. Useful for slow systems.\n
Privacy Screen (Experimental)
\nBy hitting Ctrl+L the screen will be blurred for all users until the password is entered and unlock is clicked (the password is set in the settings menu, interface tab).\n
Change Game Text Size
\nGame text can be adjusted to any size\n
No double spaces
\nWhen set, double spaces will be replaced by single spaces.\n
Themes
\nWe now have a theming engine. Themes can come in 3 flavors. Palette themes use a more basic theming system entirely in the UI. Select the colors from the Palette section and things will change. Advanced themes can have various variables set manually (click the advanced theme button to see). These allow you to go a level deeper than the palette system. Finally, we have custom themes. These are custom CSS code that can do almost anything. All themes can be saved and shared. Saved themes are stored in the themes folder\n
Auto Memory (Experimental)
\nThe start of auto-memory is in place and we are looking for feedback. It currently generates the summary but does not put it in memory (though you can copy-paste it). To see it, turn on experimental ui, go to the story menu, memory tab and click generate under auto-memory. \n
General Notes
\nIf you want a place to write stuff down that saves with the story but doesn't affect it, that's what the notes tab is for. It is found under the story menu, Notes tab\n
W++ (or SBF)
\nIn world info entries you can turn on w++ mode. This will allow you to enter data in the W++ format without having to actually write it.\n
World Info Titles
\nWorld info entries now have titles on them to make it easier to find the one you want. Soon the world info entries will be collapsed to just the title to make navigation easier\n
Download/Upload world info folders
\nWorld info folders can now be downloaded and/or uploaded. This lets you share world info more easily.\n
Game Text in AI Context
\nGame text that will be in the AI's context is now bold in the game screen. This will let you easily see where the AI will stop remembering your game (anything not bolded is \"forgotten\")\n
World info context
\nText that triggers a world info entry will now be italicized. Hovering over that text will show a tooltip with the World Info text that will be added to the context.\n
Updated help text
\nHelp text has been expanded throughout the UI.\n
Context Bar
At the bottom of the story menu is a bar that shows how much of the AI's context is in use, and by what. Different colors correspond to different data types (actions, memory, world info, etc.).\n
\n
\n
Improvements
\n
Author's Note
\nThe author's note is now inserted between sentences a configurable distance from the end of the story. This should improve the coherence of generated text while keeping the author's note relevant." + }, + "3": { + "id": 3, + "title": "Changes since last version", + "message": "
New Features
\n
Modular Model Backends
Model loading is now accomplished via separate model backend files. This will allow KoboldAI to more easily add new model backends (for example 4-bit, GGML, or whatever developers want to add) without significant code rework.
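Since the modular backend system is the headline change of this series, a minimal sketch of what one of these separate backend files might look like may help. It is inferred from the discovery loop added to aiserver.py (a module-level `model_backend_name`, a `model_backend` class, and the optional `disable` flag) and from the `is_valid`/`get_requested_parameters` calls visible in these patches; the `set_input_parameters` hook and the import path are assumptions rather than a documented API, so treat this as a sketch, not the definitive interface.

```python
# modeling/inference_models/example/class.py -- illustrative sketch only
from modeling.inference_model import InferenceModel

model_backend_name = "Example"  # name listed in the model-backend dropdown


class model_backend(InferenceModel):
    # aiserver.py skips any backend whose instance sets disable = True
    disable = False

    def is_valid(self, model_name, model_path, menu_path):
        # Claim only the models this backend actually knows how to load
        # ("example/" is a made-up prefix for illustration)
        return model_name.startswith("example/")

    def get_requested_parameters(self, model_name, model_path, menu_path, parameters={}):
        # Describe the inputs the load UI (or the --model_parameters JSON)
        # should collect: dicts with id, default, check, tooltip, ...
        return []

    def set_input_parameters(self, parameters):
        # Assumed hook: record the values the user chose before loading
        self.model_name = parameters.get("id", "Example")
```

Dropped into modeling/inference_models/<name>/class.py, a file shaped like this would be picked up automatically by the importlib discovery loop shown later in this series and offered in the load menu.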
Rework of command line arguments
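As a concrete illustration of the command-line rework described in the next paragraph, a hypothetical invocation of the new flags might look like the following; the model name and layer split are invented for the example, but the JSON keys match the `0_Layers`/`CPU_Layers`/`Disk_Layers` ids exposed by the Huggingface backend:

```bash
python aiserver.py --model RWKV/rwkv-4-430m-pile \
    --model_backend Huggingface \
    --model_parameters '{"0_Layers": 24, "CPU_Layers": 0, "Disk_Layers": 0}'
```

If --model_parameters is omitted, the backend's defaults are used where they exist, and aiserver.py prints the required parameter ids when something mandatory is missing.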
--breakmodel command line arguments have been deleted and if you use those you will have to pass through --model_backend and --model_parameters." } } \ No newline at end of file From 9df1f03b12ffa2513b15472a96338483178fe760 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 14:28:36 -0400 Subject: [PATCH 046/102] Fix for custom huggingface model menu entry --- aiserver.py | 36 ++++---- modeling/inference_models/hf.py | 154 ++++++++++++++++++-------------- static/application.js | 23 ++++- static/koboldai.js | 23 ++++- 4 files changed, 139 insertions(+), 97 deletions(-) diff --git a/aiserver.py b/aiserver.py index b4aad4e7..fe6d7606 100644 --- a/aiserver.py +++ b/aiserver.py @@ -233,7 +233,7 @@ model_menu = { "mainmenu": [ MenuPath("Load a model from its directory", "NeoCustom"), MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), - MenuFolder("Load custom model from Hugging Face", "customhuggingface"), + MenuModel("Load custom model from Hugging Face", "customhuggingface", ""), MenuFolder("Adventure Models", "adventurelist"), MenuFolder("Novel Models", "novellist"), MenuFolder("Chat Models", "chatlist"), @@ -6135,7 +6135,7 @@ def UI_2_select_model(data): valid_loaders = {} for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) - emit("selected_model_info", {"model_backends": valid_loaders, "preselected": "Huggingface"}) + emit("selected_model_info", {"model_backends": valid_loaders}) else: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) @@ -6149,24 +6149,20 @@ def UI_2_select_model(data): output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return - - - #We've selected a menu - if data['model'] in model_menu: - sendModelSelection(menu=data['model']) - #We've selected a custom line - elif data['menu'] in ("NeoCustom", "GPT2Custom"): - get_model_info(data['menu'], directory=data['display_name']) - #We've selected a custom menu folder - elif data['model'] in ("NeoCustom", "GPT2Custom") and 'path' in data: - sendModelSelection(menu=data['model'], folder=data['path']) - #We've selected a custom menu - elif data['model'] in ("NeoCustom", "GPT2Custom", "customhuggingface"): - sendModelSelection(menu=data['model'], folder="./models") - else: - #We now have some model we want to potentially load. 
- #First we need to send the client the model parameters (layers, etc) - get_model_info(data['model']) + + + + +#==================================================================# +# Event triggered when user changes a model parameter and it's set to resubmit +#==================================================================# +@socketio.on('resubmit_model_info') +@logger.catch +def UI_2_resubmit_model_info(data): + valid_loaders = {} + for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"], parameters=data) + emit("selected_model_info", {"model_backends": valid_loaders}) #==================================================================# # Event triggered when user loads a model diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 6f848fa9..eff3d1ce 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -33,95 +33,111 @@ class HFInferenceModel(InferenceModel): except: return False - def get_requested_parameters(self, model_name, model_path, menu_path): + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): requested_parameters = [] if not self.hf_torch: return [] - if model_path is not None and os.path.exists(model_path): - self.model_config = AutoConfig.from_pretrained(model_path) - elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): - self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") - else: - self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") - layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None - if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: - if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self): - with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f: - temp = json.load(f) - break_values = temp['layers'] if 'layers' in temp else [layer_count] - disk_blocks = temp['disk_layers'] if 'disk_layers' in temp else 0 + if model_name == 'customhuggingface': + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "Huggingface Model Name", + "id": "custom_model_name", + "default": parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Model name from https://huggingface.co/", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }) + + if model_name != 'customhuggingface' or "custom_model_name" in parameters: + model_name = parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else model_name + if model_path is not None and os.path.exists(model_path): + self.model_config = 
AutoConfig.from_pretrained(model_path) + elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): + self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") else: - break_values = [layer_count] - disk_blocks = 0 - - break_values = [int(x) for x in break_values if x != '' and x is not None] - gpu_count = torch.cuda.device_count() - break_values += [0] * (gpu_count - len(break_values)) - if disk_blocks is not None: - break_values += [int(disk_blocks)] - for i in range(gpu_count): + self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None + if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: + if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self): + with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f: + temp = json.load(f) + break_values = temp['layers'] if 'layers' in temp else [layer_count] + disk_blocks = temp['disk_layers'] if 'disk_layers' in temp else 0 + else: + break_values = [layer_count] + disk_blocks = 0 + + break_values = [int(x) for x in break_values if x != '' and x is not None] + gpu_count = torch.cuda.device_count() + break_values += [0] * (gpu_count - len(break_values)) + if disk_blocks is not None: + break_values += [int(disk_blocks)] + for i in range(gpu_count): + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "{} Layers".format(torch.cuda.get_device_name(i)), + "id": "{}_Layers".format(i), + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": break_values[i], + "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) requested_parameters.append({ "uitype": "slider", "unit": "int", - "label": "{} Layers".format(torch.cuda.get_device_name(i)), - "id": "{}_Layers".format(i), + "label": "CPU Layers", + "id": "CPU_Layers", "min": 0, "max": layer_count, "step": 1, "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), - "default": break_values[i], - "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), + "default": layer_count - sum(break_values), + "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. 
Use if you must.", "menu_path": "Layers", "extra_classes": "", "refresh_model_inputs": False }) - requested_parameters.append({ - "uitype": "slider", - "unit": "int", - "label": "CPU Layers", - "id": "CPU_Layers", - "min": 0, - "max": layer_count, - "step": 1, - "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, - "check_message": "The sum of assigned layers must equal {}".format(layer_count), - "default": layer_count - sum(break_values), - "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.", - "menu_path": "Layers", - "extra_classes": "", - "refresh_model_inputs": False - }) - if disk_blocks is not None: + if disk_blocks is not None: + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "Disk Layers", + "id": "Disk_Layers", + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": disk_blocks, + "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. Use as a last resort.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + else: requested_parameters.append({ - "uitype": "slider", - "unit": "int", - "label": "Disk Layers", - "id": "Disk_Layers", - "min": 0, - "max": layer_count, - "step": 1, - "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, - "check_message": "The sum of assigned layers must equal {}".format(layer_count), - "default": disk_blocks, - "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. 
Use as a last resort.", + "uitype": "toggle", + "unit": "bool", + "label": "Use GPU", + "id": "use_gpu", + "default": False, + "tooltip": "Whether or not to use the GPU", "menu_path": "Layers", "extra_classes": "", "refresh_model_inputs": False }) - else: - requested_parameters.append({ - "uitype": "toggle", - "unit": "bool", - "label": "Use GPU", - "id": "use_gpu", - "default": False, - "tooltip": "Whether or not to use the GPU", - "menu_path": "Layers", - "extra_classes": "", - "refresh_model_inputs": False - }) - + return requested_parameters @@ -153,7 +169,7 @@ class HFInferenceModel(InferenceModel): self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel - self.model_name = parameters['id'] + self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id'] self.path = parameters['path'] if 'path' in parameters else None def unload(self): diff --git a/static/application.js b/static/application.js index 99a65ed7..ca445c5f 100644 --- a/static/application.js +++ b/static/application.js @@ -4009,7 +4009,25 @@ function model_settings_checker() { if (valid || missing_element) { //if we are supposed to refresh when this value changes we'll resubmit if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) { - console.log("resubmit"); + //get an object of all the input settings from the user + data = {} + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; + } + data = {...data, ...selected_model_data}; + + data['plugin'] = document.getElementById("modelplugin").value; + + socket.emit("resubmit_model_info", data); } if ('sum' in this.check_data) { for (const temp of this.check_data['sum']) { @@ -4099,9 +4117,6 @@ function selected_model_info(sent_data) { modelpluginoption.innerText = loader; modelpluginoption.value = loader; modelplugin.append(modelpluginoption); - if (loader == sent_data['preselected']) { - modelplugin.value = sent_data['preselected']; - } //create the user input for each requested input for (item of items) { diff --git a/static/koboldai.js b/static/koboldai.js index 99595879..dabbcda9 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1683,7 +1683,25 @@ function model_settings_checker() { if (valid || missing_element) { //if we are supposed to refresh when this value changes we'll resubmit if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) { - console.log("resubmit"); + //get an object of all the input settings from the user + data = {} + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if 
(element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; + } + data = {...data, ...selected_model_data}; + + data['plugin'] = document.getElementById("modelplugin").value; + + socket.emit("resubmit_model_info", data); } if ('sum' in this.check_data) { for (const temp of this.check_data['sum']) { @@ -1773,9 +1791,6 @@ function selected_model_info(sent_data) { modelpluginoption.innerText = loader; modelpluginoption.value = loader; modelplugin.append(modelpluginoption); - if (loader == sent_data['preselected']) { - modelplugin.value = sent_data['preselected']; - } //create the user input for each requested input for (item of items) { From 756a33c63e323372716a1321e649f01873ecb533 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:28:39 -0400 Subject: [PATCH 047/102] Added try loop on model backend so it will continue with other models. --- aiserver.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/aiserver.py b/aiserver.py index fe6d7606..02ea2229 100644 --- a/aiserver.py +++ b/aiserver.py @@ -627,8 +627,11 @@ model_backend_code = {} model_backends = {} for module in os.listdir("./modeling/inference_models"): if not os.path.isfile(os.path.join("./modeling/inference_models",module)) and module != '__pycache__': - model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) - model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() + try: + model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) + model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() + except: + logger.error("Model Backend {} failed to load".format(module)) old_socketio_on = socketio.on @@ -1572,7 +1575,7 @@ def general_startup(override_args=None): elif parameter['id'] not in arg_parameters: arg_parameters[parameter] = parameter['default'] if not ok_to_load: - logger.error("Your selected backend needs additional parameters to run. Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} (required parameters shown below)") + logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) logger.error("Missing: {}".format(", ".join(mising_parameters))) exit() From db30402c3bd01432f8a8a8239faee5c8e55991aa Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:30:36 -0400 Subject: [PATCH 048/102] Move RWKV to use Huggingface model backend --- aiserver.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/aiserver.py b/aiserver.py index 02ea2229..a1d548e9 100644 --- a/aiserver.py +++ b/aiserver.py @@ -371,16 +371,16 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'rwkvlist': [ - MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", "", model_backend="RWKV"), - MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", "", model_backend="RWKV"), - MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", "", model_backend="RWKV"), - MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", "", model_backend="RWKV"), - MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", "", model_backend="RWKV"), - MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", "", model_backend="RWKV"), - MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", "", model_backend="RWKV"), - MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", "", model_backend="RWKV"), - MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", "", model_backend="RWKV"), - MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", ""), + MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", ""), + MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", ""), + MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", ""), + MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", ""), + MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", ""), + MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", ""), + MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", ""), + MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", ""), + MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", ""), MenuFolder("Return to Main Menu", "mainmenu"), ], 'apilist': [ From b21884fc31c556c81a89158123dfce18ba398640 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:34:15 -0400 Subject: [PATCH 049/102] Better error reporting --- aiserver.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index a1d548e9..7e8c09c8 100644 --- a/aiserver.py +++ b/aiserver.py @@ -56,6 +56,7 @@ import html import argparse import sys import gc +import traceback import lupa @@ -630,8 +631,10 @@ for module in os.listdir("./modeling/inference_models"): try: model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() - except: + except Exception: logger.error("Model Backend {} failed to load".format(module)) + logger.error(traceback.format_exc()) + old_socketio_on = socketio.on From 309f1c432ae79acdbeb6b52a6f65ed963ef5d36d Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:43:13 -0400 Subject: [PATCH 050/102] Added the ability to disable model backends in the model backend code. 
--- aiserver.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 7e8c09c8..40335a9f 100644 --- a/aiserver.py +++ b/aiserver.py @@ -631,10 +631,14 @@ for module in os.listdir("./modeling/inference_models"): try: model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() + if 'disable' in vars(model_backends[model_backend_code[module].model_backend_name]): + if model_backends[model_backend_code[module].model_backend_name].disable: + del model_backends[model_backend_code[module].model_backend_name] except Exception: logger.error("Model Backend {} failed to load".format(module)) logger.error(traceback.format_exc()) - + +logger.info("We loaded the following model backends: \n{}".format("\n".join([x for x in model_backends]))) old_socketio_on = socketio.on From 6df5fe4ad07acb7b901b65ade005ec8af40126dc Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 18:24:06 -0400 Subject: [PATCH 051/102] partial load model from custom path in menu --- aiserver.py | 20 ++++++++++++++++---- modeling/inference_models/api/class.py | 1 + modeling/inference_models/basic_api/class.py | 1 + modeling/inference_models/gooseai/class.py | 1 + modeling/inference_models/horde/class.py | 1 + modeling/inference_models/openai/class.py | 1 + modeling/inference_models/openai_gooseai.py | 6 ++++++ 7 files changed, 27 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index 40335a9f..14d268be 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6141,11 +6141,19 @@ def UI_2_select_model(data): emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) else: #Get load methods - if 'path' not in data or data['path'] == "": + if data['ismenu'] == 'false': valid_loaders = {} - for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): - valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) - emit("selected_model_info", {"model_backends": valid_loaders}) + if data['id'] in [item.name for sublist in model_menu for item in model_menu[sublist]]: + #Here if we have a model id that's in our menu, we explicitly use that backend + for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + emit("selected_model_info", {"model_backends": valid_loaders}) + else: + #Here we have a model that's not in our menu structure (either a custom model or a custom path + #so we'll just go through all the possible loaders + for model_backend in model_backends: + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + emit("selected_model_info", {"model_backends": valid_loaders}) else: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) @@ -6154,8 +6162,12 @@ def UI_2_select_model(data): valid=False for model_backend in model_backends: if model_backends[model_backend].is_valid(path[1], path[0], "Custom"): + logger.debug("{} 
says valid".format(model_backend)) valid=True break + else: + logger.debug("{} says invalid".format(model_backend)) + output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return diff --git a/modeling/inference_models/api/class.py b/modeling/inference_models/api/class.py index d9ec1147..3d54edd9 100644 --- a/modeling/inference_models/api/class.py +++ b/modeling/inference_models/api/class.py @@ -6,6 +6,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/basic_api/class.py b/modeling/inference_models/basic_api/class.py index 6f045ef5..2094d34e 100644 --- a/modeling/inference_models/basic_api/class.py +++ b/modeling/inference_models/basic_api/class.py @@ -4,6 +4,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/gooseai/class.py b/modeling/inference_models/gooseai/class.py index 8d58b4b5..1073f45f 100644 --- a/modeling/inference_models/gooseai/class.py +++ b/modeling/inference_models/gooseai/class.py @@ -2,6 +2,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/horde/class.py b/modeling/inference_models/horde/class.py index 387c5833..2c4c4bf5 100644 --- a/modeling/inference_models/horde/class.py +++ b/modeling/inference_models/horde/class.py @@ -5,6 +5,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/openai/class.py b/modeling/inference_models/openai/class.py index 84fe6df9..492a3fdb 100644 --- a/modeling/inference_models/openai/class.py +++ b/modeling/inference_models/openai/class.py @@ -2,6 +2,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/openai_gooseai.py b/modeling/inference_models/openai_gooseai.py index 4d885074..e4a027db 100644 --- a/modeling/inference_models/openai_gooseai.py +++ b/modeling/inference_models/openai_gooseai.py @@ -2,6 +2,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger @@ -30,6 +31,11 @@ class model_backend(InferenceModel): return model_name == "OAI" or model_name == "GooseAI" def get_requested_parameters(self, model_name, model_path, menu_path): + try: + print(self.source) + except: + print(vars(self)) + raise if os.path.exists("settings/{}.model_backend.settings".format(self.source)) and 'colaburl' not in vars(self): with open("settings/{}.model_backend.settings".format(self.source), "r") as f: self.key = json.load(f)['key'] From a1ee6849dc1d98c287561d5bdb6aff225c0322a5 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 18:28:47 -0400 Subject: [PATCH 052/102] Custom Paths from Menu structure fixed --- aiserver.py | 3 ++- modeling/inference_models/gooseai/class.py | 2 +- modeling/inference_models/openai/class.py | 2 +- modeling/inference_models/openai_gooseai.py | 5 ----- 4 
files changed, 4 insertions(+), 8 deletions(-) diff --git a/aiserver.py b/aiserver.py index 14d268be..d4a127f0 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6152,7 +6152,8 @@ def UI_2_select_model(data): #Here we have a model that's not in our menu structure (either a custom model or a custom path #so we'll just go through all the possible loaders for model_backend in model_backends: - valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + if model_backends[model_backend].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) emit("selected_model_info", {"model_backends": valid_loaders}) else: #Get directories diff --git a/modeling/inference_models/gooseai/class.py b/modeling/inference_models/gooseai/class.py index 1073f45f..934f15dd 100644 --- a/modeling/inference_models/gooseai/class.py +++ b/modeling/inference_models/gooseai/class.py @@ -19,7 +19,6 @@ model_backend_name = "GooseAI" class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") - self.source = "GooseAI" class model_backend(openai_gooseai_model_backend): @@ -28,6 +27,7 @@ class model_backend(openai_gooseai_model_backend): def __init__(self): super().__init__() self.url = "https://api.goose.ai/v1/engines" + self.source = "GooseAI" def is_valid(self, model_name, model_path, menu_path): return model_name == "GooseAI" \ No newline at end of file diff --git a/modeling/inference_models/openai/class.py b/modeling/inference_models/openai/class.py index 492a3fdb..cea644ea 100644 --- a/modeling/inference_models/openai/class.py +++ b/modeling/inference_models/openai/class.py @@ -19,7 +19,6 @@ model_backend_name = "OpenAI" class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") - self.source = "OpenAI" class model_backend(openai_gooseai_model_backend): @@ -28,6 +27,7 @@ class model_backend(openai_gooseai_model_backend): def __init__(self): super().__init__() self.url = "https://api.openai.com/v1/engines" + self.source = "OpenAI" def is_valid(self, model_name, model_path, menu_path): return model_name == "OAI" \ No newline at end of file diff --git a/modeling/inference_models/openai_gooseai.py b/modeling/inference_models/openai_gooseai.py index e4a027db..e4b9dfb8 100644 --- a/modeling/inference_models/openai_gooseai.py +++ b/modeling/inference_models/openai_gooseai.py @@ -31,11 +31,6 @@ class model_backend(InferenceModel): return model_name == "OAI" or model_name == "GooseAI" def get_requested_parameters(self, model_name, model_path, menu_path): - try: - print(self.source) - except: - print(vars(self)) - raise if os.path.exists("settings/{}.model_backend.settings".format(self.source)) and 'colaburl' not in vars(self): with open("settings/{}.model_backend.settings".format(self.source), "r") as f: self.key = json.load(f)['key'] From 128c77e0fde7deae7fa30e65cc4166eb46ba314d Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 19:01:11 -0400 Subject: [PATCH 053/102] Default model backend to huggingface if not present when loading a model through the command line --- aiserver.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/aiserver.py b/aiserver.py 
index d4a127f0..a8591dc3 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1365,7 +1365,7 @@ def general_startup(override_args=None): parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable") parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") - parser.add_argument("--model_backend", help="Specify the model backend you want to use") + parser.add_argument("--model_backend", default="Huggingface", help="Specify the model backend you want to use") parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input to the model loading process (leave blank to get required parameters)") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") @@ -1558,10 +1558,6 @@ def general_startup(override_args=None): if args.model: # At this point we have to try to load the model through the selected backend - if not args.model_backend: - logger.error("Didn't select a model backend. Please enter one through the --model_backend or remove the --model from the run command") - logger.error("Possible model backends are: {}".format(", ".join([x for x in model_backends]))) - exit() if args.model_backend not in model_backends: logger.error("Your selected model backend ({}) isn't in the model backends we know about ({})".format(args.model_backend, ", ".join([x for x in model_backends]))) exit() @@ -1576,11 +1572,11 @@ def general_startup(override_args=None): arg_parameters['use_gpu'] = True for parameter in parameters: - if parameter['default'] == "" or parameter['id'] not in arg_parameters: + if parameter['default'] == "" and parameter['id'] not in arg_parameters: mising_parameters.append(parameter['id']) ok_to_load = False elif parameter['id'] not in arg_parameters: - arg_parameters[parameter] = parameter['default'] + arg_parameters[parameter['id']] = parameter['default'] if not ok_to_load: logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) From 19559d5eef5999c48503852d02d45c1c7fcce7ec Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 19:15:25 -0400 Subject: [PATCH 054/102] Fix for colors in the classic UI --- static/custom.css | 74 ++++++++++++++++++++++++++++++++++++++++ templates/templates.html | 1 - 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/static/custom.css b/static/custom.css index ffa6f44f..412c7f1b 100644 --- a/static/custom.css +++ b/static/custom.css @@ -2330,4 +2330,78 @@ body.connected .popupfooter, .popupfooter.always-available { .popup .model_item .model_menu_selected { color: var(--popup_selected_color); background-color: var(--popup_selected_color_text); +} + +.settings_select { + color: var(--dropdown_text); + background: var(--dropdown_background); + margin-left: auto; + margin-right: 25px; +} + +.setting_value { + text-align: right; + grid-area: value; + font-size: calc(12px + var(--font_size_adjustment)); + padding: 2px; + padding-top: 0px; + background-color: inherit; + color: inherit; + border: none; + outline: none; +} + +.setting_value:focus { + color: var(--text_edit); +} + +.setting_container_model { + display: grid; + grid-template-areas: "label value" + "item item" + "minlabel maxlabel"; + grid-template-rows: 20px auto 20px; + grid-template-columns: auto 30px; + row-gap: 0.2em; + background-color: var(--setting_background); + color: var(--setting_text); + border-radius: var(--radius_settings_background); + padding: 2px; + margin: 2px; + width: calc(100%); +} + +.setting_container_model .setting_item{ + font-size: calc(0.93em + var(--font_size_adjustment)); + margin-left: 10px; +} + + +.setting_minlabel { + padding-top: 6px; + grid-area: minlabel; + overflow: hidden; + padding: 5px; + padding-top: 0px; + text-align: left; + font-size: calc(0.8em + var(--font_size_adjustment)); +} + +.setting_maxlabel { + padding-top: 6px; + grid-area: maxlabel; + overflow: hidden; + padding: 5px; + padding-top: 0px; + text-align: right; + font-size: calc(0.8em + var(--font_size_adjustment)); +} + +.setting_label { + display: flex; + grid-area: label; + overflow: hidden; + padding: 5px; + padding-right: 0px; + padding-top: 0px; } \ No newline at end of file diff --git a/templates/templates.html b/templates/templates.html index 49fa99f6..926bf854 100644 --- a/templates/templates.html +++ b/templates/templates.html @@ -1,5 +1,4 @@ -
From 513b8575e71d164fc82747009f8fd3391f4ceb28 Mon Sep 17 00:00:00 2001 From: ebolam Date: Sat, 20 May 2023 11:01:49 -0400 Subject: [PATCH 055/102] Fix for missing import Fix for model name being a path which caused save issues --- aiserver.py | 2 +- modeling/inference_models/hf.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index a8591dc3..38ffc3f6 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6165,7 +6165,7 @@ def UI_2_select_model(data): else: logger.debug("{} says invalid".format(model_backend)) - output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) + output.append({'label': path[1], 'name': path[1], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index eff3d1ce..318423d5 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -8,6 +8,7 @@ import koboldai_settings from logger import logger from modeling.inference_model import InferenceModel import torch +import gc class HFInferenceModel(InferenceModel): From 925cad2e2fa6c65b8ea37680d19fa69023cce9f5 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 14:50:13 -0400 Subject: [PATCH 056/102] Better compatibility with hf model backend --- modeling/inference_models/hf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 318423d5..b209d49f 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -1,4 +1,4 @@ -import os +import os, sys from typing import Optional from transformers import AutoConfig import warnings @@ -196,9 +196,10 @@ class HFInferenceModel(InferenceModel): except: pass if self.hf_torch: - breakmodel.breakmodel = True - breakmodel.gpu_blocks = [] - breakmodel.disk_blocks = 0 + if 'breakmodel' in sys.modules: + breakmodel.breakmodel = True + breakmodel.gpu_blocks = [] + breakmodel.disk_blocks = 0 def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults From dc20e6dde9152fd609ae06d362b05b9a0ac29bb5 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 15:04:33 -0400 Subject: [PATCH 057/102] Fix for unloading models --- modeling/inference_models/hf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index b209d49f..53c802b1 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -197,6 +197,7 @@ class HFInferenceModel(InferenceModel): pass if self.hf_torch: if 'breakmodel' in sys.modules: + import breakmodel breakmodel.breakmodel = True breakmodel.gpu_blocks = [] breakmodel.disk_blocks = 0 From ca770844b0d6002f07d5b347190be0b25e6faf3d Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 15:07:59 -0400 Subject: [PATCH 058/102] Fix for breakmodel --- modeling/inference_models/hf_torch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 5dd53bf8..47c37436 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -820,7 +820,7 @@ class HFTorchInferenceModel(HFInferenceModel): 
breakmodel.gpu_blocks = [0] * n_layers return - elif breakmodel.gpu_blocks != []: + elif breakmodel.gpu_blocks == []: logger.info("Breakmodel not specified, assuming GPU 0") breakmodel.gpu_blocks = [n_layers] n_layers = 0 From f1a16f260f4f22384ae882042860228134bf6222 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 16:10:41 -0400 Subject: [PATCH 059/102] Potential breakmodel fix --- modeling/inference_models/hf_torch.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 47c37436..5595edc7 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -824,6 +824,20 @@ class HFTorchInferenceModel(HFInferenceModel): logger.info("Breakmodel not specified, assuming GPU 0") breakmodel.gpu_blocks = [n_layers] n_layers = 0 + + else: + s = n_layers + for i in range(len(breakmodel.gpu_blocks)): + if breakmodel.gpu_blocks[i] <= -1: + breakmodel.gpu_blocks[i] = s + break + else: + s -= breakmodel.gpu_blocks[i] + assert sum(breakmodel.gpu_blocks) <= n_layers + n_layers -= sum(breakmodel.gpu_blocks) + if breakmodel.disk_blocks is not None: + assert breakmodel.disk_blocks <= n_layers + n_layers -= breakmodel.disk_blocks logger.init_ok("Final device configuration:", status="Info") self.breakmodel_device_list(n_layers, primary=breakmodel.primary_device) From 9e53bcf67684198bbbaeb3e67281c1641419f448 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 20:24:57 -0400 Subject: [PATCH 060/102] Fix for breakmodel loading to CPU when set to GPU --- modeling/inference_models/generic_hf_torch/class.py | 8 +++++--- modeling/inference_models/hf.py | 6 ++++-- modeling/inference_models/hf_torch.py | 3 +++ static/custom.css | 5 +++++ 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py index 4e2c8a5b..572337e2 100644 --- a/modeling/inference_models/generic_hf_torch/class.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -248,11 +248,12 @@ class model_backend(HFTorchInferenceModel): self.patch_embedding() + if utils.koboldai_vars.hascuda: - if utils.koboldai_vars.usegpu: + if self.usegpu: # Use just VRAM self.model = self.model.half().to(utils.koboldai_vars.gpu_device) - elif utils.koboldai_vars.breakmodel: + elif self.breakmodel: # Use both RAM and VRAM (breakmodel) if not self.lazy_load: self.breakmodel_device_config(self.model.config) @@ -267,7 +268,8 @@ class model_backend(HFTorchInferenceModel): self._move_to_devices() else: self.model = self.model.to("cpu").float() - + + self.model.kai_model = self utils.koboldai_vars.modeldim = self.get_hidden_size() diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 53c802b1..e801eab2 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -158,7 +158,7 @@ class HFInferenceModel(InferenceModel): layers.append(None) else: layers.append(parameters["{}_Layers".format(i)]) - self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + self.cpu_layers = int(parameters['CPU_Layers']) if 'CPU_Layers' in parameters else None if isinstance(self.cpu_layers, str): self.cpu_layers = int(self.cpu_layers) if self.cpu_layers.isnumeric() else 0 self.layers = layers @@ -167,9 +167,11 @@ class HFInferenceModel(InferenceModel): self.disk_layers = int(self.disk_layers) if self.disk_layers.isnumeric() else 0 breakmodel.gpu_blocks = 
layers breakmodel.disk_blocks = self.disk_layers - self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.usegpu = self.cpu_layers == 0 and breakmodel.disk_blocks == 0 and sum(self.layers)-self.layers[0] == 0 self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel + else: + self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id'] self.path = parameters['path'] if 'path' in parameters else None diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 5595edc7..c5560360 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -126,6 +126,7 @@ class HFTorchInferenceModel(HFInferenceModel): return "Unknown" def _post_load(m_self) -> None: + if not utils.koboldai_vars.model_type: utils.koboldai_vars.model_type = m_self.get_model_type() @@ -562,6 +563,7 @@ class HFTorchInferenceModel(HFInferenceModel): ) ) # print(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ", end="", flush=True) + #logger.debug(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ") model_dict[key] = model_dict[key].materialize( f, map_location="cpu" ) @@ -847,6 +849,7 @@ class HFTorchInferenceModel(HFInferenceModel): # If all layers are on the same device, use the old GPU generation mode while len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0: breakmodel.gpu_blocks.pop() + self.breakmodel = True if len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in ( -1, utils.num_layers(config), diff --git a/static/custom.css b/static/custom.css index 412c7f1b..968d73e4 100644 --- a/static/custom.css +++ b/static/custom.css @@ -2404,4 +2404,9 @@ body.connected .popupfooter, .popupfooter.always-available { padding: 5px; padding-right: 0px; padding-top: 0px; +} + +.input_error { + border: 5px solid red !important; + box-sizing: border-box !important; } \ No newline at end of file From 4c25d6fbbbfad67176056a6f5af1826c2c2eb24c Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 20:34:01 -0400 Subject: [PATCH 061/102] Fix for loading model multiple times loosing the gpu/cpu splits --- modeling/inference_models/hf.py | 6 ------ modeling/inference_models/hf_torch.py | 3 +++ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index e801eab2..b50ebf56 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -197,12 +197,6 @@ class HFInferenceModel(InferenceModel): torch.cuda.empty_cache() except: pass - if self.hf_torch: - if 'breakmodel' in sys.modules: - import breakmodel - breakmodel.breakmodel = True - breakmodel.gpu_blocks = [] - breakmodel.disk_blocks = 0 def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index c5560360..681d3ab1 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -788,6 +788,7 @@ class HFTorchInferenceModel(HFInferenceModel): if device_count < 2: primary = None logger.debug("n_layers: {}".format(n_layers)) + logger.debug("gpu blocks: 
{}".format(breakmodel.gpu_blocks)) gpu_blocks = breakmodel.gpu_blocks + ( device_count - len(breakmodel.gpu_blocks) ) * [0] @@ -818,6 +819,8 @@ class HFTorchInferenceModel(HFInferenceModel): n_layers = utils.num_layers(config) + logger.debug("gpu blocks before modification: {}".format(breakmodel.gpu_blocks)) + if utils.args.cpu: breakmodel.gpu_blocks = [0] * n_layers return From 48226191922a48024a75a531668d3638b1f71155 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 20:47:14 -0400 Subject: [PATCH 062/102] Fix for model backends that have no inputs not being able to load in the UI --- static/koboldai.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/static/koboldai.js b/static/koboldai.js index dabbcda9..c4b2e160 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1933,6 +1933,8 @@ function selected_model_info(sent_data) { document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); } + model_settings_checker() + } function update_gpu_layers() { From 5561cc1f220c0cf9d957bcbd3e535ad88502ab82 Mon Sep 17 00:00:00 2001 From: ebolam Date: Tue, 23 May 2023 08:33:19 -0400 Subject: [PATCH 063/102] Fix for GPU generation --- modeling/inference_models/hf_torch.py | 13 ++++++++- static/application.js | 42 +++++++++++++++------------ static/koboldai.js | 40 +++++++++++++------------ 3 files changed, 58 insertions(+), 37 deletions(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 681d3ab1..2f575e73 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -125,6 +125,17 @@ class HFTorchInferenceModel(HFInferenceModel): else: return "Unknown" + def get_auxilary_device(self): + """Get device auxilary tensors like inputs should be stored on.""" + + # NOTE: TPU isn't a torch device, so TPU stuff gets sent to CPU. 
+ if utils.koboldai_vars.hascuda and self.usegpu: + return utils.koboldai_vars.gpu_device + elif utils.koboldai_vars.hascuda and self.breakmodel: + import breakmodel + return breakmodel.primary_device + return "cpu" + def _post_load(m_self) -> None: if not utils.koboldai_vars.model_type: @@ -226,7 +237,7 @@ class HFTorchInferenceModel(HFInferenceModel): else: gen_in = prompt_tokens - device = utils.get_auxilary_device() + device = self.get_auxilary_device() gen_in = gen_in.to(device) additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else [] diff --git a/static/application.js b/static/application.js index ca445c5f..ca81f729 100644 --- a/static/application.js +++ b/static/application.js @@ -4012,16 +4012,18 @@ function model_settings_checker() { //get an object of all the input settings from the user data = {} settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; @@ -4259,6 +4261,8 @@ function selected_model_info(sent_data) { document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); } + model_settings_checker(); + } function getModelParameterCount(modelName) { @@ -4371,16 +4375,18 @@ function load_model() { //get an object of all the input settings from the user data = {} - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; diff --git a/static/koboldai.js b/static/koboldai.js index c4b2e160..f0a1f6f8 
100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1686,16 +1686,18 @@ function model_settings_checker() { //get an object of all the input settings from the user data = {} settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; @@ -1965,16 +1967,18 @@ function load_model() { //get an object of all the input settings from the user data = {} - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; From 7a8e4c39da3c1d30ddf3489945799b2695d9be86 Mon Sep 17 00:00:00 2001 From: ebolam Date: Tue, 23 May 2023 08:35:15 -0400 Subject: [PATCH 064/102] Fix for attention bias --- aiserver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 38ffc3f6..6276e514 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3693,7 +3693,8 @@ def calcsubmit(txt): bias += [1] * (i - top_index) bias[i] = b["multiplier"] - device = utils.get_auxilary_device() + + device = model.get_auxilary_device() attention_bias.attention_bias = torch.Tensor(bias).to(device) logger.info(f"Bias by {koboldai_vars.memory_attn_bias} -- {attention_bias.attention_bias}") logger.debug("Submit: experimental_features time {}s".format(time.time()-start_time)) From d6c37bbac0fdbbc6a5eba4671bdd85f695efd822 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Tue, 23 May 2023 22:59:36 +0800 Subject: [PATCH 065/102] Updated embedded Kobold Lite to v32 --- 
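Patches 063 and 064 move device selection onto the model backend: generation inputs and the experimental attention-bias tensor both ask the loaded model where auxiliary tensors should live, instead of calling a utils-level helper. The sketch below restates that selection order with a simplified backend object; the attribute names on backend and the build_attention_bias helper are illustrative assumptions, not the project's actual layout:

    import torch

    def get_auxilary_device(backend):
        # Selection order from patch 063: full-GPU mode first, then
        # breakmodel's primary device, otherwise fall back to the CPU.
        if backend.hascuda and backend.usegpu:
            return backend.gpu_device       # e.g. "cuda:0"
        if backend.hascuda and backend.breakmodel:
            return backend.primary_device   # breakmodel's primary GPU
        return "cpu"

    def build_attention_bias(backend, bias_values):
        # Illustrative only: place the bias tensor on the same device the
        # backend reports for inputs, as patch 064 does for attention_bias.
        device = get_auxilary_device(backend)
        return torch.tensor(bias_values, dtype=torch.float32).to(device)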
static/klite.html | 422 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 318 insertions(+), 104 deletions(-) diff --git a/static/klite.html b/static/klite.html index 0893ebbe..8f3e55d1 100644 --- a/static/klite.html +++ b/static/klite.html @@ -3,7 +3,7 @@