From 34a98d2962678b468b833ec3c506c690e4c50e8e Mon Sep 17 00:00:00 2001
From: onesome
Date: Fri, 21 Jul 2023 00:48:02 -0500
Subject: [PATCH 01/11] Context Menu: Small visual fixes

woohooooo back to css

- fixes margins to look better
- moves contents of context menu items 1px down
- fixes context menus near the edge wrapping their inner text (ew)
---
 static/koboldai.css | 10 ++++++++--
 static/koboldai.js  |  2 +-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/static/koboldai.css b/static/koboldai.css
index 3252c21a..3ad643d2 100644
--- a/static/koboldai.css
+++ b/static/koboldai.css
@@ -2705,13 +2705,14 @@ body {
 #context-menu > hr {
 	/* Division Color*/
 	border-top: 2px solid var(--context_menu_division);
-	margin: 5px 5px;
+	margin: 3px 5px;
 }

 .context-menu-item {
 	padding: 5px;
 	padding-right: 25px;
 	min-width: 100px;
+	white-space: nowrap;
 }

 .context-menu-item:hover {
@@ -2722,11 +2723,16 @@ body {

 .context-menu-item > .material-icons-outlined {
 	position: relative;
-	top: 2px;
+	top: 3px;
 	font-size: 15px;
 	margin-right: 5px;
 }

+.context-menu-item > .context-menu-label {
+	position: relative;
+	top: 1px;
+}
+
 /* Substitutions */
 #Substitutions {
 	margin-left: 10px;
diff --git a/static/koboldai.js b/static/koboldai.js
index 8b70dd6a..e8053f23 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -6071,7 +6071,7 @@ process_cookies();
 		context_menu_cache.push({shouldShow: action.shouldShow});

 		let icon = $e("span", item, {classes: ["material-icons-outlined"], innerText: action.icon});
-		item.append(action.label);
+		$e("span", item, {classes: ["context-menu-label"], innerText: action.label});

 		item.addEventListener("mousedown", e => e.preventDefault());

 		// Expose the "summonEvent" to enable access to original context menu target.

From 4921040fb462dac00ba8a028a8b22e9524f9f740 Mon Sep 17 00:00:00 2001
From: onesome
Date: Fri, 21 Jul 2023 00:52:12 -0500
Subject: [PATCH 02/11] Context Menu: Make things a little less bloaty

5px was a bit excessive

TODO: studied the context menu in my browser for a bit and noticed that
if it was going to be too close to the bottom, the browser flips the
vertical direction the context menu opens in. sounds neat!
---
 static/koboldai.css | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/static/koboldai.css b/static/koboldai.css
index 3ad643d2..b83384c4 100644
--- a/static/koboldai.css
+++ b/static/koboldai.css
@@ -2709,7 +2709,7 @@ body {
 }

 .context-menu-item {
-	padding: 5px;
+	padding: 4px;
 	padding-right: 25px;
 	min-width: 100px;
 	white-space: nowrap;

From 46c377b0c362d715e1c37f423ab763f367c32299 Mon Sep 17 00:00:00 2001
From: onesome
Date: Fri, 21 Jul 2023 00:53:48 -0500
Subject: [PATCH 03/11] Context Menu: Add stubs for new temporary stopping
 criteria idea

I think this would be cool!

Ideas:
- disable/grey when the model doesn't support stopping criteria
- shortcuts (maybe, this would def be a power user thing)
- option to generate until EOS token
- option to generate forever until the user manually stops
- (not super related but pixels away) make retry while generation is
  ongoing cancel generation and retry. same with undo.

(the stopper shape these stubs would eventually hook into is sketched
below)
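For reference, a hedged sketch of that stopper shape. `ellipsis_stopper`
is a hypothetical illustration only -- its signature mirrors the hooks in
modeling/stoppers.py (as refactored later in this series); nothing here
is added by any patch:

    import torch

    from modeling import inference_model

    def ellipsis_stopper(
        model: inference_model.InferenceModel,
        input_ids: torch.LongTensor,
    ) -> bool:
        """Stop once every batch member has emitted an ellipsis."""
        # State is tracked per batch member, since one sequence cannot be
        # stopped on its own; we wait until all of them qualify.
        if "ellipsis_seen" not in model.gen_state:
            model.gen_state["ellipsis_seen"] = [False] * len(input_ids)

        for i, sequence in enumerate(input_ids):
            if "..." in model.tokenizer.decode(sequence[-3:]):
                model.gen_state["ellipsis_seen"][i] = True

        if all(model.gen_state["ellipsis_seen"]):
            del model.gen_state["ellipsis_seen"]
            return True
        return False

Returning True halts generation for the whole batch; registering and
unregistering such a hook around a single generation is what the
temporary-stopper patches below end up doing.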
---
 static/koboldai.js       | 6 ++++++
 templates/index_new.html | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/static/koboldai.js b/static/koboldai.js
index e8053f23..64da7146 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -147,6 +147,12 @@ const context_menu_actions = {
 	"wi-img-upload-button": [
 		{label: "Upload Image", icon: "file_upload", enabledOn: "ALWAYS", click: wiImageReplace},
 		{label: "Use Generated Image", icon: "image", enabledOn: "GENERATED-IMAGE", click: wiImageUseGeneratedImage},
+	],
+	"submit-button": [
+		{label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: function(){}},
+		null,
+		{label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
+		{label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
 	]
 };
diff --git a/templates/index_new.html b/templates/index_new.html
index 99b8c941..53bcffd5 100644
--- a/templates/index_new.html
+++ b/templates/index_new.html
@@ -110,7 +110,7 @@
-
+

From 6cf63f781a3c17ab6b41c5f12cd05f824be9ba04 Mon Sep 17 00:00:00 2001
From: onesome
Date: Fri, 21 Jul 2023 01:58:57 -0500
Subject: [PATCH 04/11] YEAAAAAAAAAA
---
 static/koboldai.js       | 11 +++++++++++
 templates/index_new.html |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/static/koboldai.js b/static/koboldai.js
index 64da7146..75563df2 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -151,8 +151,19 @@ const context_menu_actions = {
 	"submit-button": [
 		{label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: function(){}},
 		null,
+		{label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
+		{label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
+		null,
 		{label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
 		{label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
+	],
+	"undo-button": [
+		{label: "Undo", icon: "undo", enabledOn: "ALWAYS", click: function(){}},
+		null,
+		{label: "Prune Actions", icon: "cut", enabledOn: "ALWAYS", click: function(){}},
+		{label: "Shred", icon: "local_fire_department", enabledOn: "ALWAYS", click: function(){}},
+		null,
+		{label: "Trim Last Sentence", icon: "carpenter", enabledOn: "ALWAYS", click: function(){}},
 	]
 };
diff --git a/templates/index_new.html b/templates/index_new.html
index 53bcffd5..2b1c0ddf 100644
--- a/templates/index_new.html
+++ b/templates/index_new.html
@@ -112,7 +112,7 @@
-
+

From 3a43b254b86733a637a2286bf8a3c9421674771a Mon Sep 17 00:00:00 2001
From: somebody
Date: Fri, 21 Jul 2023 13:27:30 -0500
Subject: [PATCH 05/11] Add basic support for some of the quick stoppers
---
 aiserver.py                 | 64 +++++++++++++++++++++++++++----------
 modeling/inference_model.py | 32 +++++++++++++++++++
 modeling/stoppers.py        | 52 ++++++++++++++++++++++++------
 static/koboldai.js          | 23 ++++++++-----
 4 files changed, 137 insertions(+), 34 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 0aa9bd4c..1cb9146e 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -12,6 +12,8 @@ import random
 import shutil
 import eventlet
+from modeling.inference_model import GenerationMode
+
 eventlet.monkey_patch(all=True, thread=False, os=False)
 import os, inspect, contextlib, pickle
 os.system("")
@@ -3266,7 +3268,16 @@ def check_for_backend_compilation():
         break
     koboldai_vars.checking = False

-def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, disable_recentrng=False, no_generate=False, ignore_aibusy=False):
+def actionsubmit(
+    data,
+    actionmode=0,
+    force_submit=False,
+    force_prompt_gen=False,
+    disable_recentrng=False,
+    no_generate=False,
+    ignore_aibusy=False,
+    gen_mode=GenerationMode.STANDARD
+):
     # Ignore new submissions if the AI is currently busy
     if(koboldai_vars.aibusy):
         return
@@ -3424,7 +3435,7 @@
             if(not no_generate and not koboldai_vars.noai and koboldai_vars.lua_koboldbridge.generating):
                 # Off to the tokenizer!
-                calcsubmit("")
+                calcsubmit("", gen_mode=gen_mode)
                 if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0):
                     data = ""
                     force_submit = True
@@ -3779,7 +3790,7 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
 #==================================================================#
 # Take submitted text and build the text to be given to generator
 #==================================================================#
-def calcsubmit(txt):
+def calcsubmit(txt, gen_mode=GenerationMode.STANDARD):
     anotetxt     = ""    # Placeholder for Author's Note text
     forceanote   = False # In case we don't have enough actions to hit A.N. depth
     anoteadded   = False # In case our budget runs out before we hit A.N. depth
@@ -3821,7 +3832,7 @@
             logger.debug("Submit: experimental_features time {}s".format(time.time()-start_time))

             start_time = time.time()
-            generate(subtxt, min, max, found_entries)
+            generate(subtxt, min, max, found_entries, gen_mode=gen_mode)
             logger.debug("Submit: generate time {}s".format(time.time()-start_time))

             attention_bias.attention_bias = None
@@ -3889,7 +3900,7 @@ class HordeException(Exception):
 # Send text to generator and deal with output
 #==================================================================#

-def generate(txt, minimum, maximum, found_entries=None):
+def generate(txt, minimum, maximum, found_entries=None, gen_mode=GenerationMode.STANDARD):
     koboldai_vars.generated_tkns = 0

     if(found_entries is None):
@@ -3911,7 +3922,7 @@
     # Submit input text to generator
     try:
         start_time = time.time()
-        genout, already_generated = tpool.execute(model.core_generate, txt, found_entries)
+        genout, already_generated = tpool.execute(model.core_generate, txt, found_entries, gen_mode=gen_mode)
         logger.debug("Generate: core_generate time {}s".format(time.time()-start_time))
     except Exception as e:
         if(issubclass(type(e), lupa.LuaError)):
@@ -6168,22 +6179,43 @@ def UI_2_delete_option(data):
 @socketio.on('submit')
 @logger.catch
 def UI_2_submit(data):
-    if not koboldai_vars.noai and data['theme'] != "":
+    if not koboldai_vars.noai and data['theme']:
+        # Random prompt generation
         logger.debug("doing random prompt")
         memory = koboldai_vars.memory
         koboldai_vars.memory = "{}\n\nYou generate the following {} story concept :".format(koboldai_vars.memory, data['theme'])
         koboldai_vars.lua_koboldbridge.feedback = None
         actionsubmit("", force_submit=True, force_prompt_gen=True)
         koboldai_vars.memory = memory
-    else:
-        logger.debug("doing normal input")
-        koboldai_vars.actions.clear_unused_options()
-        koboldai_vars.lua_koboldbridge.feedback = None
-        koboldai_vars.recentrng = koboldai_vars.recentrngm = None
-        if koboldai_vars.actions.action_count == -1:
-            actionsubmit(data['data'], actionmode=koboldai_vars.actionmode)
-        else:
-            actionsubmit(data['data'], actionmode=koboldai_vars.actionmode)
+        return
+
+    logger.debug("doing normal input")
+    koboldai_vars.actions.clear_unused_options()
+    koboldai_vars.lua_koboldbridge.feedback = None
+    koboldai_vars.recentrng = koboldai_vars.recentrngm = None
+
+    gen_mode_name = data.get("gen_mode", None)
+    gen_mode = {
+        # If we don't have a gen mode, or it's None (the default), just do a
+        # normal submission.
+        None: GenerationMode.STANDARD,
+
+        # NOTE: forever should be a no-op on models that don't support
+        # interrupting generation. This should be conveyed to the user by
+        # graying out the option in the context menu.
+        "forever": GenerationMode.FOREVER,
+
+        # The following gen modes require stopping criteria to be respected by
+        # the backend:
+        "until_eos": GenerationMode.UNTIL_EOS,
+        "until_newline": GenerationMode.UNTIL_NEWLINE,
+        "until_sentence_end": GenerationMode.UNTIL_SENTENCE_END,
+    }.get(gen_mode_name, None)
+
+    if not gen_mode:
+        raise RuntimeError(f"Unknown gen_mode '{gen_mode_name}'")
+
+    actionsubmit(data['data'], actionmode=koboldai_vars.actionmode, gen_mode=gen_mode)
diff --git a/modeling/inference_model.py b/modeling/inference_model.py
index a2d4fa63..1d285576 100644
--- a/modeling/inference_model.py
+++ b/modeling/inference_model.py
@@ -3,6 +3,8 @@ from __future__ import annotations
 from dataclasses import dataclass
 import time
 from typing import List, Optional, Union
+
+from enum import Enum

 from logger import logger
 import torch
@@ -12,6 +14,7 @@ from transformers import (
     GPT2Tokenizer,
     AutoTokenizer,
 )
+from modeling.stoppers import Stoppers
 from modeling.tokenizer import GenericTokenizer

 from modeling import logits_processors
@@ -154,6 +157,12 @@ class ModelCapabilities:
     # Some models need to warm up the TPU before use
     uses_tpu: bool = False

+class GenerationMode(Enum):
+    STANDARD = 0
+    FOREVER = 1
+    UNTIL_EOS = 2
+    UNTIL_NEWLINE = 3
+    UNTIL_SENTENCE_END = 4

 class InferenceModel:
     """Root class for all models."""
@@ -256,6 +265,7 @@
         self,
         text: list,
         found_entries: set,
+        gen_mode: GenerationMode = GenerationMode.STANDARD,
     ):
         """Generate story text. Heavily tied to story-specific parameters; if
         you are making a new generation-based feature, consider `generate_raw()`.
@@ -263,6 +273,7 @@
         Args:
             text (list): Encoded input tokens
             found_entries (set): Entries found for Dynamic WI
+            gen_mode (GenerationMode): The GenerationMode to pass to raw_generate. Defaults to GenerationMode.STANDARD

         Raises:
             RuntimeError: if inconsistancies are detected with the internal state and Lua state -- sanity check
@@ -358,6 +369,7 @@
                 seed=utils.koboldai_vars.seed
                 if utils.koboldai_vars.full_determinism
                 else None,
+                gen_mode=gen_mode
             )
             logger.debug(
                 "core_generate: run raw_generate pass {} {}s".format(
@@ -532,6 +544,7 @@
         found_entries: set = (),
         tpu_dynamic_inference: bool = False,
         seed: Optional[int] = None,
+        gen_mode: GenerationMode = GenerationMode.STANDARD,
         **kwargs,
     ) -> GenerationResult:
         """A wrapper around `_raw_generate()` that handles gen_state and other stuff. Use this to generate text outside of the story.
@@ -547,6 +560,7 @@
             is_core (bool, optional): Whether this generation is a core story generation. Defaults to False.
             single_line (bool, optional): Generate one line only.. Defaults to False.
             found_entries (set, optional): Entries found for Dynamic WI. Defaults to ().
+            gen_mode (GenerationMode): Special generation mode. Defaults to GenerationMode.STANDARD.

         Raises:
             ValueError: If prompt type is weird
@@ -568,6 +582,21 @@
             "wi_scanner_excluded_keys", set()
         )

+        temp_stoppers = []
+
+        if gen_mode == GenerationMode.FOREVER:
+            raise NotImplementedError()
+        elif gen_mode == GenerationMode.UNTIL_EOS:
+            # Still need to unban
+            raise NotImplementedError()
+        elif gen_mode == GenerationMode.UNTIL_NEWLINE:
+            # TODO: Look into replacing `single_line` with `generation_mode`
+            temp_stoppers.append(Stoppers.newline_stopper)
+        elif gen_mode == GenerationMode.UNTIL_SENTENCE_END:
+            temp_stoppers.append(Stoppers.sentence_end_stopper)
+
+        self.stopper_hooks += temp_stoppers
+
         utils.koboldai_vars.inference_config.do_core = is_core
         gen_settings = GenerationSettings(*(generation_settings or {}))
@@ -604,6 +633,9 @@
             f"Generated {len(result.encoded[0])} tokens in {time_end} seconds, for an average rate of {tokens_per_second} tokens per second."
         )

+        for stopper in temp_stoppers:
+            self.stopper_hooks.remove(stopper)
+
         return result

     def generate(
diff --git a/modeling/stoppers.py b/modeling/stoppers.py
index 94c09e85..02c1ce48 100644
--- a/modeling/stoppers.py
+++ b/modeling/stoppers.py
@@ -3,15 +3,12 @@ from __future__ import annotations
 import torch

 import utils
-from modeling.inference_model import (
-    InferenceModel,
-)
-
+from modeling import inference_model

 class Stoppers:
     @staticmethod
     def core_stopper(
-        model: InferenceModel,
+        model: inference_model.InferenceModel,
         input_ids: torch.LongTensor,
     ) -> bool:
         if not utils.koboldai_vars.inference_config.do_core:
@@ -62,7 +59,7 @@
     @staticmethod
     def dynamic_wi_scanner(
-        model: InferenceModel,
+        model: inference_model.InferenceModel,
         input_ids: torch.LongTensor,
     ) -> bool:
         if not utils.koboldai_vars.inference_config.do_dynamic_wi:
@@ -93,7 +90,7 @@
     @staticmethod
     def chat_mode_stopper(
-        model: InferenceModel,
+        model: inference_model.InferenceModel,
         input_ids: torch.LongTensor,
     ) -> bool:
         if not utils.koboldai_vars.chatmode:
@@ -118,7 +115,7 @@
     @staticmethod
     def stop_sequence_stopper(
-        model: InferenceModel,
+        model: inference_model.InferenceModel,
         input_ids: torch.LongTensor,
     ) -> bool:

@@ -145,14 +142,22 @@
     @staticmethod
     def singleline_stopper(
-        model: InferenceModel,
+        model: inference_model.InferenceModel,
         input_ids: torch.LongTensor,
     ) -> bool:
-        """If singleline mode is enabled, it's pointless to generate output beyond the first newline."""
+        """Stop on occurrences of newlines **if singleline is enabled**."""
+        # It might be better just to do this further up the line
         if not utils.koboldai_vars.singleline:
             return False

+        return Stoppers.newline_stopper(model, input_ids)
+
+    @staticmethod
+    def newline_stopper(
+        model: inference_model.InferenceModel,
+        input_ids: torch.LongTensor,
+    ) -> bool:
+        """Stop on occurrences of newlines."""
         # Keep track of presence of newlines in each sequence; we cannot stop a
         # batch member individually, so we must wait for all of them to contain
         # a newline.
@@ -167,3 +172,30 @@
                 del model.gen_state["newline_in_sequence"]
             return True
         return False
+
+    @staticmethod
+    def sentence_end_stopper(
+        model: inference_model.InferenceModel,
+        input_ids: torch.LongTensor,
+    ) -> bool:
+        """Stops at the end of sentences."""
+
+        # TODO: Make this more robust
+        SENTENCE_ENDS = [".", "?", "!"]
+
+        # We need to keep track of stopping for each batch, since we can't stop
+        # one individually.
+        if "sentence_end_sequence" not in model.gen_state:
+            model.gen_state["sentence_end_sequence"] = [False] * len(input_ids)
+
+        for sequence_idx, batch_sequence in enumerate(input_ids):
+            decoded = model.tokenizer.decode(batch_sequence[-1])
+            for end in SENTENCE_ENDS:
+                if end in decoded:
+                    model.gen_state["sentence_end_sequence"][sequence_idx] = True
+                    break
+
+        if all(model.gen_state["sentence_end_sequence"]):
+            del model.gen_state["sentence_end_sequence"]
+            return True
+        return False
\ No newline at end of file
diff --git a/static/koboldai.js b/static/koboldai.js
index 75563df2..320ec927 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -149,13 +149,13 @@ const context_menu_actions = {
 		{label: "Use Generated Image", icon: "image", enabledOn: "GENERATED-IMAGE", click: wiImageUseGeneratedImage},
 	],
 	"submit-button": [
-		{label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: function(){}},
+		{label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: () => storySubmit()},
 		null,
-		{label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
-		{label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
+		{label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("forever")},
+		{label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_eos")},
 		null,
-		{label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
-		{label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
+		{label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_newline")},
+		{label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_sentence_end")},
 	],
 	"undo-button": [
 		{label: "Undo", icon: "undo", enabledOn: "ALWAYS", click: function(){}},
@@ -256,10 +256,17 @@ function disconnect() {
 	document.getElementById("disconnect_message").classList.remove("hidden");
 }

-function storySubmit() {
+function storySubmit(genMode=null) {
+	const textInput = document.getElementById("input_text");
+	const themeInput = document.getElementById("themetext");
 	disruptStoryState();
-	socket.emit('submit', {'data': document.getElementById('input_text').value, 'theme': document.getElementById('themetext').value});
-	document.getElementById('input_text').value = '';
+	socket.emit('submit', {
+		data: textInput.value,
+		theme: themeInput.value,
+		gen_mode: genMode,
+	});
+
+	textInput.value = '';
 	document.getElementById('themetext').value = '';
 }

From 1c4157a41b753b8e3dd4246770c3cfa889485306 Mon Sep 17 00:00:00 2001
From: somebody
Date: Fri, 21 Jul 2023 13:33:38 -0500
Subject: [PATCH 06/11] Maybe another time

too many ideas at once
---
 static/koboldai.js | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/static/koboldai.js b/static/koboldai.js
index 320ec927..8ccac9dc 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -157,14 +157,6 @@ const context_menu_actions = {
 		{label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_newline")},
 		{label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_sentence_end")},
 	],
-	"undo-button": [
-		{label: "Undo", icon: "undo", enabledOn: "ALWAYS", click: function(){}},
-		null,
-		{label: "Prune Actions", icon: "cut", enabledOn: "ALWAYS", click: function(){}},
-		{label: "Shred", icon: "local_fire_department", enabledOn: "ALWAYS", click: function(){}},
-		null,
-		{label: "Trim Last Sentence", icon: "carpenter", enabledOn: "ALWAYS", click: function(){}},
-	]
 };

 let context_menu_cache = [];

From b8671cce09e83c4c64239951edf0150ef8b8d190 Mon Sep 17 00:00:00 2001
From: somebody
Date: Fri, 21 Jul 2023 13:48:23 -0500
Subject: [PATCH 07/11] Context Menu: Change positioning algorithm for y-axis
---
 static/koboldai.js | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/static/koboldai.js b/static/koboldai.js
index 8ccac9dc..cdb7bc79 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -5818,8 +5818,21 @@ function position_context_menu(contextMenu, x, y) {
 		right: x + width,
 	};

+	// Slide over if running against the window bounds.
 	if (farMenuBounds.right > bounds.right) x -= farMenuBounds.right - bounds.right;
-	if (farMenuBounds.bottom > bounds.bottom) y -= farMenuBounds.bottom - bounds.bottom;
+
+	if (farMenuBounds.bottom > bounds.bottom) {
+		// We've hit the bottom.
+
+		// The old algorithm pushed the menu against the wall, similar to what's
+		// done on the x-axis:
+		// y -= farMenuBounds.bottom - bounds.bottom;
+		// But now, we make the box change its emission direction from the cursor:
+		y -= (height + 5);
+		// The main advantage of this approach is that the cursor is never directly
+		// placed above a context menu item immediately after activating the context
+		// menu. (Thus the 5px offset also added)
+	}

 	contextMenu.style.left = `${x}px`;
 	contextMenu.style.top = `${y}px`;

From 8d5ae38b4568e1dbc893c8adfb7d3ac4a8bd57c2 Mon Sep 17 00:00:00 2001
From: somebody
Date: Fri, 21 Jul 2023 14:29:41 -0500
Subject: [PATCH 08/11] Context Menu: Show if gen mode is supported

- adds callback support to `enabledOn` in context menu items
- adds `supported_gen_modes` variable for frontend to check if a gen
  mode is supported
- adds `get_supported_gen_modes` to `InferenceModel` to get supported
  gen modes
- takes advantage of cool enum features for less enum-handling code
---
 aiserver.py                 | 29 ++++++-------------
 koboldai_settings.py        |  1 +
 modeling/inference_model.py | 37 +++++++++++++++++++++----
 static/koboldai.js          | 55 +++++++++++++++++++++++++++++--------
 4 files changed, 84 insertions(+), 38 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 1cb9146e..ba224b3c 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1790,7 +1790,9 @@ def load_model(model_backend, initial_load=False):

     with use_custom_unpickler(RestrictedUnpickler):
         model = model_backends[model_backend]
+        koboldai_vars.supported_gen_modes = [x.value for x in model.get_supported_gen_modes()]
         model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel)
+
     koboldai_vars.model = model.model_name if "model_name" in vars(model) else model.id #Should have model_name, but it could be set to id depending on how it's setup
     if koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"):
         koboldai_vars.model = os.path.basename(os.path.normpath(model.path))
@@ -6194,26 +6196,13 @@ def UI_2_submit(data):
     koboldai_vars.lua_koboldbridge.feedback = None
     koboldai_vars.recentrng = koboldai_vars.recentrngm = None

-    gen_mode_name = data.get("gen_mode", None)
-    gen_mode = {
-        # If we don't have a gen mode, or it's None (the default), just do a
-        # normal submission.
-        None: GenerationMode.STANDARD,
-
-        # NOTE: forever should be a no-op on models that don't support
-        # interrupting generation. This should be conveyed to the user by
-        # graying out the option in the context menu.
-        "forever": GenerationMode.FOREVER,
-
-        # The following gen modes require stopping criteria to be respected by
-        # the backend:
-        "until_eos": GenerationMode.UNTIL_EOS,
-        "until_newline": GenerationMode.UNTIL_NEWLINE,
-        "until_sentence_end": GenerationMode.UNTIL_SENTENCE_END,
-    }.get(gen_mode_name, None)
-
-    if not gen_mode:
-        raise RuntimeError(f"Unknown gen_mode '{gen_mode_name}'")
+    gen_mode_name = data.get("gen_mode", None) or "standard"
+    try:
+        gen_mode = GenerationMode(gen_mode_name)
+    except ValueError:
+        # Invalid enum lookup!
+        gen_mode = GenerationMode.STANDARD
+        logger.warning(f"Unknown gen_mode '{gen_mode_name}', using STANDARD! Report this!")

     actionsubmit(data['data'], actionmode=koboldai_vars.actionmode, gen_mode=gen_mode)
diff --git a/koboldai_settings.py b/koboldai_settings.py
index ebd8c019..f061beb1 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -685,6 +685,7 @@ class model_settings(settings):
         self._koboldai_vars = koboldai_vars
         self.alt_multi_gen = False
         self.bit_8_available = None
+        self.supported_gen_modes = []

     def reset_for_model_load(self):
         self.simple_randomness = 0 #Set first as this affects other outputs
diff --git a/modeling/inference_model.py b/modeling/inference_model.py
index 1d285576..e09249c3 100644
--- a/modeling/inference_model.py
+++ b/modeling/inference_model.py
@@ -147,7 +147,10 @@ class GenerationSettings:
 class ModelCapabilities:
     embedding_manipulation: bool = False
     post_token_hooks: bool = False
+
+    # Used to gauge if manual stopping is possible
     stopper_hooks: bool = False
+
     # TODO: Support non-live probabilities from APIs
     post_token_probs: bool = False
@@ -158,11 +161,11 @@
     uses_tpu: bool = False

 class GenerationMode(Enum):
-    STANDARD = 0
-    FOREVER = 1
-    UNTIL_EOS = 2
-    UNTIL_NEWLINE = 3
-    UNTIL_SENTENCE_END = 4
+    STANDARD = "standard"
+    FOREVER = "forever"
+    UNTIL_EOS = "until_eos"
+    UNTIL_NEWLINE = "until_newline"
+    UNTIL_SENTENCE_END = "until_sentence_end"

 class InferenceModel:
     """Root class for all models."""
@@ -585,7 +588,13 @@
         temp_stoppers = []

         if gen_mode == GenerationMode.FOREVER:
-            raise NotImplementedError()
+            if self.capabilties.stopper_hooks:
+                self.gen_state["stop_at_genamt"] = False
+                max_new = 1e7
+            else:
+                logger.warning(
+                    "User requested infinite generation on model that doesn't support stop hooks. Recipe for disaster!"
+                )
         elif gen_mode == GenerationMode.UNTIL_EOS:
             # Still need to unban
             raise NotImplementedError()
@@ -652,3 +661,19 @@
     def _post_token_gen(self, input_ids: torch.LongTensor) -> None:
         for hook in self.post_token_hooks:
             hook(self, input_ids)
+
+    def get_supported_gen_modes(self) -> List[GenerationMode]:
+        """Returns a list of compatible `GenerationMode`s for the current model.
+
+        Returns:
+            List[GenerationMode]: A list of compatible `GenerationMode`s.
+        """
+        ret = []
+        if self.capabilties.stopper_hooks:
+            ret += [
+                GenerationMode.FOREVER,
+                GenerationMode.UNTIL_EOS,
+                GenerationMode.UNTIL_NEWLINE,
+                GenerationMode.UNTIL_SENTENCE_END,
+            ]
+        return ret
\ No newline at end of file
diff --git a/static/koboldai.js b/static/koboldai.js
index cdb7bc79..d7560f54 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -83,6 +83,7 @@ let story_id = -1;
 var dirty_chunks = [];
 var initial_socketio_connection_occured = false;
 var selected_model_data;
+var supported_gen_modes = [];

 // Each entry into this array should be an object that looks like:
 // {class: "class", key: "key", func: callback}
@@ -151,11 +152,31 @@ const context_menu_actions = {
 	"submit-button": [
 		{label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: () => storySubmit()},
 		null,
-		{label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("forever")},
-		{label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_eos")},
+		{
+			label: "Generate Forever",
+			icon: "edit_off",
+			enabledOn: () => supported_gen_modes.includes("forever"),
+			click: () => storySubmit("forever")
+		},
+		{
+			label: "Generate Until EOS",
+			icon: "edit_off",
+			enabledOn: () => supported_gen_modes.includes("until_eos"),
+			click: () => storySubmit("until_eos")
+		},
 		null,
-		{label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_newline")},
-		{label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_sentence_end")},
+		{
+			label: "Finish Line",
+			icon: "edit_off",
+			enabledOn: () => supported_gen_modes.includes("until_newline"),
+			click: () => storySubmit("until_newline")
+		},
+		{
+			label: "Finish Sentence",
+			icon: "edit_off",
+			enabledOn: () => supported_gen_modes.includes("until_sentence_end"),
+			click: () => storySubmit("until_sentence_end")
+		},
 	],
 };
@@ -941,6 +962,9 @@ function var_changed(data) {
 	//special case for welcome text since we want to allow HTML
 	} else if (data.classname == 'model' && data.name == 'welcome') {
 		document.getElementById('welcome_text').innerHTML = data.value;
+	//Special case for permitted generation modes
+	} else if (data.classname == 'model' && data.name == 'supported_gen_modes') {
+		supported_gen_modes = data.value;
 	//Basic Data Syncing
 	} else {
 		var elements_to_change = document.getElementsByClassName("var_sync_"+data.classname.replace(" ", "_")+"_"+data.name.replace(" ", "_"));
@@ -6090,21 +6114,23 @@ process_cookies();
 			continue;
 		}

+		const enableCriteriaIsFunction = typeof action.enabledOn === "function";

-		let item = $e("div", contextMenu, {
+		const itemEl = $e("div", contextMenu, {
 			classes: ["context-menu-item", "noselect", `context-menu-${key}`],
-			"enabled-on": action.enabledOn,
+			"enabled-on": enableCriteriaIsFunction ? "CALLBACK" : action.enabledOn,
 			"cache-index": context_menu_cache.length
 		});
+		itemEl.enabledOnCallback = action.enabledOn;
 		context_menu_cache.push({shouldShow: action.shouldShow});

-		let icon = $e("span", item, {classes: ["material-icons-outlined"], innerText: action.icon});
-		$e("span", item, {classes: ["context-menu-label"], innerText: action.label});
+		const icon = $e("span", itemEl, {classes: ["material-icons-outlined"], innerText: action.icon});
+		$e("span", itemEl, {classes: ["context-menu-label"], innerText: action.label});

-		item.addEventListener("mousedown", e => e.preventDefault());
+		itemEl.addEventListener("mousedown", e => e.preventDefault());

 		// Expose the "summonEvent" to enable access to original context menu target.
-		item.addEventListener("click", () => action.click(summonEvent));
+		itemEl.addEventListener("click", () => action.click(summonEvent));
 	}
 }
@@ -6154,10 +6180,10 @@
 	// Disable non-applicable items
 	$(".context-menu-item").addClass("disabled");
-	
+
 	// A selection is made
 	if (getSelectionText()) $(".context-menu-item[enabled-on=SELECTION]").removeClass("disabled");
-	
+
 	// The caret is placed
 	if (get_caret_position(target) !== null) $(".context-menu-item[enabled-on=CARET]").removeClass("disabled");
@@ -6166,6 +6192,11 @@

 	$(".context-menu-item[enabled-on=ALWAYS]").removeClass("disabled");

+	for (const contextMenuItem of document.querySelectorAll(".context-menu-item[enabled-on=CALLBACK]")) {
+		if (!contextMenuItem.enabledOnCallback()) continue;
+		contextMenuItem.classList.remove("disabled");
+	}
+
 	// Make sure hr isn't first or last visible element
 	let visibles = [];
 	for (const item of contextMenu.children) {

From c78401bd124be939134c1f50ac831c434697b681 Mon Sep 17 00:00:00 2001
From: somebody
Date: Fri, 21 Jul 2023 15:22:14 -0500
Subject: [PATCH 09/11] Fix gen mode on first generation
---
 aiserver.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index ba224b3c..0bfaca22 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -3371,7 +3371,7 @@ def actionsubmit(
             koboldai_vars.prompt = data
             # Clear the startup text from game screen
             emit('from_server', {'cmd': 'updatescreen', 'gamestarted': False, 'data': 'Please wait, generating story...'}, broadcast=True, room="UI_1")
-            calcsubmit("") # Run the first action through the generator
+            calcsubmit("", gen_mode=gen_mode) # Run the first action through the generator
             if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0):
                 data = ""
                 force_submit = True
@@ -6205,7 +6205,7 @@ def UI_2_submit(data):
     actionsubmit(data['data'], actionmode=koboldai_vars.actionmode, gen_mode=gen_mode)
-    
+
 #==================================================================#
 # Event triggered when user clicks the submit button
 #==================================================================#

From e5d0a597a1806815ca7463a6536d6719ceb8d165 Mon Sep 17 00:00:00 2001
From: somebody
Date: Fri, 21 Jul 2023 15:36:32 -0500
Subject: [PATCH 10/11] Generation Mode: UNTIL_EOS

This mode enables the EOS token and will generate infinitely until
hitting it.
---
 modeling/inference_model.py           | 24 +++++++++++++-----------
 modeling/inference_models/hf_torch.py | 13 ++++++++++++-
 2 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/modeling/inference_model.py b/modeling/inference_model.py
index e09249c3..8b7f0e3e 100644
--- a/modeling/inference_model.py
+++ b/modeling/inference_model.py
@@ -585,19 +585,21 @@
             "wi_scanner_excluded_keys", set()
         )

+        self.gen_state["allow_eos"] = False
+
         temp_stoppers = []

+        if gen_mode not in self.get_supported_gen_modes():
+            logger.warning(f"User requested unsupported GenerationMode '{gen_mode}'!")
+            gen_mode = GenerationMode.STANDARD
+
         if gen_mode == GenerationMode.FOREVER:
-            if self.capabilties.stopper_hooks:
-                self.gen_state["stop_at_genamt"] = False
-                max_new = 1e7
-            else:
-                logger.warning(
-                    "User requested infinite generation on model that doesn't support stop hooks. Recipe for disaster!"
-                )
+            self.gen_state["stop_at_genamt"] = False
+            max_new = 1e7
         elif gen_mode == GenerationMode.UNTIL_EOS:
-            # Still need to unban
-            raise NotImplementedError()
+            self.gen_state["allow_eos"] = True
+            self.gen_state["stop_at_genamt"] = False
+            max_new = 1e7
         elif gen_mode == GenerationMode.UNTIL_NEWLINE:
             # TODO: Look into replacing `single_line` with `generation_mode`
             temp_stoppers.append(Stoppers.newline_stopper)
@@ -668,11 +670,11 @@
         Returns:
             List[GenerationMode]: A list of compatible `GenerationMode`s.
         """
-        ret = []
+        ret = [GenerationMode.STANDARD]
+
         if self.capabilties.stopper_hooks:
             ret += [
                 GenerationMode.FOREVER,
-                GenerationMode.UNTIL_EOS,
                 GenerationMode.UNTIL_NEWLINE,
                 GenerationMode.UNTIL_SENTENCE_END,
             ]
diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index 1b411c95..b4909f60 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -31,6 +31,7 @@ from modeling.stoppers import Stoppers
 from modeling.post_token_hooks import PostTokenHooks
 from modeling.inference_models.hf import HFInferenceModel
 from modeling.inference_model import (
+    GenerationMode,
     GenerationResult,
     GenerationSettings,
     ModelCapabilities,
@@ -254,7 +255,11 @@
                 kwargs["logits_warper"] = new_get_logits_warper(
                     beams=1,
                 )
-                if utils.koboldai_vars.newlinemode in ["s", "ns"]:
+
+                if (
+                    utils.koboldai_vars.newlinemode in ["s", "ns"]
+                    and not m_self.gen_state["allow_eos"]
+                ):
                     kwargs["eos_token_id"] = -1
                     kwargs.setdefault("pad_token_id", 2)
                 return new_sample.old_sample(self, *args, **kwargs)
@@ -605,3 +610,9 @@
             self.breakmodel = False
             self.usegpu = False
         return
+
+    def get_supported_gen_modes(self) -> List[GenerationMode]:
+        # The sampler patch above only bans the EOS token when allow_eos is unset.
+        return super().get_supported_gen_modes() + [
+            GenerationMode.UNTIL_EOS
+        ]
\ No newline at end of file

From 6e7b0794ea80c9eae1a6bc4f89590e3d657febea Mon Sep 17 00:00:00 2001
From: somebody
Date: Fri, 21 Jul 2023 15:40:07 -0500
Subject: [PATCH 11/11] Context Menu: Fix for elements with a context-menu
 attribute but...

...without an entry in `context_menu_actions`.
---
 static/koboldai.js | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/static/koboldai.js b/static/koboldai.js
index d7560f54..b25bef31 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -6153,6 +6153,10 @@ process_cookies();

 	// Show only applicable actions in the context menu
 	let contextMenuType = target.getAttribute("context-menu");
+
+	// Bail out if no actions are registered for this context menu type.
+	if (!context_menu_actions[contextMenuType]) return;
+
 	for (const contextMenuItem of contextMenu.childNodes) {
 		let shouldShow = contextMenuItem.classList.contains(`context-menu-${contextMenuType}`);
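A closing note on the plumbing the later patches rely on: PATCH 08's
string-valued GenerationMode enum is what lets UI_2_submit turn the
frontend's gen_mode string directly into a mode. A self-contained sketch,
trimmed to three members for brevity and mirroring the patched code above
(nothing new is introduced):

    from enum import Enum

    class GenerationMode(Enum):
        STANDARD = "standard"
        FOREVER = "forever"
        UNTIL_EOS = "until_eos"

    # Lookup by value is the "cool enum feature" PATCH 08 swaps in for
    # the hand-written dict from PATCH 05:
    assert GenerationMode("forever") is GenerationMode.FOREVER

    # Unknown names raise ValueError, which UI_2_submit catches and
    # downgrades to STANDARD with a warning instead of crashing:
    try:
        GenerationMode("bogus")
    except ValueError:
        mode = GenerationMode.STANDARD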