From 34a98d2962678b468b833ec3c506c690e4c50e8e Mon Sep 17 00:00:00 2001
From: onesome
Date: Fri, 21 Jul 2023 00:48:02 -0500
Subject: [PATCH 01/11] Context Menu: Small visual fixes

woohooooo back to css

- fixes margins to look better
- moves contents of context menu items 1px down
- fixes context menus near the edge wrapping their inner text (ew)
---
 static/koboldai.css | 10 ++++++++--
 static/koboldai.js  |  2 +-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/static/koboldai.css b/static/koboldai.css
index 3252c21a..3ad643d2 100644
--- a/static/koboldai.css
+++ b/static/koboldai.css
@@ -2705,13 +2705,14 @@ body {
 #context-menu > hr {
 	/* Division Color*/
 	border-top: 2px solid var(--context_menu_division);
-	margin: 5px 5px;
+	margin: 3px 5px;
 }

 .context-menu-item {
 	padding: 5px;
 	padding-right: 25px;
 	min-width: 100px;
+	white-space: nowrap;
 }

 .context-menu-item:hover {
@@ -2722,11 +2723,16 @@ body {

 .context-menu-item > .material-icons-outlined {
 	position: relative;
-	top: 2px;
+	top: 3px;
 	font-size: 15px;
 	margin-right: 5px;
 }

+.context-menu-item > .context-menu-label {
+	position: relative;
+	top: 1px;
+}
+
 /* Substitutions */
 #Substitutions {
 	margin-left: 10px;
diff --git a/static/koboldai.js b/static/koboldai.js
index 8b70dd6a..e8053f23 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -6071,7 +6071,7 @@ process_cookies();
 		context_menu_cache.push({shouldShow: action.shouldShow});

 		let icon = $e("span", item, {classes: ["material-icons-outlined"], innerText: action.icon});
-		item.append(action.label);
+		$e("span", item, {classes: ["context-menu-label"], innerText: action.label});

 		item.addEventListener("mousedown", e => e.preventDefault());

 		// Expose the "summonEvent" to enable access to original context menu target.

From 4921040fb462dac00ba8a028a8b22e9524f9f740 Mon Sep 17 00:00:00 2001
From: onesome
Date: Fri, 21 Jul 2023 00:52:12 -0500
Subject: [PATCH 02/11] Context Menu: Make things a little less bloaty

5px was a bit excessive

TODO: studied the context menu in my browser for a bit and noticed that
if it was going to be too close to the bottom, the browser flips the
vertical direction the context menu opens in. sounds neat!
---
 static/koboldai.css | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/static/koboldai.css b/static/koboldai.css
index 3ad643d2..b83384c4 100644
--- a/static/koboldai.css
+++ b/static/koboldai.css
@@ -2709,7 +2709,7 @@ body {
 }

 .context-menu-item {
-	padding: 5px;
+	padding: 4px;
 	padding-right: 25px;
 	min-width: 100px;
 	white-space: nowrap;

From 46c377b0c362d715e1c37f423ab763f367c32299 Mon Sep 17 00:00:00 2001
From: onesome
Date: Fri, 21 Jul 2023 00:53:48 -0500
Subject: [PATCH 03/11] Context Menu: Add stubs for new temporary stopping
 criteria idea

I think this would be cool!

Ideas:
- disable/grey when the model doesn't support stopping criteria
- shortcuts (maybe, this would def be a power user thing)
- option to generate until EOS token
- option to generate forever until the user manually stops
- (not super related but pixels away) make retry while generation is
  ongoing cancel generation and retry. same with undo.

(the stopper shape these stubs would eventually hook into is sketched
below)
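For reference, a hedged sketch of that stopper shape. `ellipsis_stopper`
is a hypothetical illustration only -- its signature mirrors the hooks in
modeling/stoppers.py (as refactored later in this series); nothing here
is added by any patch:

    import torch

    from modeling import inference_model

    def ellipsis_stopper(
        model: inference_model.InferenceModel,
        input_ids: torch.LongTensor,
    ) -> bool:
        """Stop once every batch member has emitted an ellipsis."""
        # State is tracked per batch member, since one sequence cannot be
        # stopped on its own; we wait until all of them qualify.
        if "ellipsis_seen" not in model.gen_state:
            model.gen_state["ellipsis_seen"] = [False] * len(input_ids)

        for i, sequence in enumerate(input_ids):
            if "..." in model.tokenizer.decode(sequence[-3:]):
                model.gen_state["ellipsis_seen"][i] = True

        if all(model.gen_state["ellipsis_seen"]):
            del model.gen_state["ellipsis_seen"]
            return True
        return False

Returning True halts generation for the whole batch; registering and
unregistering such a hook around a single generation is what the
temporary-stopper patches below end up doing.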
---
 static/koboldai.js       | 6 ++++++
 templates/index_new.html | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/static/koboldai.js b/static/koboldai.js
index e8053f23..64da7146 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -147,6 +147,12 @@ const context_menu_actions = {
 	"wi-img-upload-button": [
 		{label: "Upload Image", icon: "file_upload", enabledOn: "ALWAYS", click: wiImageReplace},
 		{label: "Use Generated Image", icon: "image", enabledOn: "GENERATED-IMAGE", click: wiImageUseGeneratedImage},
+	],
+	"submit-button": [
+		{label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: function(){}},
+		null,
+		{label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
+		{label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
 	]
 };
diff --git a/templates/index_new.html b/templates/index_new.html
index 99b8c941..53bcffd5 100644
--- a/templates/index_new.html
+++ b/templates/index_new.html
@@ -110,7 +110,7 @@
-
+

From 6cf63f781a3c17ab6b41c5f12cd05f824be9ba04 Mon Sep 17 00:00:00 2001
From: onesome
Date: Fri, 21 Jul 2023 01:58:57 -0500
Subject: [PATCH 04/11] YEAAAAAAAAAA
---
 static/koboldai.js       | 11 +++++++++++
 templates/index_new.html |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/static/koboldai.js b/static/koboldai.js
index 64da7146..75563df2 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -151,8 +151,19 @@ const context_menu_actions = {
 	"submit-button": [
 		{label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: function(){}},
 		null,
+		{label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
+		{label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
+		null,
 		{label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
 		{label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
+	],
+	"undo-button": [
+		{label: "Undo", icon: "undo", enabledOn: "ALWAYS", click: function(){}},
+		null,
+		{label: "Prune Actions", icon: "cut", enabledOn: "ALWAYS", click: function(){}},
+		{label: "Shred", icon: "local_fire_department", enabledOn: "ALWAYS", click: function(){}},
+		null,
+		{label: "Trim Last Sentence", icon: "carpenter", enabledOn: "ALWAYS", click: function(){}},
 	]
 };
diff --git a/templates/index_new.html b/templates/index_new.html
index 53bcffd5..2b1c0ddf 100644
--- a/templates/index_new.html
+++ b/templates/index_new.html
@@ -112,7 +112,7 @@
-
+

From 3a43b254b86733a637a2286bf8a3c9421674771a Mon Sep 17 00:00:00 2001
From: somebody
Date: Fri, 21 Jul 2023 13:27:30 -0500
Subject: [PATCH 05/11] Add basic support for some of the quick stoppers
---
 aiserver.py                 | 64 +++++++++++++++++++++++++++----------
 modeling/inference_model.py | 32 +++++++++++++++++++
 modeling/stoppers.py        | 52 ++++++++++++++++++++++++------
 static/koboldai.js          | 23 ++++++++-----
 4 files changed, 137 insertions(+), 34 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 0aa9bd4c..1cb9146e 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -12,6 +12,8 @@ import random
 import shutil
 import eventlet
+from modeling.inference_model import GenerationMode
+
 eventlet.monkey_patch(all=True, thread=False, os=False)
 import os, inspect, contextlib, pickle
 os.system("")
@@ -3266,7 +3268,16 @@ def check_for_backend_compilation():
         break
     koboldai_vars.checking = False

-def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, disable_recentrng=False, no_generate=False, ignore_aibusy=False):
+def actionsubmit(
+    data,
+    actionmode=0,
+    force_submit=False,
+    force_prompt_gen=False,
+    disable_recentrng=False,
+    no_generate=False,
+    ignore_aibusy=False,
+    gen_mode=GenerationMode.STANDARD
+):
     # Ignore new submissions if the AI is currently busy
     if(koboldai_vars.aibusy):
         return
@@ -3424,7 +3435,7 @@
             if(not no_generate and not koboldai_vars.noai and koboldai_vars.lua_koboldbridge.generating):
                 # Off to the tokenizer!
-                calcsubmit("")
+                calcsubmit("", gen_mode=gen_mode)
                 if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0):
                     data = ""
                     force_submit = True
@@ -3779,7 +3790,7 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
 #==================================================================#
 # Take submitted text and build the text to be given to generator
 #==================================================================#
-def calcsubmit(txt):
+def calcsubmit(txt, gen_mode=GenerationMode.STANDARD):
     anotetxt     = ""    # Placeholder for Author's Note text
     forceanote   = False # In case we don't have enough actions to hit A.N. depth
     anoteadded   = False # In case our budget runs out before we hit A.N. depth
@@ -3821,7 +3832,7 @@
             logger.debug("Submit: experimental_features time {}s".format(time.time()-start_time))

             start_time = time.time()
-            generate(subtxt, min, max, found_entries)
+            generate(subtxt, min, max, found_entries, gen_mode=gen_mode)
             logger.debug("Submit: generate time {}s".format(time.time()-start_time))

             attention_bias.attention_bias = None
@@ -3889,7 +3900,7 @@ class HordeException(Exception):
 # Send text to generator and deal with output
 #==================================================================#

-def generate(txt, minimum, maximum, found_entries=None):
+def generate(txt, minimum, maximum, found_entries=None, gen_mode=GenerationMode.STANDARD):
     koboldai_vars.generated_tkns = 0

     if(found_entries is None):
@@ -3911,7 +3922,7 @@
     # Submit input text to generator
     try:
         start_time = time.time()
-        genout, already_generated = tpool.execute(model.core_generate, txt, found_entries)
+        genout, already_generated = tpool.execute(model.core_generate, txt, found_entries, gen_mode=gen_mode)
         logger.debug("Generate: core_generate time {}s".format(time.time()-start_time))
     except Exception as e:
         if(issubclass(type(e), lupa.LuaError)):
@@ -6168,22 +6179,43 @@ def UI_2_delete_option(data):
 @socketio.on('submit')
 @logger.catch
 def UI_2_submit(data):
-    if not koboldai_vars.noai and data['theme'] != "":
+    if not koboldai_vars.noai and data['theme']:
+        # Random prompt generation
         logger.debug("doing random prompt")
         memory = koboldai_vars.memory
         koboldai_vars.memory = "{}\n\nYou generate the following {} story concept :".format(koboldai_vars.memory, data['theme'])
         koboldai_vars.lua_koboldbridge.feedback = None
         actionsubmit("", force_submit=True, force_prompt_gen=True)
         koboldai_vars.memory = memory
-    else:
-        logger.debug("doing normal input")
-        koboldai_vars.actions.clear_unused_options()
-        koboldai_vars.lua_koboldbridge.feedback = None
-        koboldai_vars.recentrng = koboldai_vars.recentrngm = None
-        if koboldai_vars.actions.action_count == -1:
-            actionsubmit(data['data'], actionmode=koboldai_vars.actionmode)
-        else:
-            actionsubmit(data['data'], actionmode=koboldai_vars.actionmode)
+        return
+
+    logger.debug("doing normal input")
+    koboldai_vars.actions.clear_unused_options()
+    koboldai_vars.lua_koboldbridge.feedback = None
+    koboldai_vars.recentrng = koboldai_vars.recentrngm = None
+
+    gen_mode_name = data.get("gen_mode", None)
+    gen_mode = {
+        # If we don't have a gen mode, or it's None (the default), just do a
+        # normal submission.
+        None: GenerationMode.STANDARD,
+
+        # NOTE: forever should be a no-op on models that don't support
+        # interrupting generation. This should be conveyed to the user by
+        # graying out the option in the context menu.
+        "forever": GenerationMode.FOREVER,
+
+        # The following gen modes require stopping criteria to be respected by
+        # the backend:
+        "until_eos": GenerationMode.UNTIL_EOS,
+        "until_newline": GenerationMode.UNTIL_NEWLINE,
+        "until_sentence_end": GenerationMode.UNTIL_SENTENCE_END,
+    }.get(gen_mode_name, None)
+
+    if not gen_mode:
+        raise RuntimeError(f"Unknown gen_mode '{gen_mode_name}'")
+
+    actionsubmit(data['data'], actionmode=koboldai_vars.actionmode, gen_mode=gen_mode)
diff --git a/modeling/inference_model.py b/modeling/inference_model.py
index a2d4fa63..1d285576 100644
--- a/modeling/inference_model.py
+++ b/modeling/inference_model.py
@@ -3,6 +3,8 @@ from __future__ import annotations
 from dataclasses import dataclass
 import time
 from typing import List, Optional, Union
+
+from enum import Enum

 from logger import logger
 import torch
@@ -12,6 +14,7 @@ from transformers import (
     GPT2Tokenizer,
     AutoTokenizer,
 )
+from modeling.stoppers import Stoppers
 from modeling.tokenizer import GenericTokenizer

 from modeling import logits_processors
@@ -154,6 +157,12 @@ class ModelCapabilities:
     # Some models need to warm up the TPU before use
     uses_tpu: bool = False

+class GenerationMode(Enum):
+    STANDARD = 0
+    FOREVER = 1
+    UNTIL_EOS = 2
+    UNTIL_NEWLINE = 3
+    UNTIL_SENTENCE_END = 4

 class InferenceModel:
     """Root class for all models."""
@@ -256,6 +265,7 @@
         self,
         text: list,
         found_entries: set,
+        gen_mode: GenerationMode = GenerationMode.STANDARD,
     ):
         """Generate story text. Heavily tied to story-specific parameters; if
         you are making a new generation-based feature, consider `generate_raw()`.
@@ -263,6 +273,7 @@
         Args:
             text (list): Encoded input tokens
             found_entries (set): Entries found for Dynamic WI
+            gen_mode (GenerationMode): The GenerationMode to pass to raw_generate. Defaults to GenerationMode.STANDARD

         Raises:
             RuntimeError: if inconsistancies are detected with the internal state and Lua state -- sanity check
@@ -358,6 +369,7 @@
                 seed=utils.koboldai_vars.seed
                 if utils.koboldai_vars.full_determinism
                 else None,
+                gen_mode=gen_mode
             )
             logger.debug(
                 "core_generate: run raw_generate pass {} {}s".format(
@@ -532,6 +544,7 @@
         found_entries: set = (),
         tpu_dynamic_inference: bool = False,
         seed: Optional[int] = None,
+        gen_mode: GenerationMode = GenerationMode.STANDARD,
         **kwargs,
     ) -> GenerationResult:
         """A wrapper around `_raw_generate()` that handles gen_state and other stuff. Use this to generate text outside of the story.
@@ -547,6 +560,7 @@
             is_core (bool, optional): Whether this generation is a core story generation. Defaults to False.
             single_line (bool, optional): Generate one line only.. Defaults to False.
             found_entries (set, optional): Entries found for Dynamic WI. Defaults to ().
+            gen_mode (GenerationMode): Special generation mode. Defaults to GenerationMode.STANDARD.

         Raises:
             ValueError: If prompt type is weird
@@ -568,6 +582,21 @@
             "wi_scanner_excluded_keys", set()
         )

+        temp_stoppers = []
+
+        if gen_mode == GenerationMode.FOREVER:
+            raise NotImplementedError()
+        elif gen_mode == GenerationMode.UNTIL_EOS:
+            # Still need to unban
+            raise NotImplementedError()
+        elif gen_mode == GenerationMode.UNTIL_NEWLINE:
+            # TODO: Look into replacing `single_line` with `generation_mode`
+            temp_stoppers.append(Stoppers.newline_stopper)
+        elif gen_mode == GenerationMode.UNTIL_SENTENCE_END:
+            temp_stoppers.append(Stoppers.sentence_end_stopper)
+
+        self.stopper_hooks += temp_stoppers
+
         utils.koboldai_vars.inference_config.do_core = is_core
         gen_settings = GenerationSettings(*(generation_settings or {}))
@@ -604,6 +633,9 @@
             f"Generated {len(result.encoded[0])} tokens in {time_end} seconds, for an average rate of {tokens_per_second} tokens per second."
         )

+        for stopper in temp_stoppers:
+            self.stopper_hooks.remove(stopper)
+
         return result

     def generate(
diff --git a/modeling/stoppers.py b/modeling/stoppers.py
index 94c09e85..02c1ce48 100644
--- a/modeling/stoppers.py
+++ b/modeling/stoppers.py
@@ -3,15 +3,12 @@ from __future__ import annotations
 import torch

 import utils
-from modeling.inference_model import (
-    InferenceModel,
-)
-
+from modeling import inference_model

 class Stoppers:
     @staticmethod
     def core_stopper(
-        model: InferenceModel,
+        model: inference_model.InferenceModel,
         input_ids: torch.LongTensor,
     ) -> bool:
         if not utils.koboldai_vars.inference_config.do_core:
@@ -62,7 +59,7 @@
     @staticmethod
     def dynamic_wi_scanner(
-        model: InferenceModel,
+        model: inference_model.InferenceModel,
         input_ids: torch.LongTensor,
     ) -> bool:
         if not utils.koboldai_vars.inference_config.do_dynamic_wi:
@@ -93,7 +90,7 @@
     @staticmethod
     def chat_mode_stopper(
-        model: InferenceModel,
+        model: inference_model.InferenceModel,
         input_ids: torch.LongTensor,
     ) -> bool:
         if not utils.koboldai_vars.chatmode:
@@ -118,7 +115,7 @@
     @staticmethod
     def stop_sequence_stopper(
-        model: InferenceModel,
+        model: inference_model.InferenceModel,
         input_ids: torch.LongTensor,
     ) -> bool:

@@ -145,14 +142,22 @@
     @staticmethod
     def singleline_stopper(
-        model: InferenceModel,
+        model: inference_model.InferenceModel,
         input_ids: torch.LongTensor,
     ) -> bool:
-        """If singleline mode is enabled, it's pointless to generate output beyond the first newline."""
+        """Stop on occurrences of newlines **if singleline is enabled**."""
+        # It might be better just to do this further up the line
         if not utils.koboldai_vars.singleline:
             return False

+        return Stoppers.newline_stopper(model, input_ids)
+
+    @staticmethod
+    def newline_stopper(
+        model: inference_model.InferenceModel,
+        input_ids: torch.LongTensor,
+    ) -> bool:
+        """Stop on occurrences of newlines."""
         # Keep track of presence of newlines in each sequence; we cannot stop a
         # batch member individually, so we must wait for all of them to contain
         # a newline.
@@ -167,3 +172,30 @@
                 del model.gen_state["newline_in_sequence"]
             return True
         return False
+
+    @staticmethod
+    def sentence_end_stopper(
+        model: inference_model.InferenceModel,
+        input_ids: torch.LongTensor,
+    ) -> bool:
+        """Stops at the end of sentences."""
+
+        # TODO: Make this more robust
+        SENTENCE_ENDS = [".", "?", "!"]
+
+        # We need to keep track of stopping for each batch, since we can't stop
+        # one individually.
+        if "sentence_end_sequence" not in model.gen_state:
+            model.gen_state["sentence_end_sequence"] = [False] * len(input_ids)
+
+        for sequence_idx, batch_sequence in enumerate(input_ids):
+            decoded = model.tokenizer.decode(batch_sequence[-1])
+            for end in SENTENCE_ENDS:
+                if end in decoded:
+                    model.gen_state["sentence_end_sequence"][sequence_idx] = True
+                    break
+
+        if all(model.gen_state["sentence_end_sequence"]):
+            del model.gen_state["sentence_end_sequence"]
+            return True
+        return False
\ No newline at end of file
diff --git a/static/koboldai.js b/static/koboldai.js
index 75563df2..320ec927 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -149,13 +149,13 @@ const context_menu_actions = {
 		{label: "Use Generated Image", icon: "image", enabledOn: "GENERATED-IMAGE", click: wiImageUseGeneratedImage},
 	],
 	"submit-button": [
-		{label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: function(){}},
+		{label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: () => storySubmit()},
 		null,
-		{label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
-		{label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
+		{label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("forever")},
+		{label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_eos")},
 		null,
-		{label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
-		{label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}},
+		{label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_newline")},
+		{label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_sentence_end")},
 	],
 	"undo-button": [
 		{label: "Undo", icon: "undo", enabledOn: "ALWAYS", click: function(){}},
@@ -256,10 +256,17 @@ function disconnect() {
 	document.getElementById("disconnect_message").classList.remove("hidden");
 }

-function storySubmit() {
+function storySubmit(genMode=null) {
+	const textInput = document.getElementById("input_text");
+	const themeInput = document.getElementById("themetext");
 	disruptStoryState();
-	socket.emit('submit', {'data': document.getElementById('input_text').value, 'theme': document.getElementById('themetext').value});
-	document.getElementById('input_text').value = '';
+	socket.emit('submit', {
+		data: textInput.value,
+		theme: themeInput.value,
+		gen_mode: genMode,
+	});
+
+	textInput.value = '';
 	document.getElementById('themetext').value = '';
 }

From 1c4157a41b753b8e3dd4246770c3cfa889485306 Mon Sep 17 00:00:00 2001
From: somebody
Date: Fri, 21 Jul 2023 13:33:38 -0500
Subject: [PATCH 06/11] Maybe another time

too many ideas at once
---
 static/koboldai.js | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/static/koboldai.js b/static/koboldai.js
index 320ec927..8ccac9dc 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -157,14 +157,6 @@ const context_menu_actions = {
 		{label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_newline")},
 		{label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_sentence_end")},
 	],
-	"undo-button": [
-		{label: "Undo", icon: "undo", enabledOn: "ALWAYS", click: function(){}},
-		null,
-		{label: "Prune Actions", icon: "cut", enabledOn: "ALWAYS", click: function(){}},
-		{label: "Shred", icon: "local_fire_department", enabledOn: "ALWAYS", click: function(){}},
-		null,
-		{label: "Trim Last Sentence", icon: "carpenter", enabledOn: "ALWAYS", click: function(){}},
-	]
 };

 let context_menu_cache = [];

From b8671cce09e83c4c64239951edf0150ef8b8d190 Mon Sep 17 00:00:00 2001
From: somebody
Date: Fri, 21 Jul 2023 13:48:23 -0500
Subject: [PATCH 07/11] Context Menu: Change positioning algorithm for y-axis
---
 static/koboldai.js | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/static/koboldai.js b/static/koboldai.js
index 8ccac9dc..cdb7bc79 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -5818,8 +5818,21 @@ function position_context_menu(contextMenu, x, y) {
 		right: x + width,
 	};

+	// Slide over if running against the window bounds.
 	if (farMenuBounds.right > bounds.right) x -= farMenuBounds.right - bounds.right;
-	if (farMenuBounds.bottom > bounds.bottom) y -= farMenuBounds.bottom - bounds.bottom;
+
+	if (farMenuBounds.bottom > bounds.bottom) {
+		// We've hit the bottom.
+
+		// The old algorithm pushed the menu against the wall, similar to what's
+		// done on the x-axis:
+		// y -= farMenuBounds.bottom - bounds.bottom;
+		// But now, we make the box change its emission direction from the cursor:
+		y -= (height + 5);
+		// The main advantage of this approach is that the cursor is never directly
+		// placed above a context menu item immediately after activating the context
+		// menu. (Thus the 5px offset also added)
+	}

 	contextMenu.style.left = `${x}px`;
 	contextMenu.style.top = `${y}px`;

From 8d5ae38b4568e1dbc893c8adfb7d3ac4a8bd57c2 Mon Sep 17 00:00:00 2001
From: somebody
Date: Fri, 21 Jul 2023 14:29:41 -0500
Subject: [PATCH 08/11] Context Menu: Show if gen mode is supported

- adds callback support to `enabledOn` in context menu items
- adds `supported_gen_modes` variable for frontend to check if a gen
  mode is supported
- adds `get_supported_gen_modes` to `InferenceModel` to get supported
  gen modes
- takes advantage of cool enum features for less enum-handling code
---
 aiserver.py                 | 29 ++++++-------------
 koboldai_settings.py        |  1 +
 modeling/inference_model.py | 37 +++++++++++++++++++++----
 static/koboldai.js          | 55 +++++++++++++++++++++++++++++--------
 4 files changed, 84 insertions(+), 38 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 1cb9146e..ba224b3c 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1790,7 +1790,9 @@ def load_model(model_backend, initial_load=False):

     with use_custom_unpickler(RestrictedUnpickler):
         model = model_backends[model_backend]
+        koboldai_vars.supported_gen_modes = [x.value for x in model.get_supported_gen_modes()]
         model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel)
+
     koboldai_vars.model = model.model_name if "model_name" in vars(model) else model.id #Should have model_name, but it could be set to id depending on how it's setup
     if koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"):
         koboldai_vars.model = os.path.basename(os.path.normpath(model.path))
@@ -6194,26 +6196,13 @@ def UI_2_submit(data):
     koboldai_vars.lua_koboldbridge.feedback = None
     koboldai_vars.recentrng = koboldai_vars.recentrngm = None

-    gen_mode_name = data.get("gen_mode", None)
-    gen_mode = {
-        # If we don't have a gen mode, or it's None (the default), just do a
-        # normal submission.
-        None: GenerationMode.STANDARD,
-
-        # NOTE: forever should be a no-op on models that don't support
-        # interrupting generation. This should be conveyed to the user by
-        # graying out the option in the context menu.
-        "forever": GenerationMode.FOREVER,
-
-        # The following gen modes require stopping criteria to be respected by
-        # the backend:
-        "until_eos": GenerationMode.UNTIL_EOS,
-        "until_newline": GenerationMode.UNTIL_NEWLINE,
-        "until_sentence_end": GenerationMode.UNTIL_SENTENCE_END,
-    }.get(gen_mode_name, None)
-
-    if not gen_mode:
-        raise RuntimeError(f"Unknown gen_mode '{gen_mode_name}'")
+    gen_mode_name = data.get("gen_mode", None) or "standard"
+    try:
+        gen_mode = GenerationMode(gen_mode_name)
+    except ValueError:
+        # Invalid enum lookup!
+        gen_mode = GenerationMode.STANDARD
+        logger.warning(f"Unknown gen_mode '{gen_mode_name}', using STANDARD! Report this!")

     actionsubmit(data['data'], actionmode=koboldai_vars.actionmode, gen_mode=gen_mode)
diff --git a/koboldai_settings.py b/koboldai_settings.py
index ebd8c019..f061beb1 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -685,6 +685,7 @@ class model_settings(settings):
         self._koboldai_vars = koboldai_vars
         self.alt_multi_gen = False
         self.bit_8_available = None
+        self.supported_gen_modes = []

     def reset_for_model_load(self):
         self.simple_randomness = 0 #Set first as this affects other outputs
diff --git a/modeling/inference_model.py b/modeling/inference_model.py
index 1d285576..e09249c3 100644
--- a/modeling/inference_model.py
+++ b/modeling/inference_model.py
@@ -147,7 +147,10 @@ class GenerationSettings:
 class ModelCapabilities:
     embedding_manipulation: bool = False
     post_token_hooks: bool = False
+
+    # Used to gauge if manual stopping is possible
     stopper_hooks: bool = False
+
     # TODO: Support non-live probabilities from APIs
     post_token_probs: bool = False
@@ -158,11 +161,11 @@
     uses_tpu: bool = False

 class GenerationMode(Enum):
-    STANDARD = 0
-    FOREVER = 1
-    UNTIL_EOS = 2
-    UNTIL_NEWLINE = 3
-    UNTIL_SENTENCE_END = 4
+    STANDARD = "standard"
+    FOREVER = "forever"
+    UNTIL_EOS = "until_eos"
+    UNTIL_NEWLINE = "until_newline"
+    UNTIL_SENTENCE_END = "until_sentence_end"

 class InferenceModel:
     """Root class for all models."""
@@ -585,7 +588,13 @@
         temp_stoppers = []

         if gen_mode == GenerationMode.FOREVER:
-            raise NotImplementedError()
+            if self.capabilties.stopper_hooks:
+                self.gen_state["stop_at_genamt"] = False
+                max_new = 1e7
+            else:
+                logger.warning(
+                    "User requested infinite generation on model that doesn't support stop hooks. Recipe for disaster!"
+                )
         elif gen_mode == GenerationMode.UNTIL_EOS:
             # Still need to unban
             raise NotImplementedError()
@@ -652,3 +661,19 @@
     def _post_token_gen(self, input_ids: torch.LongTensor) -> None:
         for hook in self.post_token_hooks:
             hook(self, input_ids)
+
+    def get_supported_gen_modes(self) -> List[GenerationMode]:
+        """Returns a list of compatible `GenerationMode`s for the current model.
+
+        Returns:
+            List[GenerationMode]: A list of compatible `GenerationMode`s.
+        """
+        ret = []
+        if self.capabilties.stopper_hooks:
+            ret += [
+                GenerationMode.FOREVER,
+                GenerationMode.UNTIL_EOS,
+                GenerationMode.UNTIL_NEWLINE,
+                GenerationMode.UNTIL_SENTENCE_END,
+            ]
+        return ret
\ No newline at end of file
diff --git a/static/koboldai.js b/static/koboldai.js
index cdb7bc79..d7560f54 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -83,6 +83,7 @@ let story_id = -1;
 var dirty_chunks = [];
 var initial_socketio_connection_occured = false;
 var selected_model_data;
+var supported_gen_modes = [];

 // Each entry into this array should be an object that looks like:
 // {class: "class", key: "key", func: callback}
@@ -151,11 +152,31 @@ const context_menu_actions = {
 	"submit-button": [
 		{label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: () => storySubmit()},
 		null,
-		{label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("forever")},
-		{label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_eos")},
+		{
+			label: "Generate Forever",
+			icon: "edit_off",
+			enabledOn: () => supported_gen_modes.includes("forever"),
+			click: () => storySubmit("forever")
+		},
+		{
+			label: "Generate Until EOS",
+			icon: "edit_off",
+			enabledOn: () => supported_gen_modes.includes("until_eos"),
+			click: () => storySubmit("until_eos")
+		},
 		null,
-		{label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_newline")},
-		{label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_sentence_end")},
+		{
+			label: "Finish Line",
+			icon: "edit_off",
+			enabledOn: () => supported_gen_modes.includes("until_newline"),
+			click: () => storySubmit("until_newline")
+		},
+		{
+			label: "Finish Sentence",
+			icon: "edit_off",
+			enabledOn: () => supported_gen_modes.includes("until_sentence_end"),
+			click: () => storySubmit("until_sentence_end")
+		},
 	],
 };
@@ -941,6 +962,9 @@ function var_changed(data) {
 	//special case for welcome text since we want to allow HTML
 	} else if (data.classname == 'model' && data.name == 'welcome') {
 		document.getElementById('welcome_text').innerHTML = data.value;
+	//Special case for permitted generation modes
+	} else if (data.classname == 'model' && data.name == 'supported_gen_modes') {
+		supported_gen_modes = data.value;
 	//Basic Data Syncing
 	} else {
 		var elements_to_change = document.getElementsByClassName("var_sync_"+data.classname.replace(" ", "_")+"_"+data.name.replace(" ", "_"));
@@ -6090,21 +6114,23 @@ process_cookies();
 			continue;
 		}

+		const enableCriteriaIsFunction = typeof action.enabledOn === "function";

-		let item = $e("div", contextMenu, {
+		const itemEl = $e("div", contextMenu, {
 			classes: ["context-menu-item", "noselect", `context-menu-${key}`],
-			"enabled-on": action.enabledOn,
+			"enabled-on": enableCriteriaIsFunction ? "CALLBACK" : action.enabledOn,
 			"cache-index": context_menu_cache.length
 		});
+		itemEl.enabledOnCallback = action.enabledOn;
 		context_menu_cache.push({shouldShow: action.shouldShow});

-		let icon = $e("span", item, {classes: ["material-icons-outlined"], innerText: action.icon});
-		$e("span", item, {classes: ["context-menu-label"], innerText: action.label});
+		const icon = $e("span", itemEl, {classes: ["material-icons-outlined"], innerText: action.icon});
+		$e("span", itemEl, {classes: ["context-menu-label"], innerText: action.label});

-		item.addEventListener("mousedown", e => e.preventDefault());
+		itemEl.addEventListener("mousedown", e => e.preventDefault());

 		// Expose the "summonEvent" to enable access to original context menu target.
-		item.addEventListener("click", () => action.click(summonEvent));
+		itemEl.addEventListener("click", () => action.click(summonEvent));
 	}
 }
@@ -6154,10 +6180,10 @@
 	// Disable non-applicable items
 	$(".context-menu-item").addClass("disabled");
-	
+
 	// A selection is made
 	if (getSelectionText()) $(".context-menu-item[enabled-on=SELECTION]").removeClass("disabled");
-	
+
 	// The caret is placed
 	if (get_caret_position(target) !== null) $(".context-menu-item[enabled-on=CARET]").removeClass("disabled");
@@ -6166,6 +6192,11 @@

 	$(".context-menu-item[enabled-on=ALWAYS]").removeClass("disabled");

+	for (const contextMenuItem of document.querySelectorAll(".context-menu-item[enabled-on=CALLBACK]")) {
+		if (!contextMenuItem.enabledOnCallback()) continue;
+		contextMenuItem.classList.remove("disabled");
+	}
+
 	// Make sure hr isn't first or last visible element
 	let visibles = [];
 	for (const item of contextMenu.children) {

From c78401bd124be939134c1f50ac831c434697b681 Mon Sep 17 00:00:00 2001
From: somebody
Date: Fri, 21 Jul 2023 15:22:14 -0500
Subject: [PATCH 09/11] Fix gen mode on first generation
---
 aiserver.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index ba224b3c..0bfaca22 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -3371,7 +3371,7 @@ def actionsubmit(
             koboldai_vars.prompt = data
             # Clear the startup text from game screen
             emit('from_server', {'cmd': 'updatescreen', 'gamestarted': False, 'data': 'Please wait, generating story...'}, broadcast=True, room="UI_1")
-            calcsubmit("") # Run the first action through the generator
+            calcsubmit("", gen_mode=gen_mode) # Run the first action through the generator
             if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0):
                 data = ""
                 force_submit = True
@@ -6205,7 +6205,7 @@ def UI_2_submit(data):
     actionsubmit(data['data'], actionmode=koboldai_vars.actionmode, gen_mode=gen_mode)
-    
+
 #==================================================================#
 # Event triggered when user clicks the submit button
 #==================================================================#

From e5d0a597a1806815ca7463a6536d6719ceb8d165 Mon Sep 17 00:00:00 2001
From: somebody
Date: Fri, 21 Jul 2023 15:36:32 -0500
Subject: [PATCH 10/11] Generation Mode: UNTIL_EOS

This mode enables the EOS token and will generate infinitely until
hitting it.
---
 modeling/inference_model.py           | 24 +++++++++++++-----------
 modeling/inference_models/hf_torch.py | 13 ++++++++++++-
 2 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/modeling/inference_model.py b/modeling/inference_model.py
index e09249c3..8b7f0e3e 100644
--- a/modeling/inference_model.py
+++ b/modeling/inference_model.py
@@ -585,19 +585,21 @@
             "wi_scanner_excluded_keys", set()
         )

+        self.gen_state["allow_eos"] = False
+
         temp_stoppers = []

+        if gen_mode not in self.get_supported_gen_modes():
+            logger.warning(f"User requested unsupported GenerationMode '{gen_mode}'!")
+            gen_mode = GenerationMode.STANDARD
+
         if gen_mode == GenerationMode.FOREVER:
-            if self.capabilties.stopper_hooks:
-                self.gen_state["stop_at_genamt"] = False
-                max_new = 1e7
-            else:
-                logger.warning(
-                    "User requested infinite generation on model that doesn't support stop hooks. Recipe for disaster!"
-                )
+            self.gen_state["stop_at_genamt"] = False
+            max_new = 1e7
         elif gen_mode == GenerationMode.UNTIL_EOS:
-            # Still need to unban
-            raise NotImplementedError()
+            self.gen_state["allow_eos"] = True
+            self.gen_state["stop_at_genamt"] = False
+            max_new = 1e7
         elif gen_mode == GenerationMode.UNTIL_NEWLINE:
             # TODO: Look into replacing `single_line` with `generation_mode`
             temp_stoppers.append(Stoppers.newline_stopper)
@@ -668,11 +670,11 @@
         Returns:
             List[GenerationMode]: A list of compatible `GenerationMode`s.
         """
-        ret = []
+        ret = [GenerationMode.STANDARD]
+
         if self.capabilties.stopper_hooks:
             ret += [
                 GenerationMode.FOREVER,
-                GenerationMode.UNTIL_EOS,
                 GenerationMode.UNTIL_NEWLINE,
                 GenerationMode.UNTIL_SENTENCE_END,
             ]
diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index 1b411c95..b4909f60 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -31,6 +31,7 @@ from modeling.stoppers import Stoppers
 from modeling.post_token_hooks import PostTokenHooks
 from modeling.inference_models.hf import HFInferenceModel
 from modeling.inference_model import (
+    GenerationMode,
     GenerationResult,
     GenerationSettings,
     ModelCapabilities,
@@ -254,7 +255,11 @@
                 kwargs["logits_warper"] = new_get_logits_warper(
                     beams=1,
                 )
-                if utils.koboldai_vars.newlinemode in ["s", "ns"]:
+
+                if (
+                    utils.koboldai_vars.newlinemode in ["s", "ns"]
+                    and not m_self.gen_state["allow_eos"]
+                ):
                     kwargs["eos_token_id"] = -1
                     kwargs.setdefault("pad_token_id", 2)
                 return new_sample.old_sample(self, *args, **kwargs)
@@ -605,3 +610,9 @@
             self.breakmodel = False
             self.usegpu = False
         return
+
+    def get_supported_gen_modes(self) -> List[GenerationMode]:
+        # The sampler patch above only bans the EOS token when allow_eos is unset.
+        return super().get_supported_gen_modes() + [
+            GenerationMode.UNTIL_EOS
+        ]
\ No newline at end of file

From 6e7b0794ea80c9eae1a6bc4f89590e3d657febea Mon Sep 17 00:00:00 2001
From: somebody
Date: Fri, 21 Jul 2023 15:40:07 -0500
Subject: [PATCH 11/11] Context Menu: Fix for elements with a context-menu
 attribute but...

...without an entry in `context_menu_actions`.
---
 static/koboldai.js | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/static/koboldai.js b/static/koboldai.js
index d7560f54..b25bef31 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -6153,6 +6153,10 @@ process_cookies();

 	// Show only applicable actions in the context menu
 	let contextMenuType = target.getAttribute("context-menu");
+
+	// Bail out if no actions are registered for this context menu type.
+	if (!context_menu_actions[contextMenuType]) return;
+
 	for (const contextMenuItem of contextMenu.childNodes) {
 		let shouldShow = contextMenuItem.classList.contains(`context-menu-${contextMenuType}`);
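A closing note on the plumbing the later patches rely on: PATCH 08's
string-valued GenerationMode enum is what lets UI_2_submit turn the
frontend's gen_mode string directly into a mode. A self-contained sketch,
trimmed to three members for brevity and mirroring the patched code above
(nothing new is introduced):

    from enum import Enum

    class GenerationMode(Enum):
        STANDARD = "standard"
        FOREVER = "forever"
        UNTIL_EOS = "until_eos"

    # Lookup by value is the "cool enum feature" PATCH 08 swaps in for
    # the hand-written dict from PATCH 05:
    assert GenerationMode("forever") is GenerationMode.FOREVER

    # Unknown names raise ValueError, which UI_2_submit catches and
    # downgrades to STANDARD with a warning instead of crashing:
    try:
        GenerationMode("bogus")
    except ValueError:
        mode = GenerationMode.STANDARD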