From 73c06bf0a51bc73ab51f060213995abfe872f136 Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu, 15 Jun 2023 16:02:20 -0500 Subject: [PATCH 001/107] add adventuremode stopper adds a stopper token for adventure mode when it detects the bot generating impersonating text after " > You" --- modeling/stoppers.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/modeling/stoppers.py b/modeling/stoppers.py index 94c09e85..0fc6ce10 100644 --- a/modeling/stoppers.py +++ b/modeling/stoppers.py @@ -116,6 +116,28 @@ class Stoppers: return True return False + def adventure_mode_stopper( + model: InferenceModel, + input_ids: torch.LongTensor, + ) -> bool: + if not utils.koboldai_vars.adventure: + return False + + data = [model.tokenizer.decode(x) for x in input_ids] + # null_character = model.tokenizer.encode(chr(0))[0] + if "completed" not in model.gen_state: + model.gen_state["completed"] = [False] * len(input_ids) + + for i in range(len(input_ids)): + if (data[i][-6:] == " > You"): + model.gen_state["completed"][i] = True + + if all(model.gen_state["completed"]): + utils.koboldai_vars.generated_tkns = utils.koboldai_vars.genamt + del model.gen_state["completed"] + return True + return False + @staticmethod def stop_sequence_stopper( model: InferenceModel, From 877028ec7f2125cd6d550d5a913ca6b602ddad69 Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu, 15 Jun 2023 16:07:54 -0500 Subject: [PATCH 002/107] Update hf_torch.py with adv mode stopper --- modeling/inference_models/hf_torch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 2f575e73..10fd3cb6 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -69,6 +69,7 @@ class HFTorchInferenceModel(HFInferenceModel): Stoppers.dynamic_wi_scanner, Stoppers.singleline_stopper, Stoppers.chat_mode_stopper, + Stoppers.adventure_mode_stopper, Stoppers.stop_sequence_stopper, ] From 83493dff2e7d76fad008dad3cfe8b6c400134d34 Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu, 15 Jun 2023 17:15:33 -0500 Subject: [PATCH 003/107] modify adv stopper --- modeling/stoppers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeling/stoppers.py b/modeling/stoppers.py index 0fc6ce10..8fe5c229 100644 --- a/modeling/stoppers.py +++ b/modeling/stoppers.py @@ -129,7 +129,7 @@ class Stoppers: model.gen_state["completed"] = [False] * len(input_ids) for i in range(len(input_ids)): - if (data[i][-6:] == " > You"): + if (data[i][-6:] == "> You " or data[i][-4:] == "You:"): model.gen_state["completed"][i] = True if all(model.gen_state["completed"]): From 8b742b2bd4e346e6ec45d4642a6af374402697ad Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu, 15 Jun 2023 17:20:38 -0500 Subject: [PATCH 004/107] add missing @staticmethod --- modeling/stoppers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modeling/stoppers.py b/modeling/stoppers.py index 8fe5c229..d9f212dd 100644 --- a/modeling/stoppers.py +++ b/modeling/stoppers.py @@ -116,6 +116,7 @@ class Stoppers: return True return False + @staticmethod def adventure_mode_stopper( model: InferenceModel, input_ids: torch.LongTensor, From 91d543bf5a41fda8dfd5d166b47f36bf5e420cb1 Mon Sep 17 00:00:00 2001 From: YellowRoseCx 
<80486540+YellowRoseCx@users.noreply.github.com> Date: Thu, 29 Jun 2023 02:34:08 -0500 Subject: [PATCH 005/107] Update stoppers.py --- modeling/stoppers.py | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/modeling/stoppers.py b/modeling/stoppers.py index d9f212dd..e36eb522 100644 --- a/modeling/stoppers.py +++ b/modeling/stoppers.py @@ -116,28 +116,6 @@ class Stoppers: return True return False - @staticmethod - def adventure_mode_stopper( - model: InferenceModel, - input_ids: torch.LongTensor, - ) -> bool: - if not utils.koboldai_vars.adventure: - return False - - data = [model.tokenizer.decode(x) for x in input_ids] - # null_character = model.tokenizer.encode(chr(0))[0] - if "completed" not in model.gen_state: - model.gen_state["completed"] = [False] * len(input_ids) - - for i in range(len(input_ids)): - if (data[i][-6:] == "> You " or data[i][-4:] == "You:"): - model.gen_state["completed"][i] = True - - if all(model.gen_state["completed"]): - utils.koboldai_vars.generated_tkns = utils.koboldai_vars.genamt - del model.gen_state["completed"] - return True - return False @staticmethod def stop_sequence_stopper( @@ -149,7 +127,12 @@ class Stoppers: # null_character = model.tokenizer.encode(chr(0))[0] if "completed" not in model.gen_state: model.gen_state["completed"] = [False] * len(input_ids) - + if utils.koboldai_vars.adventure: + extra_options = ["> You", "You:", "\n\n You", "\n\nYou", ". You"] + for option in extra_options: + if option not in utils.koboldai_vars.stop_sequence: + utils.koboldai_vars.stop_sequence.append(option) + #one issue is that the stop sequence may not actual align with the end of token #if its a subsection of a longer token for stopper in utils.koboldai_vars.stop_sequence: @@ -163,6 +146,10 @@ class Stoppers: if all(model.gen_state["completed"]): utils.koboldai_vars.generated_tkns = utils.koboldai_vars.genamt del model.gen_state["completed"] + if utils.koboldai_vars.adventure: # Remove added adventure mode stop sequences + for option in extra_options: + if option in utils.koboldai_vars.stop_sequence: + utils.koboldai_vars.stop_sequence.remove(option) return True return False From 6e6c4ee5d4346e874265d63db9d154ddda9843bb Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu, 29 Jun 2023 02:34:57 -0500 Subject: [PATCH 006/107] Update hf_torch.py --- modeling/inference_models/hf_torch.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 10fd3cb6..2f575e73 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -69,7 +69,6 @@ class HFTorchInferenceModel(HFInferenceModel): Stoppers.dynamic_wi_scanner, Stoppers.singleline_stopper, Stoppers.chat_mode_stopper, - Stoppers.adventure_mode_stopper, Stoppers.stop_sequence_stopper, ] From 13405d836e5cd092cdb2db8aec03e3b577d3d969 Mon Sep 17 00:00:00 2001 From: YellowRoseCx <80486540+YellowRoseCx@users.noreply.github.com> Date: Thu, 29 Jun 2023 02:39:50 -0500 Subject: [PATCH 007/107] Update stoppers.py --- modeling/stoppers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modeling/stoppers.py b/modeling/stoppers.py index e36eb522..3f277f48 100644 --- a/modeling/stoppers.py +++ b/modeling/stoppers.py @@ -116,7 +116,6 @@ class Stoppers: return True return False - @staticmethod def stop_sequence_stopper( model: InferenceModel, From fef42a6273064a1103715458505fe41f5e747b32 Mon Sep 17 00:00:00 2001 From: 
somebody Date: Wed, 19 Jul 2023 11:52:39 -0500 Subject: [PATCH 008/107] API: Fix loading --- aiserver.py | 18 ++++++++++++++---- .../inference_models/generic_hf_torch/class.py | 5 ++++- modeling/inference_models/hf.py | 11 ++++++++++- modeling/inference_models/hf_torch.py | 3 +++ 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/aiserver.py b/aiserver.py index 0aa9bd4c..e76bf2c7 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1747,9 +1747,6 @@ def load_model(model_backend, initial_load=False): koboldai_vars.aibusy = True koboldai_vars.horde_share = False - if initial_load: - use_breakmodel_args = True - koboldai_vars.reset_model() koboldai_vars.noai = False @@ -8235,6 +8232,7 @@ class WorldInfoUIDsSchema(WorldInfoEntriesUIDsSchema): class ModelSelectionSchema(KoboldSchema): model: str = fields.String(required=True, validate=validate.Regexp(r"^(?!\s*NeoCustom)(?!\s*GPT2Custom)(?!\s*TPUMeshTransformerGPTJ)(?!\s*TPUMeshTransformerGPTNeoX)(?!\s*GooseAI)(?!\s*OAI)(?!\s*InferKit)(?!\s*Colab)(?!\s*API).*$"), metadata={"description": 'Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model'}) + backend: Optional[str] = fields.String(required=False, validate=validate.OneOf(model_backends.keys())) def _generate_text(body: GenerationInputSchema): if koboldai_vars.aibusy or koboldai_vars.genseqs: @@ -8492,6 +8490,7 @@ def put_model(body: ModelSelectionSchema): summary: Load a model description: |-2 Loads a model given its Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model. + Optionally, a backend parameter can be passed in to dictate which backend loads the model. tags: - model requestBody: @@ -8501,6 +8500,7 @@ def put_model(body: ModelSelectionSchema): schema: ModelSelectionSchema example: model: ReadOnly + backend: Read Only responses: 200: description: Successful request @@ -8518,8 +8518,18 @@ def put_model(body: ModelSelectionSchema): set_aibusy(1) old_model = koboldai_vars.model koboldai_vars.model = body.model.strip() + + backend = getattr(body, "backend", None) + if not backend: + # Backend is optional for backwards compatibility; it should probably be + # required on the next major API version. 
+ if body.model == "ReadOnly": + backend = "Read Only" + else: + backend = "Huggingface" + try: - load_model(use_breakmodel_args=True, breakmodel_args_default_to_cpu=True) + load_model(backend) except Exception as e: koboldai_vars.model = old_model raise e diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py index 93def5a6..541c3891 100644 --- a/modeling/inference_models/generic_hf_torch/class.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -21,7 +21,10 @@ model_backend_name = "Huggingface" model_backend_type = "Huggingface" #This should be a generic name in case multiple model backends are compatible (think Hugging Face Custom and Basic Hugging Face) class model_backend(HFTorchInferenceModel): - + def __init__(self) -> None: + super().__init__() + self.use_4_bit = False + def _initialize_model(self): return diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index e407f5b4..27425a46 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -15,8 +15,12 @@ class HFInferenceModel(InferenceModel): def __init__(self) -> None: super().__init__() self.model_config = None - #self.model_name = model_name + # TODO: model_name should probably be an instantiation parameter all the + # way down the inheritance chain. + self.model_name = None + + self.path = None self.hf_torch = False self.model = None self.tokenizer = None @@ -213,6 +217,11 @@ class HFInferenceModel(InferenceModel): torch.cuda.empty_cache() except: pass + + def _pre_load(self) -> None: + # HACK: Make model instantiation work without UI parameters + self.model_name = self.model_name or utils.koboldai_vars.model + return super()._pre_load() def _post_load(self) -> None: self.badwordsids = koboldai_settings.badwordsids_default diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 1b411c95..140acedc 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -89,7 +89,10 @@ class HFTorchInferenceModel(HFInferenceModel): self.hf_torch = True self.lazy_load = True self.low_mem = False + + # TODO: Mayyyybe only keep one of these variables self.nobreakmodel = False + self.breakmodel = False self.post_token_hooks = [ PostTokenHooks.stream_tokens, From 813e210127c2d990d43a8ae7cd90f7124ea64d61 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 11:52:49 -0500 Subject: [PATCH 009/107] Bump tiny API version As we're adding a new (though optional) parameter to load endpoint --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index e76bf2c7..95234644 100644 --- a/aiserver.py +++ b/aiserver.py @@ -892,7 +892,7 @@ tags = [ api_version = None # This gets set automatically so don't change this value api_v1 = KoboldAPISpec( - version="1.2.2", + version="1.2.3", prefixes=["/api/v1", "/api/latest"], tags=tags, ) From b9b3cd3aba4d076cf9c703b5cb29a2ecc4b6e431 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 12:02:53 -0500 Subject: [PATCH 010/107] API: Fix /story --- aiserver.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index 95234644..2278015c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -8817,8 +8817,14 @@ def get_story(): chunks = [] if koboldai_vars.gamestarted: chunks.append({"num": 0, "text": koboldai_vars.prompt}) - for num, action in koboldai_vars.actions.items(): - chunks.append({"num": num + 1, 
"text": action}) + + last_action_num = list(koboldai_vars.actions.actions.keys())[-1] + for num, action in koboldai_vars.actions.actions.items(): + text = action["Selected Text"] + # The last action seems to always be empty + if not text and num == last_action_num: + continue + chunks.append({"num": num + 1, "text": text}) return {"results": chunks} From 6da7a9629ad9c5ae2b25415e12174addc6b3b545 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 13:01:07 -0500 Subject: [PATCH 011/107] API: Fix /story/load --- aiserver.py | 10 +++++++--- koboldai_settings.py | 15 +++++++++------ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/aiserver.py b/aiserver.py index 2278015c..153e6d07 100644 --- a/aiserver.py +++ b/aiserver.py @@ -5130,9 +5130,13 @@ def load_story_v1(js, from_file=None): def load_story_v2(js, from_file=None): logger.debug("Loading V2 Story") logger.debug("Called from {}".format(inspect.stack()[1].function)) - leave_room(session['story']) - session['story'] = js['story_name'] - join_room(session['story']) + + new_story = js["story_name"] + # In socket context + if hasattr(request, "sid"): + leave_room(session['story']) + join_room(new_story) + session['story'] = new_story koboldai_vars.load_story(session['story'], js) diff --git a/koboldai_settings.py b/koboldai_settings.py index ebd8c019..3bc0eb86 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -6,7 +6,7 @@ import os, re, time, threading, json, pickle, base64, copy, tqdm, datetime, sys import shutil from typing import List, Union from io import BytesIO -from flask import has_request_context, session +from flask import has_request_context, session, request from flask_socketio import join_room, leave_room from collections import OrderedDict import multiprocessing @@ -130,11 +130,14 @@ class koboldai_vars(object): original_story_name = story_name if not multi_story: story_name = 'default' - #Leave the old room and join the new one - logger.debug("Leaving room {}".format(session['story'])) - leave_room(session['story']) - logger.debug("Joining room {}".format(story_name)) - join_room(story_name) + + # Leave the old room and join the new one if in socket context + if hasattr(request, "sid"): + logger.debug("Leaving room {}".format(session['story'])) + leave_room(session['story']) + logger.debug("Joining room {}".format(story_name)) + join_room(story_name) + session['story'] = story_name logger.debug("Sending story reset") self._story_settings[story_name]._socketio.emit("reset_story", {}, broadcast=True, room=story_name) From 9726d12ede27bd3b214c412f48cc8dc9c4f66f1c Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 13:05:35 -0500 Subject: [PATCH 012/107] API: Fix /story/end (POST) --- aiserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index 153e6d07..6e55b943 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3265,9 +3265,9 @@ def check_for_backend_compilation(): def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, disable_recentrng=False, no_generate=False, ignore_aibusy=False): # Ignore new submissions if the AI is currently busy - if(koboldai_vars.aibusy): + if koboldai_vars.aibusy and not ignore_aibusy: return - + while(True): set_aibusy(1) koboldai_vars.actions.clear_unused_options() From 2d80f2ebb5902f4a2400227ab390f68f5b837e94 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 13:08:57 -0500 Subject: [PATCH 013/107] API: Fix getstorynums --- aiserver.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 6e55b943..c90d862c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -8852,7 +8852,7 @@ def get_story_nums(): chunks = [] if koboldai_vars.gamestarted: chunks.append(0) - for num in koboldai_vars.actions.keys(): + for num in koboldai_vars.actions.actions.keys(): chunks.append(num + 1) return {"results": chunks} From 4335d1f46a9968dc6589daf4c147408835a6c5e9 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 19 Jul 2023 13:18:45 -0500 Subject: [PATCH 014/107] API: Fix /world_info --- aiserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index c90d862c..604dba5f 100644 --- a/aiserver.py +++ b/aiserver.py @@ -4424,7 +4424,7 @@ def requestwi(): # and items in different folders are sorted based on the order of the folders #==================================================================# def stablesortwi(): - mapping = {uid: index for index, uid in enumerate(koboldai_vars.wifolders_l)} + mapping = {str(uid): index for index, uid in enumerate(koboldai_vars.wifolders_l)} koboldai_vars.worldinfo.sort(key=lambda x: mapping[str(x["folder"])] if x["folder"] is not None else float("inf")) last_folder = ... last_wi = None @@ -9213,7 +9213,7 @@ def get_world_info(): if wi["folder"] != last_folder: folder = [] if wi["folder"] is not None: - folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[wi["folder"]]["name"], "entries": folder}) + folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[str(wi["folder"])]["name"], "entries": folder}) last_folder = wi["folder"] (folder if wi["folder"] is not None else entries).append({k: v for k, v in wi.items() if k not in ("init", "folder", "num") and (wi["selective"] or k != "keysecondary")}) return {"folders": folders, "entries": entries} From 34a98d2962678b468b833ec3c506c690e4c50e8e Mon Sep 17 00:00:00 2001 From: onesome Date: Fri, 21 Jul 2023 00:48:02 -0500 Subject: [PATCH 015/107] Context Menu: Small visual fixes woohooooo back to css - fixes margins to look better - moves contents of context menu items 1px down - fixes context menus near edge wrapping their inner text (ew) --- static/koboldai.css | 10 ++++++++-- static/koboldai.js | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/static/koboldai.css b/static/koboldai.css index 3252c21a..3ad643d2 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -2705,13 +2705,14 @@ body { #context-menu > hr { /* Division Color*/ border-top: 2px solid var(--context_menu_division); - margin: 5px 5px; + margin: 3px 5px; } .context-menu-item { padding: 5px; padding-right: 25px; min-width: 100px; + white-space: nowrap; } .context-menu-item:hover { @@ -2722,11 +2723,16 @@ body { .context-menu-item > .material-icons-outlined { position: relative; - top: 2px; + top: 3px; font-size: 15px; margin-right: 5px; } +.context-menu-item > .context-menu-label { + position: relative; + top: 1px; +} + /* Substitutions */ #Substitutions { margin-left: 10px; diff --git a/static/koboldai.js b/static/koboldai.js index 8b70dd6a..e8053f23 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -6071,7 +6071,7 @@ process_cookies(); context_menu_cache.push({shouldShow: action.shouldShow}); let icon = $e("span", item, {classes: ["material-icons-outlined"], innerText: action.icon}); - item.append(action.label); + $e("span", item, {classes: ["context-menu-label"], innerText: action.label}); item.addEventListener("mousedown", e => e.preventDefault()); // Expose the 
"summonEvent" to enable access to original context menu target. From 4921040fb462dac00ba8a028a8b22e9524f9f740 Mon Sep 17 00:00:00 2001 From: onesome Date: Fri, 21 Jul 2023 00:52:12 -0500 Subject: [PATCH 016/107] Context Menu: Make things a little less bloaty 5px was a bit excessive TODO: studied the context menu in my browser for a bit and noticed that if it was going to be too close to the bottom, the browser changes the vertical direction the context menu goes. sounds neat! --- static/koboldai.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/static/koboldai.css b/static/koboldai.css index 3ad643d2..b83384c4 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -2709,7 +2709,7 @@ body { } .context-menu-item { - padding: 5px; + padding: 4px; padding-right: 25px; min-width: 100px; white-space: nowrap; From 46c377b0c362d715e1c37f423ab763f367c32299 Mon Sep 17 00:00:00 2001 From: onesome Date: Fri, 21 Jul 2023 00:53:48 -0500 Subject: [PATCH 017/107] Context Menu: Add stubs for new temporary stoppingcriteria idea I think this would be cool! Ideas: - disable/grey when model doesnt support stopping criteria - shortcuts (maybe, this would def be a power user thing) - option to generate until EOS token - option to generate forever until user manually stops - (not super related but pixels away) make retry while generation is ongoing cancel generation and retry. same with undo. --- static/koboldai.js | 6 ++++++ templates/index_new.html | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/static/koboldai.js b/static/koboldai.js index e8053f23..64da7146 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -147,6 +147,12 @@ const context_menu_actions = { "wi-img-upload-button": [ {label: "Upload Image", icon: "file_upload", enabledOn: "ALWAYS", click: wiImageReplace}, {label: "Use Generated Image", icon: "image", enabledOn: "GENERATED-IMAGE", click: wiImageUseGeneratedImage}, + ], + "submit-button": [ + {label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: function(){}}, + null, + {label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, + {label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, ] }; diff --git a/templates/index_new.html b/templates/index_new.html index 99b8c941..53bcffd5 100644 --- a/templates/index_new.html +++ b/templates/index_new.html @@ -110,7 +110,7 @@ - + From 6cf63f781a3c17ab6b41c5f12cd05f824be9ba04 Mon Sep 17 00:00:00 2001 From: onesome Date: Fri, 21 Jul 2023 01:58:57 -0500 Subject: [PATCH 018/107] YEAAAAAAAAAA --- static/koboldai.js | 11 +++++++++++ templates/index_new.html | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/static/koboldai.js b/static/koboldai.js index 64da7146..75563df2 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -151,8 +151,19 @@ const context_menu_actions = { "submit-button": [ {label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: function(){}}, null, + {label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, + {label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, + null, {label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, {label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, + ], + "undo-button": [ + {label: "Undo", icon: "undo", enabledOn: "ALWAYS", click: function(){}}, + null, + {label: "Prune Actions", icon: "cut", enabledOn: "ALWAYS", click: function(){}}, + 
{label: "Shred", icon: "local_fire_department", enabledOn: "ALWAYS", click: function(){}}, + null, + {label: "Trim Last Sentence", icon: "carpenter", enabledOn: "ALWAYS", click: function(){}}, ] }; diff --git a/templates/index_new.html b/templates/index_new.html index 53bcffd5..2b1c0ddf 100644 --- a/templates/index_new.html +++ b/templates/index_new.html @@ -112,7 +112,7 @@ - + From fa0a09994386f704e99ef7bebeb6903469aea044 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 10:38:17 -0500 Subject: [PATCH 019/107] Update comment --- modeling/inference_models/hf_torch.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 140acedc..c4e82e6f 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -90,7 +90,8 @@ class HFTorchInferenceModel(HFInferenceModel): self.lazy_load = True self.low_mem = False - # TODO: Mayyyybe only keep one of these variables + # `nobreakmodel` indicates that breakmodel cannot be used, while `breakmodel` + # indicates whether breakmodel is currently being used self.nobreakmodel = False self.breakmodel = False From 3a43b254b86733a637a2286bf8a3c9421674771a Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 13:27:30 -0500 Subject: [PATCH 020/107] Add basic support for some of the quick stoppers --- aiserver.py | 64 +++++++++++++++++++++++++++---------- modeling/inference_model.py | 32 +++++++++++++++++++ modeling/stoppers.py | 52 ++++++++++++++++++++++++------ static/koboldai.js | 23 ++++++++----- 4 files changed, 137 insertions(+), 34 deletions(-) diff --git a/aiserver.py b/aiserver.py index 0aa9bd4c..1cb9146e 100644 --- a/aiserver.py +++ b/aiserver.py @@ -12,6 +12,8 @@ import random import shutil import eventlet +from modeling.inference_model import GenerationMode + eventlet.monkey_patch(all=True, thread=False, os=False) import os, inspect, contextlib, pickle os.system("") @@ -3266,7 +3268,16 @@ def check_for_backend_compilation(): break koboldai_vars.checking = False -def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, disable_recentrng=False, no_generate=False, ignore_aibusy=False): +def actionsubmit( + data, + actionmode=0, + force_submit=False, + force_prompt_gen=False, + disable_recentrng=False, + no_generate=False, + ignore_aibusy=False, + gen_mode=GenerationMode.STANDARD +): # Ignore new submissions if the AI is currently busy if(koboldai_vars.aibusy): return @@ -3424,7 +3435,7 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, if(not no_generate and not koboldai_vars.noai and koboldai_vars.lua_koboldbridge.generating): # Off to the tokenizer! - calcsubmit("") + calcsubmit("", gen_mode=gen_mode) if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0): data = "" force_submit = True @@ -3779,7 +3790,7 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None, #==================================================================# # Take submitted text and build the text to be given to generator #==================================================================# -def calcsubmit(txt): +def calcsubmit(txt, gen_mode=GenerationMode.STANDARD): anotetxt = "" # Placeholder for Author's Note text forceanote = False # In case we don't have enough actions to hit A.N. depth anoteadded = False # In case our budget runs out before we hit A.N. 
depth @@ -3821,7 +3832,7 @@ def calcsubmit(txt): logger.debug("Submit: experimental_features time {}s".format(time.time()-start_time)) start_time = time.time() - generate(subtxt, min, max, found_entries) + generate(subtxt, min, max, found_entries, gen_mode=gen_mode) logger.debug("Submit: generate time {}s".format(time.time()-start_time)) attention_bias.attention_bias = None @@ -3889,7 +3900,7 @@ class HordeException(Exception): # Send text to generator and deal with output #==================================================================# -def generate(txt, minimum, maximum, found_entries=None): +def generate(txt, minimum, maximum, found_entries=None, gen_mode=GenerationMode.STANDARD): koboldai_vars.generated_tkns = 0 if(found_entries is None): @@ -3911,7 +3922,7 @@ def generate(txt, minimum, maximum, found_entries=None): # Submit input text to generator try: start_time = time.time() - genout, already_generated = tpool.execute(model.core_generate, txt, found_entries) + genout, already_generated = tpool.execute(model.core_generate, txt, found_entries, gen_mode=gen_mode) logger.debug("Generate: core_generate time {}s".format(time.time()-start_time)) except Exception as e: if(issubclass(type(e), lupa.LuaError)): @@ -6168,22 +6179,43 @@ def UI_2_delete_option(data): @socketio.on('submit') @logger.catch def UI_2_submit(data): - if not koboldai_vars.noai and data['theme'] != "": + if not koboldai_vars.noai and data['theme']: + # Random prompt generation logger.debug("doing random prompt") memory = koboldai_vars.memory koboldai_vars.memory = "{}\n\nYou generate the following {} story concept :".format(koboldai_vars.memory, data['theme']) koboldai_vars.lua_koboldbridge.feedback = None actionsubmit("", force_submit=True, force_prompt_gen=True) koboldai_vars.memory = memory - else: - logger.debug("doing normal input") - koboldai_vars.actions.clear_unused_options() - koboldai_vars.lua_koboldbridge.feedback = None - koboldai_vars.recentrng = koboldai_vars.recentrngm = None - if koboldai_vars.actions.action_count == -1: - actionsubmit(data['data'], actionmode=koboldai_vars.actionmode) - else: - actionsubmit(data['data'], actionmode=koboldai_vars.actionmode) + return + + logger.debug("doing normal input") + koboldai_vars.actions.clear_unused_options() + koboldai_vars.lua_koboldbridge.feedback = None + koboldai_vars.recentrng = koboldai_vars.recentrngm = None + + gen_mode_name = data.get("gen_mode", None) + gen_mode = { + # If we don't have a gen mode, or it's None (the default), just do a + # normal submission. + None: GenerationMode.STANDARD, + + # NOTE: forever should be a no-op on models that don't support + # interrupting generation. This should be conveyed to the user by + # graying out the option in the context menu. 
+ "forever": GenerationMode.FOREVER, + + # The following gen modes require stopping criteria to be respected by + # the backend: + "until_eos": GenerationMode.UNTIL_EOS, + "until_newline": GenerationMode.UNTIL_NEWLINE, + "until_sentence_end": GenerationMode.UNTIL_SENTENCE_END, + }.get(gen_mode_name, None) + + if not gen_mode: + raise RuntimeError(f"Unknown gen_mode '{gen_mode_name}'") + + actionsubmit(data['data'], actionmode=koboldai_vars.actionmode, gen_mode=gen_mode) #==================================================================# # Event triggered when user clicks the submit button diff --git a/modeling/inference_model.py b/modeling/inference_model.py index a2d4fa63..1d285576 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -3,6 +3,8 @@ from __future__ import annotations from dataclasses import dataclass import time from typing import List, Optional, Union + +from enum import Enum from logger import logger import torch @@ -12,6 +14,7 @@ from transformers import ( GPT2Tokenizer, AutoTokenizer, ) +from modeling.stoppers import Stoppers from modeling.tokenizer import GenericTokenizer from modeling import logits_processors @@ -154,6 +157,12 @@ class ModelCapabilities: # Some models need to warm up the TPU before use uses_tpu: bool = False +class GenerationMode(Enum): + STANDARD = 0 + FOREVER = 1 + UNTIL_EOS = 2 + UNTIL_NEWLINE = 3 + UNTIL_SENTENCE_END = 4 class InferenceModel: """Root class for all models.""" @@ -256,6 +265,7 @@ class InferenceModel: self, text: list, found_entries: set, + gen_mode: GenerationMode = GenerationMode.STANDARD, ): """Generate story text. Heavily tied to story-specific parameters; if you are making a new generation-based feature, consider `generate_raw()`. @@ -263,6 +273,7 @@ class InferenceModel: Args: text (list): Encoded input tokens found_entries (set): Entries found for Dynamic WI + gen_mode (GenerationMode): The GenerationMode to pass to raw_generate. Defaults to GenerationMode.STANDARD Raises: RuntimeError: if inconsistancies are detected with the internal state and Lua state -- sanity check @@ -358,6 +369,7 @@ class InferenceModel: seed=utils.koboldai_vars.seed if utils.koboldai_vars.full_determinism else None, + gen_mode=gen_mode ) logger.debug( "core_generate: run raw_generate pass {} {}s".format( @@ -532,6 +544,7 @@ class InferenceModel: found_entries: set = (), tpu_dynamic_inference: bool = False, seed: Optional[int] = None, + gen_mode: GenerationMode = GenerationMode.STANDARD, **kwargs, ) -> GenerationResult: """A wrapper around `_raw_generate()` that handles gen_state and other stuff. Use this to generate text outside of the story. @@ -547,6 +560,7 @@ class InferenceModel: is_core (bool, optional): Whether this generation is a core story generation. Defaults to False. single_line (bool, optional): Generate one line only.. Defaults to False. found_entries (set, optional): Entries found for Dynamic WI. Defaults to (). + gen_mode (GenerationMode): Special generation mode. Defaults to GenerationMode.STANDARD. 
Raises: ValueError: If prompt type is weird @@ -568,6 +582,21 @@ class InferenceModel: "wi_scanner_excluded_keys", set() ) + temp_stoppers = [] + + if gen_mode == GenerationMode.FOREVER: + raise NotImplementedError() + elif gen_mode == GenerationMode.UNTIL_EOS: + # Still need to unban + raise NotImplementedError() + elif gen_mode == GenerationMode.UNTIL_NEWLINE: + # TODO: Look into replacing `single_line` with `generation_mode` + temp_stoppers.append(Stoppers.newline_stopper) + elif gen_mode == GenerationMode.UNTIL_SENTENCE_END: + temp_stoppers.append(Stoppers.sentence_end_stopper) + + self.stopper_hooks += temp_stoppers + utils.koboldai_vars.inference_config.do_core = is_core gen_settings = GenerationSettings(*(generation_settings or {})) @@ -604,6 +633,9 @@ class InferenceModel: f"Generated {len(result.encoded[0])} tokens in {time_end} seconds, for an average rate of {tokens_per_second} tokens per second." ) + for stopper in temp_stoppers: + self.stopper_hooks.remove(stopper) + return result def generate( diff --git a/modeling/stoppers.py b/modeling/stoppers.py index 94c09e85..02c1ce48 100644 --- a/modeling/stoppers.py +++ b/modeling/stoppers.py @@ -3,15 +3,12 @@ from __future__ import annotations import torch import utils -from modeling.inference_model import ( - InferenceModel, -) - +from modeling import inference_model class Stoppers: @staticmethod def core_stopper( - model: InferenceModel, + model: inference_model.InferenceModel, input_ids: torch.LongTensor, ) -> bool: if not utils.koboldai_vars.inference_config.do_core: @@ -62,7 +59,7 @@ class Stoppers: @staticmethod def dynamic_wi_scanner( - model: InferenceModel, + model: inference_model.InferenceModel, input_ids: torch.LongTensor, ) -> bool: if not utils.koboldai_vars.inference_config.do_dynamic_wi: @@ -93,7 +90,7 @@ class Stoppers: @staticmethod def chat_mode_stopper( - model: InferenceModel, + model: inference_model.InferenceModel, input_ids: torch.LongTensor, ) -> bool: if not utils.koboldai_vars.chatmode: @@ -118,7 +115,7 @@ class Stoppers: @staticmethod def stop_sequence_stopper( - model: InferenceModel, + model: inference_model.InferenceModel, input_ids: torch.LongTensor, ) -> bool: @@ -145,14 +142,22 @@ class Stoppers: @staticmethod def singleline_stopper( - model: InferenceModel, + model: inference_model.InferenceModel, input_ids: torch.LongTensor, ) -> bool: - """If singleline mode is enabled, it's pointless to generate output beyond the first newline.""" + """Stop on occurances of newlines **if singleline is enabled**.""" + # It might be better just to do this further up the line if not utils.koboldai_vars.singleline: return False + return Stoppers.newline_stopper(model, input_ids) + @staticmethod + def newline_stopper( + model: inference_model.InferenceModel, + input_ids: torch.LongTensor, + ) -> bool: + """Stop on occurances of newlines.""" # Keep track of presence of newlines in each sequence; we cannot stop a # batch member individually, so we must wait for all of them to contain # a newline. @@ -167,3 +172,30 @@ class Stoppers: del model.gen_state["newline_in_sequence"] return True return False + + @staticmethod + def sentence_end_stopper( + model: inference_model.InferenceModel, + input_ids: torch.LongTensor, + ) -> bool: + """Stops at the end of sentences.""" + + # TODO: Make this more robust + SENTENCE_ENDS = [".", "?", "!"] + + # We need to keep track of stopping for each batch, since we can't stop + # one individually. 
+ if "sentence_end_in_sequence" not in model.gen_state: + model.gen_state["sentence_end_sequence"] = [False] * len(input_ids) + + for sequence_idx, batch_sequence in enumerate(input_ids): + decoded = model.tokenizer.decode(batch_sequence[-1]) + for end in SENTENCE_ENDS: + if end in decoded: + model.gen_state["sentence_end_sequence"][sequence_idx] = True + break + + if all(model.gen_state["sentence_end_sequence"]): + del model.gen_state["sentence_end_sequence"] + return True + return False \ No newline at end of file diff --git a/static/koboldai.js b/static/koboldai.js index 75563df2..320ec927 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -149,13 +149,13 @@ const context_menu_actions = { {label: "Use Generated Image", icon: "image", enabledOn: "GENERATED-IMAGE", click: wiImageUseGeneratedImage}, ], "submit-button": [ - {label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: function(){}}, + {label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: () => storySubmit()}, null, - {label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, - {label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, + {label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("forever")}, + {label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_eos")}, null, - {label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, - {label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: function(){}}, + {label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_newline")}, + {label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_sentence_end")}, ], "undo-button": [ {label: "Undo", icon: "undo", enabledOn: "ALWAYS", click: function(){}}, @@ -256,10 +256,17 @@ function disconnect() { document.getElementById("disconnect_message").classList.remove("hidden"); } -function storySubmit() { +function storySubmit(genMode=null) { + const textInput = document.getElementById("input_text"); + const themeInput = document.getElementById("themetext"); disruptStoryState(); - socket.emit('submit', {'data': document.getElementById('input_text').value, 'theme': document.getElementById('themetext').value}); - document.getElementById('input_text').value = ''; + socket.emit('submit', { + data: textInput.value, + theme: themeInput.value, + gen_mode: genMode, + }); + + textInput.value = ''; document.getElementById('themetext').value = ''; } From 1c4157a41b753b8e3dd4246770c3cfa889485306 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 13:33:38 -0500 Subject: [PATCH 021/107] Maybe another time too many ideas at once --- static/koboldai.js | 8 -------- 1 file changed, 8 deletions(-) diff --git a/static/koboldai.js b/static/koboldai.js index 320ec927..8ccac9dc 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -157,14 +157,6 @@ const context_menu_actions = { {label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_newline")}, {label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_sentence_end")}, ], - "undo-button": [ - {label: "Undo", icon: "undo", enabledOn: "ALWAYS", click: function(){}}, - null, - {label: "Prune Actions", icon: "cut", enabledOn: "ALWAYS", click: function(){}}, - {label: "Shred", icon: "local_fire_department", enabledOn: "ALWAYS", click: function(){}}, 
- null, - {label: "Trim Last Sentence", icon: "carpenter", enabledOn: "ALWAYS", click: function(){}}, - ] }; let context_menu_cache = []; From b8671cce09e83c4c64239951edf0150ef8b8d190 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 13:48:23 -0500 Subject: [PATCH 022/107] Context Menu: Change positioning algorithm for y-axis --- static/koboldai.js | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/static/koboldai.js b/static/koboldai.js index 8ccac9dc..cdb7bc79 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -5818,8 +5818,21 @@ function position_context_menu(contextMenu, x, y) { right: x + width, }; + // Slide over if running against the window bounds. if (farMenuBounds.right > bounds.right) x -= farMenuBounds.right - bounds.right; - if (farMenuBounds.bottom > bounds.bottom) y -= farMenuBounds.bottom - bounds.bottom; + + if (farMenuBounds.bottom > bounds.bottom) { + // We've hit the bottom. + + // The old algorithm pushed the menu against the wall, similar to what's + // done on the x-axis: + // y -= farMenuBounds.bottom - bounds.bottom; + // But now, we make the box change its emission direction from the cursor: + y -= (height + 5); + // The main advantage of this approach is that the cursor is never directly + // placed above a context menu item immediately after activating the context + // menu. (Thus the 5px offset also added) + } contextMenu.style.left = `${x}px`; contextMenu.style.top = `${y}px`; From 8d5ae38b4568e1dbc893c8adfb7d3ac4a8bd57c2 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 14:29:41 -0500 Subject: [PATCH 023/107] Context Menu: Show if gen mode is supported - adds callback support to `enabledOn` in context menu items - adds `supported_gen_modes` variable for frontend to check if a gen mode is supported - adds `get_supported_gen_modes` to `InferenceModel` to get supported gen modes - takes advantage of cool enum features for less enum-handling code --- aiserver.py | 29 ++++++------------- koboldai_settings.py | 1 + modeling/inference_model.py | 37 +++++++++++++++++++++---- static/koboldai.js | 55 +++++++++++++++++++++++++++++-------- 4 files changed, 84 insertions(+), 38 deletions(-) diff --git a/aiserver.py b/aiserver.py index 1cb9146e..ba224b3c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1790,7 +1790,9 @@ def load_model(model_backend, initial_load=False): with use_custom_unpickler(RestrictedUnpickler): model = model_backends[model_backend] + koboldai_vars.supported_gen_modes = [x.value for x in model.get_supported_gen_modes()] model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) + koboldai_vars.model = model.model_name if "model_name" in vars(model) else model.id #Should have model_name, but it could be set to id depending on how it's setup if koboldai_vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"): koboldai_vars.model = os.path.basename(os.path.normpath(model.path)) @@ -6194,26 +6196,13 @@ def UI_2_submit(data): koboldai_vars.lua_koboldbridge.feedback = None koboldai_vars.recentrng = koboldai_vars.recentrngm = None - gen_mode_name = data.get("gen_mode", None) - gen_mode = { - # If we don't have a gen mode, or it's None (the default), just do a - # normal submission. - None: GenerationMode.STANDARD, - - # NOTE: forever should be a no-op on models that don't support - # interrupting generation. This should be conveyed to the user by - # graying out the option in the context menu. 
- "forever": GenerationMode.FOREVER, - - # The following gen modes require stopping criteria to be respected by - # the backend: - "until_eos": GenerationMode.UNTIL_EOS, - "until_newline": GenerationMode.UNTIL_NEWLINE, - "until_sentence_end": GenerationMode.UNTIL_SENTENCE_END, - }.get(gen_mode_name, None) - - if not gen_mode: - raise RuntimeError(f"Unknown gen_mode '{gen_mode_name}'") + gen_mode_name = data.get("gen_mode", None) or "standard" + try: + gen_mode = GenerationMode(gen_mode_name) + except ValueError: + # Invalid enum lookup! + gen_mode = GenerationMode.STANDARD + logger.warning(f"Unknown gen_mode '{gen_mode_name}', using STANDARD! Report this!") actionsubmit(data['data'], actionmode=koboldai_vars.actionmode, gen_mode=gen_mode) diff --git a/koboldai_settings.py b/koboldai_settings.py index ebd8c019..f061beb1 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -685,6 +685,7 @@ class model_settings(settings): self._koboldai_vars = koboldai_vars self.alt_multi_gen = False self.bit_8_available = None + self.supported_gen_modes = [] def reset_for_model_load(self): self.simple_randomness = 0 #Set first as this affects other outputs diff --git a/modeling/inference_model.py b/modeling/inference_model.py index 1d285576..e09249c3 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -147,7 +147,10 @@ class GenerationSettings: class ModelCapabilities: embedding_manipulation: bool = False post_token_hooks: bool = False + + # Used to gauge if manual stopping is possible stopper_hooks: bool = False + # TODO: Support non-live probabilities from APIs post_token_probs: bool = False @@ -158,11 +161,11 @@ class ModelCapabilities: uses_tpu: bool = False class GenerationMode(Enum): - STANDARD = 0 - FOREVER = 1 - UNTIL_EOS = 2 - UNTIL_NEWLINE = 3 - UNTIL_SENTENCE_END = 4 + STANDARD = "standard" + FOREVER = "forever" + UNTIL_EOS = "until_eos" + UNTIL_NEWLINE = "until_newline" + UNTIL_SENTENCE_END = "until_sentence_end" class InferenceModel: """Root class for all models.""" @@ -585,7 +588,13 @@ class InferenceModel: temp_stoppers = [] if gen_mode == GenerationMode.FOREVER: - raise NotImplementedError() + if self.capabilties.stopper_hooks: + self.gen_state["stop_at_genamt"] = False + max_new = 1e7 + else: + logger.warning( + "User requested infinite generation on model that doesn't support stop hooks. Recipe for disaster!" + ) elif gen_mode == GenerationMode.UNTIL_EOS: # Still need to unban raise NotImplementedError() @@ -652,3 +661,19 @@ class InferenceModel: def _post_token_gen(self, input_ids: torch.LongTensor) -> None: for hook in self.post_token_hooks: hook(self, input_ids) + + def get_supported_gen_modes(self) -> List[GenerationMode]: + """Returns a list of compatible `GenerationMode`s for the current model. + + Returns: + List[GenerationMode]: A list of compatible `GenerationMode`s. 
+ """ + ret = [] + if self.capabilties.stopper_hooks: + ret += [ + GenerationMode.FOREVER, + GenerationMode.UNTIL_EOS, + GenerationMode.UNTIL_NEWLINE, + GenerationMode.UNTIL_SENTENCE_END, + ] + return ret \ No newline at end of file diff --git a/static/koboldai.js b/static/koboldai.js index cdb7bc79..d7560f54 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -83,6 +83,7 @@ let story_id = -1; var dirty_chunks = []; var initial_socketio_connection_occured = false; var selected_model_data; +var supported_gen_modes = []; // Each entry into this array should be an object that looks like: // {class: "class", key: "key", func: callback} @@ -151,11 +152,31 @@ const context_menu_actions = { "submit-button": [ {label: "Generate", icon: "edit", enabledOn: "ALWAYS", click: () => storySubmit()}, null, - {label: "Generate Forever", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("forever")}, - {label: "Generate Until EOS", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_eos")}, + { + label: "Generate Forever", + icon: "edit_off", + enabledOn: () => supported_gen_modes.includes("forever"), + click: () => storySubmit("forever") + }, + { + label: "Generate Until EOS", + icon: "edit_off", + enabledOn: () => supported_gen_modes.includes("until_eos"), + click: () => storySubmit("until_eos") + }, null, - {label: "Finish Line", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_newline")}, - {label: "Finish Sentence", icon: "edit_off", enabledOn: "ALWAYS", click: () => storySubmit("until_sentence_end")}, + { + label: "Finish Line", + icon: "edit_off", + enabledOn: () => supported_gen_modes.includes("until_newline"), + click: () => storySubmit("until_newline") + }, + { + label: "Finish Sentence", + icon: "edit_off", + enabledOn: () => supported_gen_modes.includes("until_sentence_end"), + click: () => storySubmit("until_sentence_end") + }, ], }; @@ -941,6 +962,9 @@ function var_changed(data) { //special case for welcome text since we want to allow HTML } else if (data.classname == 'model' && data.name == 'welcome') { document.getElementById('welcome_text').innerHTML = data.value; + //Special case for permitted generation modes + } else if (data.classname == 'model' && data.name == 'supported_gen_modes') { + supported_gen_modes = data.value; //Basic Data Syncing } else { var elements_to_change = document.getElementsByClassName("var_sync_"+data.classname.replace(" ", "_")+"_"+data.name.replace(" ", "_")); @@ -6090,21 +6114,23 @@ process_cookies(); continue; } + const enableCriteriaIsFunction = typeof action.enabledOn === "function" - let item = $e("div", contextMenu, { + const itemEl = $e("div", contextMenu, { classes: ["context-menu-item", "noselect", `context-menu-${key}`], - "enabled-on": action.enabledOn, + "enabled-on": enableCriteriaIsFunction ? 
"CALLBACK" : action.enabledOn, "cache-index": context_menu_cache.length }); + itemEl.enabledOnCallback = action.enabledOn; context_menu_cache.push({shouldShow: action.shouldShow}); - let icon = $e("span", item, {classes: ["material-icons-outlined"], innerText: action.icon}); - $e("span", item, {classes: ["context-menu-label"], innerText: action.label}); + const icon = $e("span", itemEl, {classes: ["material-icons-outlined"], innerText: action.icon}); + $e("span", itemEl, {classes: ["context-menu-label"], innerText: action.label}); - item.addEventListener("mousedown", e => e.preventDefault()); + itemEl.addEventListener("mousedown", e => e.preventDefault()); // Expose the "summonEvent" to enable access to original context menu target. - item.addEventListener("click", () => action.click(summonEvent)); + itemEl.addEventListener("click", () => action.click(summonEvent)); } } @@ -6154,10 +6180,10 @@ process_cookies(); // Disable non-applicable items $(".context-menu-item").addClass("disabled"); - + // A selection is made if (getSelectionText()) $(".context-menu-item[enabled-on=SELECTION]").removeClass("disabled"); - + // The caret is placed if (get_caret_position(target) !== null) $(".context-menu-item[enabled-on=CARET]").removeClass("disabled"); @@ -6166,6 +6192,11 @@ process_cookies(); $(".context-menu-item[enabled-on=ALWAYS]").removeClass("disabled"); + for (const contextMenuItem of document.querySelectorAll(".context-menu-item[enabled-on=CALLBACK]")) { + if (!contextMenuItem.enabledOnCallback()) continue; + contextMenuItem.classList.remove("disabled"); + } + // Make sure hr isn't first or last visible element let visibles = []; for (const item of contextMenu.children) { From c78401bd124be939134c1f50ac831c434697b681 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 15:22:14 -0500 Subject: [PATCH 024/107] Fix gen mode on first generation --- aiserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index ba224b3c..0bfaca22 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3371,7 +3371,7 @@ def actionsubmit( koboldai_vars.prompt = data # Clear the startup text from game screen emit('from_server', {'cmd': 'updatescreen', 'gamestarted': False, 'data': 'Please wait, generating story...'}, broadcast=True, room="UI_1") - calcsubmit("") # Run the first action through the generator + calcsubmit("", gen_mode=gen_mode) # Run the first action through the generator if(not koboldai_vars.abort and koboldai_vars.lua_koboldbridge.restart_sequence is not None and len(koboldai_vars.genseqs) == 0): data = "" force_submit = True @@ -6205,7 +6205,7 @@ def UI_2_submit(data): logger.warning(f"Unknown gen_mode '{gen_mode_name}', using STANDARD! Report this!") actionsubmit(data['data'], actionmode=koboldai_vars.actionmode, gen_mode=gen_mode) - + #==================================================================# # Event triggered when user clicks the submit button #==================================================================# From e5d0a597a1806815ca7463a6536d6719ceb8d165 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 15:36:32 -0500 Subject: [PATCH 025/107] Generation Mode: UNTIL_EOS This mode enables the EOS token and will generate infinitely until hitting it. 
--- modeling/inference_model.py | 24 +++++++++++++----------- modeling/inference_models/hf_torch.py | 13 ++++++++++++- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/modeling/inference_model.py b/modeling/inference_model.py index e09249c3..8b7f0e3e 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -585,19 +585,21 @@ class InferenceModel: "wi_scanner_excluded_keys", set() ) + self.gen_state["allow_eos"] = False + temp_stoppers = [] + if gen_mode not in self.get_supported_gen_modes(): + gen_mode = GenerationMode.STANDARD + logger.warning(f"User requested unsupported GenerationMode '{gen_mode}'!") + if gen_mode == GenerationMode.FOREVER: - if self.capabilties.stopper_hooks: - self.gen_state["stop_at_genamt"] = False - max_new = 1e7 - else: - logger.warning( - "User requested infinite generation on model that doesn't support stop hooks. Recipe for disaster!" - ) + self.gen_state["stop_at_genamt"] = False + max_new = 1e7 elif gen_mode == GenerationMode.UNTIL_EOS: - # Still need to unban - raise NotImplementedError() + self.gen_state["allow_eos"] = True + self.gen_state["stop_at_genamt"] = False + max_new = 1e7 elif gen_mode == GenerationMode.UNTIL_NEWLINE: # TODO: Look into replacing `single_line` with `generation_mode` temp_stoppers.append(Stoppers.newline_stopper) @@ -668,11 +670,11 @@ class InferenceModel: Returns: List[GenerationMode]: A list of compatible `GenerationMode`s. """ - ret = [] + ret = [GenerationMode.STANDARD] + if self.capabilties.stopper_hooks: ret += [ GenerationMode.FOREVER, - GenerationMode.UNTIL_EOS, GenerationMode.UNTIL_NEWLINE, GenerationMode.UNTIL_SENTENCE_END, ] diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 1b411c95..b4909f60 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -31,6 +31,7 @@ from modeling.stoppers import Stoppers from modeling.post_token_hooks import PostTokenHooks from modeling.inference_models.hf import HFInferenceModel from modeling.inference_model import ( + GenerationMode, GenerationResult, GenerationSettings, ModelCapabilities, @@ -254,7 +255,11 @@ class HFTorchInferenceModel(HFInferenceModel): kwargs["logits_warper"] = new_get_logits_warper( beams=1, ) - if utils.koboldai_vars.newlinemode in ["s", "ns"]: + + if ( + utils.koboldai_vars.newlinemode in ["s", "ns"] + and not m_self.gen_state["allow_eos"] + ): kwargs["eos_token_id"] = -1 kwargs.setdefault("pad_token_id", 2) return new_sample.old_sample(self, *args, **kwargs) @@ -605,3 +610,9 @@ class HFTorchInferenceModel(HFInferenceModel): self.breakmodel = False self.usegpu = False return + + def get_supported_gen_modes(self) -> List[GenerationMode]: + # This changes a torch patch to disallow eos as a bad word. + return super().get_supported_gen_modes() + [ + GenerationMode.UNTIL_EOS + ] \ No newline at end of file From 6e7b0794ea80c9eae1a6bc4f89590e3d657febea Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 15:40:07 -0500 Subject: [PATCH 026/107] Context Menu: Fix for elements with a context-menu attribute but... ...without an entry in `context_menu_items`. 
--- static/koboldai.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/static/koboldai.js b/static/koboldai.js index d7560f54..b25bef31 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -6153,6 +6153,10 @@ process_cookies(); // Show only applicable actions in the context menu let contextMenuType = target.getAttribute("context-menu"); + + // If context menu is not present, return + if (!context_menu_actions[contextMenuType]) return; + for (const contextMenuItem of contextMenu.childNodes) { let shouldShow = contextMenuItem.classList.contains(`context-menu-${contextMenuType}`); From 560fb3bd2d2c054695765c5ae1826cd93f83519c Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 18:08:21 -0500 Subject: [PATCH 027/107] Fix occasional action highlight issue --- static/koboldai.js | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/static/koboldai.js b/static/koboldai.js index 8b70dd6a..0fde7169 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -597,13 +597,11 @@ function do_story_text_updates(action) { story_area.append(item); } } - - - if (action.action['Selected Text'].charAt(0) == ">") { - item.classList.add("action_mode_input"); - } else { - item.classList.remove("action_mode_input"); - } + + item.classList.toggle( + "action_mode_input", + action.action['Selected Text'].replaceAll("\n", "")[0] === ">" + ); if ('wi_highlighted_text' in action.action) { for (chunk of action.action['wi_highlighted_text']) { From 418f3415608de9c375e12d6713427967d13f9d52 Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 21 Jul 2023 18:13:57 -0500 Subject: [PATCH 028/107] Fix a/n depth being visually apart from a/n --- templates/story flyout.html | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/templates/story flyout.html b/templates/story flyout.html index 514edbb9..d08bf8b8 100644 --- a/templates/story flyout.html +++ b/templates/story flyout.html @@ -50,6 +50,14 @@

+
+ {% with menu='author_notes' %}
+ {% with sub_path='' %}
+ {% include 'settings item.html' %}
+ {% endwith %}
+ {% endwith %}
+
+

Genre

Styles the AI will attempt to imitate. Effectiveness depends on model.
@@ -75,14 +83,6 @@ } - -
- {% with menu='author_notes' %}
- {% with sub_path='' %}
- {% include 'settings item.html' %}
- {% endwith %}
- {% endwith %}
-
-
From dd8e5f5d0580c6320557d79ed536375c1ca669c4 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 23 Jul 2023 21:40:08 +0800 Subject: [PATCH 045/107] updated lite to v50 --- static/klite.html | 127 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 103 insertions(+), 24 deletions(-) diff --git a/static/klite.html b/static/klite.html index 4b62dc2f..57c877cc 100644 --- a/static/klite.html +++ b/static/klite.html @@ -3,7 +3,7 @@
From 9cc6972c1c7ac6012181f7c6e43e6e7abb92a827 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 11:30:33 -0500 Subject: [PATCH 064/107] Shh! --- static/koboldai.js | 1 - 1 file changed, 1 deletion(-) diff --git a/static/koboldai.js b/static/koboldai.js index 4902908f..1544ee93 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -776,7 +776,6 @@ function update_status_bar(data) { } function do_ai_busy(data) { - console.log("AIBUSY", data.value) ai_busy = data.value; // Don't allow editing while Mr. Kobold is thinking document.getElementById("Selected Text").contentEditable = !ai_busy; From a6aafb252534b26bbdf034788895c3317b4cdd53 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 13:07:30 -0500 Subject: [PATCH 065/107] GPTQ: Patch QuantLinear to not use CPU RAM --- .../inference_models/gptq_hf_torch/class.py | 23 ++++++++++++++++++- modeling/lazy_loader.py | 3 +-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index 9a1b872e..d942a539 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -7,7 +7,7 @@ import torch import re import shutil import sys -from typing import Union +from typing import Dict, Union import utils import modeling.lazy_loader as lazy_loader @@ -167,6 +167,25 @@ class model_backend(HFTorchInferenceModel): self.model.kai_model = self utils.koboldai_vars.modeldim = self.get_hidden_size() + def _patch_quant(self) -> None: + # QuantLinear loads on the CPU by default, using a lot of RAM! If we + # load it to the same device that the weights are gonna be on, it + # mysteriously uses no additional VRAM + + from gptq import quant_v3 + from gptq import quant_v2 + from gptq import quant_v1 + + def _ql_init_(self, *args, **kwargs): + ret = type(self)._unpatched_init(self, *args, **kwargs) + self.to("cuda:0") + return ret + + for quant_module in [quant_v3, quant_v2, quant_v1]: + quant_module.QuantLinear._unpatched_init = quant_module.QuantLinear.__init__ + quant_module.QuantLinear.__init__ = _ql_init_ + + def _get_model(self, location: str, tf_kwargs: Dict): import gptq from gptq.gptj import load_quant as gptj_load_quant @@ -177,6 +196,8 @@ class model_backend(HFTorchInferenceModel): from gptq.mpt import load_quant as mpt_load_quant from gptq.offload import load_quant_offload + self._patch_quant() + gptq_model, gptq_bits, gptq_groupsize, gptq_file, gptq_version = load_model_gptq_settings(location) v2_bias = False diff --git a/modeling/lazy_loader.py b/modeling/lazy_loader.py index 8fff59d3..a5e7c58f 100644 --- a/modeling/lazy_loader.py +++ b/modeling/lazy_loader.py @@ -358,7 +358,6 @@ def safetensors_load_tensor_independently( ) -> torch.Tensor: """A hacky way to load a tensor by itself and not mmap every single tensor or whatever is causing that big memory spike""" - print("[ld]", tensor_key) with safetensors.safe_open(checkpoint_file, framework="pt", device=device) as f: return f.get_tensor(tensor_key) @@ -379,7 +378,7 @@ def patch_safetensors(callback): # (70 tensors/s -> 65 tensor/s). The memory savings probably # shouldn't be the happening, maybe there's a memory leak # somewhere in our pipeline with CPU tensors. 
- intermediary_device = "cuda" + intermediary_device = "cuda:0" else: intermediary_device = "cpu" From 4a6cccb00227561454e395b796aada44a60b05cf Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 13:09:15 -0500 Subject: [PATCH 066/107] Import fix --- modeling/inference_models/gptq_hf_torch/class.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index d942a539..499b2682 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -90,7 +90,6 @@ class model_backend(HFTorchInferenceModel): def _load(self, save_model: bool, initial_load: bool) -> None: try: - import hf_bleeding_edge from hf_bleeding_edge import AutoModelForCausalLM except ImportError: from transformers import AutoModelForCausalLM From 929917efe9bb51aa4fe2147f6813205908efb3f6 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 13:09:43 -0500 Subject: [PATCH 067/107] Remove shrieking --- modeling/inference_models/gptq_hf_torch/class.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index 499b2682..74f11e18 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -208,19 +208,15 @@ class model_backend(HFTorchInferenceModel): logger.info(f"Using GPTQ file: {gptq_file}, {gptq_bits}-bit model, type {model_type}, version {gptq_version}{' (with bias)' if v2_bias else ''}, groupsize {gptq_groupsize}") - with lazy_loader.use_lazy_load( enable=self.lazy_load, dematerialized_modules=False, ): - print(self.lazy_load) if model_type == "gptj": model = load_quant_offload(gptj_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) elif model_type == "gpt_neox": model = load_quant_offload(gptneox_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) elif model_type == "llama": - print("LLLLLAAAMMMAA") - print(torch.load) model = load_quant_offload(llama_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) elif model_type == "opt": model = load_quant_offload(opt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) From 43a4abaf6320cc86e244cf103cc93b520339550e Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 13:10:33 -0500 Subject: [PATCH 068/107] Remove even more debug --- modeling/lazy_loader.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/modeling/lazy_loader.py b/modeling/lazy_loader.py index a5e7c58f..74770a1c 100644 --- a/modeling/lazy_loader.py +++ b/modeling/lazy_loader.py @@ -176,9 +176,6 @@ class TorchLazyTensor(LazyTensor): CheckpointChunkCache.key = self.key ziproot = checkpoint.namelist()[0].split("/")[0] CheckpointChunkCache.handle = checkpoint.open(f"{ziproot}/data/{self.key}", "r") - - - else: # Cache hit. Hip hip hooray! :^) # print(".", end="", flush=True) @@ -318,7 +315,6 @@ class _LazyUnpickler(RestrictedUnpickler): lazy_loaded_storages: Dict[str, LazyTensor] def __init__(self, *args, **kwargs): - # print(args, kwargs) self.lazy_loaded_storages = {} return super().__init__(*args, **kwargs) @@ -364,12 +360,10 @@ def safetensors_load_tensor_independently( def patch_safetensors(callback): - print("Hi! 
We are patching safetensors") # Safetensors load patch import transformers def safetensors_load(checkpoint_file: str) -> dict: - print("LOAD NOW", safetensors_load) # Monkeypatch applied to safetensors.torch.load_file if utils.koboldai_vars.hascuda: @@ -523,7 +517,6 @@ def use_lazy_load( old_torch_load = torch.load def torch_load(f, map_location=None, pickle_module=pickle, **pickle_load_args): - print("TORCHLOAD", f) model_dict = old_torch_load( f=f, map_location=map_location, From 34aa333c44a16c38ce586efc0fb2118da0c20b0e Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 13:11:06 -0500 Subject: [PATCH 069/107] Last debug --- modeling/patches.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modeling/patches.py b/modeling/patches.py index f5b6bd06..5664ec07 100644 --- a/modeling/patches.py +++ b/modeling/patches.py @@ -144,7 +144,6 @@ class LazyloadPatches: LazyloadPatches._load_state_dict_into_meta_model ) torch.nn.Module._load_from_state_dict = LazyloadPatches._torch_load_from_state_dict - # torch.nn.Module._load_from_state_dict = _agn def __exit__(exc_type, exc_value, exc_traceback) -> None: transformers.modeling_utils._load_state_dict_into_meta_model = LazyloadPatches.old_load_state_dict From a73420c49c1371c49b59816d3122d6e6d4f3b676 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 17:15:59 -0500 Subject: [PATCH 070/107] really really really sketchy breakmodel implementation im gonna go lie down for an extended period of time --- .../inference_models/gptq_hf_torch/class.py | 175 +++++++++++++++--- 1 file changed, 153 insertions(+), 22 deletions(-) diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index 74f11e18..45d18f7b 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -82,6 +82,79 @@ def get_gptq_version(fpath): logger.warning(f"GPTQ model identified as v0, but v1={v1} and v2={v2}") return 0, False +def load_quant_offload_device_map( + load_quant_func, model, checkpoint, wbits, groupsize, device_map, offload_type=0, force_bias=False, +): + from gptq.offload import ( + find_layers, + llama_offload_forward, + gptneox_offload_forward, + gptj_offload_forward, + opt_offload_forward, + bigcode_offload_forward + ) + from transformers.models.llama.modeling_llama import LlamaModel + from transformers.models.opt.modeling_opt import OPTModel + from transformers.models.gpt_neox.modeling_gpt_neox import GPTNeoXModel + from transformers.models.gptj.modeling_gptj import GPTJModel + from transformers.models.gpt_bigcode.modeling_gpt_bigcode import GPTBigCodeModel + model = load_quant_func(model, checkpoint, wbits, groupsize, force_bias=force_bias) + + print(device_map) + + m, layers, remaining = find_layers(model) + + type(m).non_offload_forward = type(m).forward + + # Hook offload_forward into found model + if type(m) == LlamaModel: + type(m).forward = llama_offload_forward + elif type(m) == GPTNeoXModel: + type(m).forward = gptneox_offload_forward + elif type(m) == GPTJModel: + type(m).forward = gptj_offload_forward + elif type(m) == OPTModel: + type(m).forward = opt_offload_forward + elif type(m) == GPTBigCodeModel: + type(m).forward = bigcode_offload_forward + else: + raise RuntimeError(f"Model type {type(m)} not supported by CPU offloader") + + layers_done = len([1 for v in device_map.values() if v != "cpu"]) + print("LDone", layers_done) + + m.cpu_device = torch.device("cpu") + m.fast_offload = layers_done > len(layers) // 2 + 
m.layer_count = len(layers) + m.cpu_layers = len(layers) - layers_done + m.gpu_layers = layers_done + m.offload_type = offload_type + # HACK + m.primary_gpu = list(device_map.values())[0] + + if "layers" not in dir(m): + m.layers = layers + + print(len(layers)) + print(len(device_map)) + + print(m.primary_gpu) + for i in range(len(layers)): + dev = None + for key, device in device_map.items(): + key = int(*[x for x in key.split(".") if x.isdecimal()]) + if key == i: + dev = device + break + if dev is None: + raise ValueError + layers[key].to(dev, torch.float16, False) + + for module in remaining: + module.to(m.primary_gpu) + + return model + class model_backend(HFTorchInferenceModel): def is_valid(self, model_name, model_path, menu_path): @@ -166,7 +239,7 @@ class model_backend(HFTorchInferenceModel): self.model.kai_model = self utils.koboldai_vars.modeldim = self.get_hidden_size() - def _patch_quant(self) -> None: + def _patch_quant(self, device_map) -> None: # QuantLinear loads on the CPU by default, using a lot of RAM! If we # load it to the same device that the weights are gonna be on, it # mysteriously uses no additional VRAM @@ -175,14 +248,54 @@ class model_backend(HFTorchInferenceModel): from gptq import quant_v2 from gptq import quant_v1 - def _ql_init_(self, *args, **kwargs): - ret = type(self)._unpatched_init(self, *args, **kwargs) - self.to("cuda:0") - return ret + def make_quant(module, names, bits, groupsize, name='', force_bias=False): + if isinstance(module, quant_v3.QuantLinear): + return - for quant_module in [quant_v3, quant_v2, quant_v1]: - quant_module.QuantLinear._unpatched_init = quant_module.QuantLinear.__init__ - quant_module.QuantLinear.__init__ = _ql_init_ + for attr in dir(module): + tmp = getattr(module, attr) + name1 = name + '.' + attr if name != '' else attr + if name1 in names: + parts = name1.split(".") + device = None + for i in reversed(range(len(parts))): + maybe_key = ".".join(parts[:i]) + if maybe_key in device_map: + device = device_map[maybe_key] + break + + if device is None: + print(name1) + print(device_map) + raise ValueError + + print("[ql]", name1, device) + delattr(module, attr) + + ql = quant_v3.QuantLinear( + bits, + groupsize, + tmp.in_features, + tmp.out_features, + force_bias or tmp.bias is not None + ) + ql = ql.to(device) + + setattr(module, attr, ql) + + for name1, child in module.named_children(): + make_quant(child, names, bits, groupsize, name + '.' 
+ name1 if name != '' else name1, force_bias=force_bias) + + quant_v3.make_quant = make_quant + + # def _ql_init_(self, *args, **kwargs): + # ret = type(self)._unpatched_init(self, *args, **kwargs) + # self.to("cuda:0") + # return ret + + # for quant_module in [quant_v3, quant_v2, quant_v1]: + # quant_module.QuantLinear._unpatched_init = quant_module.QuantLinear.__init__ + # quant_module.QuantLinear.__init__ = _ql_init_ def _get_model(self, location: str, tf_kwargs: Dict): @@ -193,9 +306,12 @@ class model_backend(HFTorchInferenceModel): from gptq.opt import load_quant as opt_load_quant from gptq.bigcode import load_quant as bigcode_load_quant from gptq.mpt import load_quant as mpt_load_quant - from gptq.offload import load_quant_offload - self._patch_quant() + try: + import hf_bleeding_edge + from hf_bleeding_edge import AutoModelForCausalLM + except ImportError: + from transformers import AutoModelForCausalLM gptq_model, gptq_bits, gptq_groupsize, gptq_file, gptq_version = load_model_gptq_settings(location) v2_bias = False @@ -208,22 +324,43 @@ class model_backend(HFTorchInferenceModel): logger.info(f"Using GPTQ file: {gptq_file}, {gptq_bits}-bit model, type {model_type}, version {gptq_version}{' (with bias)' if v2_bias else ''}, groupsize {gptq_groupsize}") + device_map = {} + + if self.lazy_load: + with lazy_loader.use_lazy_load(dematerialized_modules=True): + metamodel = AutoModelForCausalLM.from_config(self.model_config) + if utils.args.cpu: + device_map = {name: "cpu" for name in utils.layers_module_names} + for name in utils.get_missing_module_names( + metamodel, list(device_map.keys()) + ): + device_map[name] = "cpu" + else: + device_map = self.breakmodel_config.get_device_map( + metamodel + ) + + self._patch_quant(device_map) + with lazy_loader.use_lazy_load( enable=self.lazy_load, dematerialized_modules=False, ): if model_type == "gptj": - model = load_quant_offload(gptj_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) + model = load_quant_offload_device_map(gptj_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) elif model_type == "gpt_neox": - model = load_quant_offload(gptneox_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) + model = load_quant_offload_device_map(gptneox_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) elif model_type == "llama": - model = load_quant_offload(llama_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) + print("YE LAMA") + + # model = llama_load_quant(location, gptq_file, gptq_bits, gptq_groupsize, force_bias=v2_bias) + model = load_quant_offload_device_map(llama_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) elif model_type == "opt": - model = load_quant_offload(opt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) + model = load_quant_offload_device_map(opt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) elif model_type == "mpt": - model = load_quant_offload(mpt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias) + model = load_quant_offload_device_map(mpt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) elif model_type == "gpt_bigcode": - model = 
load_quant_offload(bigcode_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, self.gpu_layers_list, force_bias=v2_bias).half() + model = load_quant_offload_device_map(bigcode_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias).half() else: try: import auto_gptq @@ -231,12 +368,6 @@ class model_backend(HFTorchInferenceModel): except ImportError: raise RuntimeError(f"4-bit load failed. Model type {model_type} not supported in 4-bit") - try: - import hf_bleeding_edge - from hf_bleeding_edge import AutoModelForCausalLM - except ImportError: - from transformers import AutoModelForCausalLM - # Monkey patch in hf_bleeding_edge to avoid having to trust remote code auto_gptq.modeling._utils.AutoConfig = hf_bleeding_edge.AutoConfig auto_gptq.modeling._base.AutoConfig = hf_bleeding_edge.AutoConfig From ad4528b5a6882e1bdb46e111be90d6f931090733 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 17:17:57 -0500 Subject: [PATCH 071/107] critical change --- modeling/inference_models/gptq_hf_torch/class.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index 45d18f7b..10349388 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -103,7 +103,6 @@ def load_quant_offload_device_map( print(device_map) m, layers, remaining = find_layers(model) - type(m).non_offload_forward = type(m).forward # Hook offload_forward into found model From c80de5120c3bfd28f5a4963eabd562915bc7d015 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 19:45:33 -0500 Subject: [PATCH 072/107] Cleanup --- .../inference_models/gptq_hf_torch/class.py | 69 ++++++------------- 1 file changed, 22 insertions(+), 47 deletions(-) diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index 10349388..6fae6779 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -100,8 +100,6 @@ def load_quant_offload_device_map( from transformers.models.gpt_bigcode.modeling_gpt_bigcode import GPTBigCodeModel model = load_quant_func(model, checkpoint, wbits, groupsize, force_bias=force_bias) - print(device_map) - m, layers, remaining = find_layers(model) type(m).non_offload_forward = type(m).forward @@ -120,7 +118,6 @@ def load_quant_offload_device_map( raise RuntimeError(f"Model type {type(m)} not supported by CPU offloader") layers_done = len([1 for v in device_map.values() if v != "cpu"]) - print("LDone", layers_done) m.cpu_device = torch.device("cpu") m.fast_offload = layers_done > len(layers) // 2 @@ -134,10 +131,6 @@ def load_quant_offload_device_map( if "layers" not in dir(m): m.layers = layers - print(len(layers)) - print(len(device_map)) - - print(m.primary_gpu) for i in range(len(layers)): dev = None for key, device in device_map.items(): @@ -184,10 +177,6 @@ class model_backend(HFTorchInferenceModel): except (ValueError, AttributeError): self.gpu_layers_list = [utils.num_layers(self.model_config)] - tf_kwargs = { - "low_cpu_mem_usage": True, - } - # If we're using torch_lazy_loader, we need to get breakmodel config # early so that it knows where to load the individual model tensors logger.debug("lazy_load: {} hascuda: {} breakmodel: {} nobreakmode: {}".format(self.lazy_load, utils.koboldai_vars.hascuda, self.breakmodel, self.nobreakmodel)) @@ -200,9 +189,6 @@ class 
model_backend(HFTorchInferenceModel): self.breakmodel_device_config(self.model_config) if self.lazy_load: - # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time - tf_kwargs.pop("low_cpu_mem_usage", None) - # If we're using lazy loader, we need to figure out what the model's hidden layers are called with lazy_loader.use_lazy_load(dematerialized_modules=True): try: @@ -218,7 +204,7 @@ class model_backend(HFTorchInferenceModel): if self.get_local_model_path(): # Model is stored locally, load it. - self.model = self._get_model(self.get_local_model_path(), tf_kwargs) + self.model = self._get_model(self.get_local_model_path()) self.tokenizer = self._get_tokenizer(self.get_local_model_path()) else: raise NotImplementedError("GPTQ Model downloading not implemented") @@ -238,17 +224,9 @@ class model_backend(HFTorchInferenceModel): self.model.kai_model = self utils.koboldai_vars.modeldim = self.get_hidden_size() - def _patch_quant(self, device_map) -> None: - # QuantLinear loads on the CPU by default, using a lot of RAM! If we - # load it to the same device that the weights are gonna be on, it - # mysteriously uses no additional VRAM - - from gptq import quant_v3 - from gptq import quant_v2 - from gptq import quant_v1 - - def make_quant(module, names, bits, groupsize, name='', force_bias=False): - if isinstance(module, quant_v3.QuantLinear): + def _patch_quant(self, device_map, quant_module) -> None: + def make_quant(module, names, bits, groupsize, name='', force_bias=False, **kwargs): + if isinstance(module, quant_module.QuantLinear): return for attr in dir(module): @@ -264,19 +242,17 @@ class model_backend(HFTorchInferenceModel): break if device is None: - print(name1) - print(device_map) - raise ValueError + raise ValueError(f"No device for {name1}") - print("[ql]", name1, device) delattr(module, attr) - ql = quant_v3.QuantLinear( + ql = quant_module.QuantLinear( bits, groupsize, tmp.in_features, tmp.out_features, - force_bias or tmp.bias is not None + force_bias or tmp.bias is not None, + **kwargs, ) ql = ql.to(device) @@ -285,19 +261,21 @@ class model_backend(HFTorchInferenceModel): for name1, child in module.named_children(): make_quant(child, names, bits, groupsize, name + '.' 
+ name1 if name != '' else name1, force_bias=force_bias) - quant_v3.make_quant = make_quant - - # def _ql_init_(self, *args, **kwargs): - # ret = type(self)._unpatched_init(self, *args, **kwargs) - # self.to("cuda:0") - # return ret - - # for quant_module in [quant_v3, quant_v2, quant_v1]: - # quant_module.QuantLinear._unpatched_init = quant_module.QuantLinear.__init__ - # quant_module.QuantLinear.__init__ = _ql_init_ + quant_module.make_quant = make_quant - def _get_model(self, location: str, tf_kwargs: Dict): + def _patch_quants(self, device_map) -> None: + # Load QuantLinears on the device corresponding to the device map + + from gptq import quant_v3 + from gptq import quant_v2 + from gptq import quant_v1 + + for quant_module in [quant_v3, quant_v2, quant_v1]: + self._patch_quant(device_map, quant_module) + + + def _get_model(self, location: str): import gptq from gptq.gptj import load_quant as gptj_load_quant from gptq.gptneox import load_quant as gptneox_load_quant @@ -339,7 +317,7 @@ class model_backend(HFTorchInferenceModel): metamodel ) - self._patch_quant(device_map) + self._patch_quants(device_map) with lazy_loader.use_lazy_load( enable=self.lazy_load, @@ -350,9 +328,6 @@ class model_backend(HFTorchInferenceModel): elif model_type == "gpt_neox": model = load_quant_offload_device_map(gptneox_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) elif model_type == "llama": - print("YE LAMA") - - # model = llama_load_quant(location, gptq_file, gptq_bits, gptq_groupsize, force_bias=v2_bias) model = load_quant_offload_device_map(llama_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) elif model_type == "opt": model = load_quant_offload_device_map(opt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) From 0f88d520ed05281994672c7d7740e23f3812ccfe Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 24 Jul 2023 21:45:52 -0500 Subject: [PATCH 073/107] UI: Replace shift_down code with builtin event.shiftKey Keeping a global variable that tracks shift is worse because it can get desynced if you leave the window while holding shift (which apparently happens a lot more than you would think) --- static/koboldai.js | 59 ++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/static/koboldai.js b/static/koboldai.js index f775f3f0..242b77e0 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -58,7 +58,6 @@ var rename_return_emit_name = "popup_rename"; var popup_rows = []; var popup_style = ""; var popup_sort = {}; -var shift_down = false; var world_info_data = {}; var world_info_folder_data = {}; var saved_settings = {}; @@ -4925,49 +4924,44 @@ function getCookie(cname, default_return=null) { } function detect_enter_submit(e) { - if (((e.code == "Enter") || (e.code == "NumpadEnter")) && !(shift_down)) { - if (typeof e.stopPropagation != "undefined") { - e.stopPropagation(); - } else { - e.cancelBubble = true; - } - //console.log("submitting"); - document.getElementById("btnsubmit").onclick(); - setTimeout(function() {document.getElementById('input_text').value = '';}, 1); + if (e.shiftKey) return; + if (!["Enter", "NumpadEnter"].includes(e.key)) return; + + if (typeof e.stopPropagation != "undefined") { + e.stopPropagation(); + } else { + e.cancelBubble = true; } + + //console.log("submitting"); + document.getElementById("btnsubmit").onclick(); + setTimeout(function() {document.getElementById('input_text').value = 
'';}, 1); } function detect_enter_text(e) { - if (((e.code == "Enter") || (e.code == "NumpadEnter")) && !(shift_down)) { - if (typeof e.stopPropagation != "undefined") { - e.stopPropagation(); - } else { - e.cancelBubble = true; - } - //get element - //console.log("Doing Text Enter"); - //console.log(e.currentTarget.activeElement); - if (e.currentTarget.activeElement != undefined) { - var item = $(e.currentTarget.activeElement); - item.onchange(); - } + if (e.shiftKey) return; + if (!["Enter", "NumpadEnter"].includes(e.key)) return; + + if (typeof e.stopPropagation != "undefined") { + e.stopPropagation(); + } else { + e.cancelBubble = true; + } + //get element + //console.log("Doing Text Enter"); + //console.log(e.currentTarget.activeElement); + if (e.currentTarget.activeElement != undefined) { + var item = $(e.currentTarget.activeElement); + item.onchange(); } } function detect_key_down(e) { - if ((e.code == "ShiftLeft") || (e.code == "ShiftRight")) { - shift_down = true; - } else if (e.code == "Escape") { + if (e.code == "Escape") { close_menus(); } } -function detect_key_up(e) { - if ((e.code == "ShiftLeft") || (e.code == "ShiftRight")) { - shift_down = false; - } -} - function selectTab(tab) { let tabTarget = document.getElementById(tab.getAttribute("tab-target")); let tabClass = Array.from(tab.classList).filter((c) => c.startsWith("tab-"))[0]; @@ -5935,7 +5929,6 @@ function openClubImport() { //// INIT //// document.onkeydown = detect_key_down; -document.onkeyup = detect_key_up; document.getElementById("input_text").onkeydown = detect_enter_submit; /* -- Popups -- */ From 79226ea66d10d579b019e43d7d1a86ede3659956 Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 25 Jul 2023 21:51:03 +0200 Subject: [PATCH 074/107] Hide TPU API during load --- tpu_mtj_backend.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index 5a5271e2..8a9fa832 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -1116,10 +1116,11 @@ def load_model(path: str, model_type: str, badwordsids=koboldai_settings.badword thread_resources_env = maps.ResourceEnv(maps.Mesh(devices, ('dp', 'mp')), ()) maps.thread_resources.env = thread_resources_env if initial_load: - logger.message(f"KoboldAI has finished loading and is available at the following link for UI 1: {koboldai_vars.cloudflare_link}") - logger.message(f"KoboldAI has finished loading and is available at the following link for UI 2: {koboldai_vars.cloudflare_link}/new_ui") - logger.message(f"KoboldAI has finished loading and is available at the following link for KoboldAI Lite: {koboldai_vars.cloudflare_link}/lite") - logger.message(f"KoboldAI has finished loading and is available at the following link for the API: {koboldai_vars.cloudflare_link}/api") + logger.message(f"KoboldAI has still loading your model but available at the following link for UI 1: {koboldai_vars.cloudflare_link}") + logger.message(f"KoboldAI has still loading your model but available at the following link for UI 2: {koboldai_vars.cloudflare_link}/new_ui") + logger.message(f"KoboldAI has still loading your model but available at the following link for KoboldAI Lite: {koboldai_vars.cloudflare_link}/lite") + logger.message(f"KoboldAI has still loading your model but available at the following link for the API: [Loading Model...]") + logger.message(f"While the model loads you can use the above links to begin setting up your session, for generations you must wait until after its done loading.") global badwords # These are the 
tokens that we don't want the AI to ever write From b20f320b223e6beb26052b2151b08a18330b4433 Mon Sep 17 00:00:00 2001 From: somebody Date: Tue, 25 Jul 2023 22:46:02 -0500 Subject: [PATCH 075/107] Redo workaround --- aiserver.py | 5 + koboldai_settings.py | 24 +++- static/koboldai.css | 42 +++--- static/koboldai.js | 290 +++++++++++++++++++++++---------------- templates/index_new.html | 4 +- 5 files changed, 221 insertions(+), 144 deletions(-) diff --git a/aiserver.py b/aiserver.py index 77afc3d0..6d50ca73 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3902,6 +3902,10 @@ def generate(txt, minimum, maximum, found_entries=None): # Open up token stream emit("stream_tokens", True, broadcast=True, room="UI_2") + # HACK: Show options when streaming more than 1 sequence + if utils.koboldai_vars.output_streaming: + koboldai_vars.actions.show_options(koboldai_vars.numseqs > 1, force=True) + koboldai_vars.generated_tkns = 0 if(found_entries is None): @@ -6166,6 +6170,7 @@ def UI_2_Set_Selected_Text(data): @socketio.on('Use Option Text') @logger.catch def UI_2_Use_Option_Text(data): + koboldai_vars.actions.show_options(False) if koboldai_vars.prompt == "": koboldai_vars.prompt = koboldai_vars.actions.get_current_options()[int(data['option'])]['text'] koboldai_vars.actions.clear_unused_options() diff --git a/koboldai_settings.py b/koboldai_settings.py index 095f1f47..bf824a7c 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1756,11 +1756,15 @@ class KoboldStoryRegister(object): def go_forward(self): action_step = self.action_count+1 - if action_step in self.actions: - if len(self.get_current_options()) == 1: - logger.warning("Going forward with this text: {}".format(self.get_current_options()[0]["text"])) - self.use_option([x['text'] for x in self.actions[action_step]["Options"]].index(self.get_current_options()[0]["text"])) - + if action_step not in self.actions: + return + + self.show_options(len(self.get_current_options()) > 1) + + if len(self.get_current_options()) == 1: + logger.warning("Going forward with this text: {}".format(self.get_current_options()[0]["text"])) + self.use_option([x['text'] for x in self.actions[action_step]["Options"]].index(self.get_current_options()[0]["text"])) + def use_option(self, option_number, action_step=None): if action_step is None: action_step = self.action_count+1 @@ -1798,6 +1802,16 @@ class KoboldStoryRegister(object): process_variable_changes(self._socketio, "story", 'actions', {"id": action_step, 'action': self.actions[action_step]}, None) self.set_game_saved() + def show_options( + self, + should_show: bool, + force: bool = False, + + ) -> None: + if self._koboldai_vars.aibusy and not force: + return + self._socketio.emit("show_options", should_show, broadcast=True, room="UI_2") + def delete_action(self, action_id, keep=True): if action_id in self.actions: old_options = copy.deepcopy(self.actions[action_id]["Options"]) diff --git a/static/koboldai.css b/static/koboldai.css index 145f217e..1c2ebef3 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -1528,7 +1528,7 @@ body { grid-template-columns: 30px auto 30% 30px; grid-template-rows: auto min-content min-content 100px; } -.main-grid[option_length="0"][model_numseqs="1"] { +.main-grid[hide-options="true"] { grid-template-columns: 30px auto 0px 30px; } @@ -1613,39 +1613,39 @@ body { font-style: italic; } -.sequence_area { +#option-container { margin-top: 10px; grid-area: options; background-color: var(--sequence_area_background); overflow-y: scroll; } 
-.sequence_area::-webkit-scrollbar { +#option-container::-webkit-scrollbar { display: none; } @media only screen and (max-aspect-ratio: 7/5) { -.sequences { - margin-top: 5px; - width: 100%; - border: 0px; - border-spacing: 0; - display: flex; - flex-direction: row; - overflow-x: scroll; - scroll-snap-type: x mandatory; -} + #option-container { + margin-top: 5px; + width: 100%; + border: 0px; + border-spacing: 0; + display: flex; + flex-direction: row; + overflow-x: scroll; + scroll-snap-type: x mandatory; + } } @media only screen and (min-aspect-ratio: 7/5) { -.sequences { - margin-top: 5px; - width: 100%; - border: 0px; - border-spacing: 0; - display: flex; - flex-direction: column; -} + #option-container { + margin-top: 5px; + width: 100%; + border: 0px; + border-spacing: 0; + display: flex; + flex-direction: column; + } } .sequence_row { diff --git a/static/koboldai.js b/static/koboldai.js index 1544ee93..87f2f944 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -38,6 +38,7 @@ socket.on("scratchpad_response", recieveScratchpadResponse); socket.on("show_error_notification", function(data) { reportError(data.title, data.text) }); socket.on("generated_wi", showGeneratedWIData); socket.on("stream_tokens", stream_tokens); +socket.on("show_options", show_options); //socket.onAny(function(event_name, data) {console.log({"event": event_name, "class": data.classname, "data": data});}); // Must be done before any elements are made; we track their changes. @@ -86,6 +87,7 @@ var initial_socketio_connection_occured = false; var selected_model_data; var privacy_mode_enabled = false; var ai_busy = false; +var can_show_options = false; var streaming = { windowOpen: false, @@ -309,7 +311,7 @@ function reset_story() { } //clear any options - var option_area = document.getElementById("Select Options"); + var option_area = document.getElementById("option-container"); while (option_area.firstChild) { option_area.removeChild(option_area.firstChild); } @@ -341,7 +343,7 @@ function reset_story() { document.getElementById("Selected Text").setAttribute("contenteditable", "true"); } - document.getElementById('main-grid').setAttribute('option_length', 0); + document.getElementById('main-grid').setAttribute("hide-options", true); $(".chat-message").remove(); addInitChatMessage(); @@ -361,22 +363,143 @@ function fix_text(val) { } } +function create_option_element(text, optionId, actionId, type, itemPinned=false) { + // Type must be "gen" or "history" + const optionContainer = $el("#option-container"); + const row = $e("div", optionContainer, { + classes: ["sequence_row"], + "option_id": optionId, + "action_id": actionId, + }); + + const textcell = document.createElement("span"); + textcell.textContent = text; + textcell.classList.add("sequence"); + textcell.setAttribute("option_id", optionId); + textcell.setAttribute("option_chunk", actionId); + + const iconcell = document.createElement("span"); + iconcell.setAttribute("option_id", optionId); + iconcell.setAttribute("option_chunk", actionId); + iconcell.classList.add("sequnce_icon"); + + const icon = document.createElement("span"); + icon.id = "Pin_"+optionId; + icon.classList.add("material-icons-outlined"); + icon.classList.add("option_icon"); + icon.classList.add("cursor"); + + if (type === "gen") { + icon.classList.add("pin"); + icon.textContent = "push_pin"; + + if (itemPinned) { + icon.classList.add('rotate_45'); + } else { + icon.setAttribute('style', "filter: brightness(50%);"); + } + + iconcell.addEventListener("click", function() { + 
socket.emit("Pinning", { + chunk: actionId, + option: optionId + }); + }); + } else if (type === "history") { + icon.textContent = "cached"; + + const delete_icon = $e("span", iconcell, { + classes: ["material-icons-outlined", "cursor", 'option_icon'], + tooltip: "Delete Option", + option_id: optionId, + option_chunk: actionId, + textContent: 'delete' + }); + + delete_icon.addEventListener("click", function() { + socket.emit("delete_option", { + chunk: actionId, + option: optionId + }); + }); + } + + + iconcell.append(icon); + + textcell.addEventListener("click", function() { + socket.emit("Use Option Text", { + chunk: actionId, + option: optionId, + }); + }); + + row.append(textcell); + row.append(iconcell); + optionContainer.append(row); + return row; +} + +function action_count_changed() { + // Delete all options before the next chunk to hidden + const option_container = document.getElementById("option-container"); + const current_chunk = parseInt(document.getElementById("action_count").textContent) + 1; + + for (const chunk of Array.from(option_container.children)) { + if (parseInt(chunk.getAttribute("action_id")) === current_chunk) { + // good + } else { + chunk.remove(); + } + } +} + +function visible_options_present() { + const optionContainer = $el("#option-container"); + for (const el of optionContainer.childNodes) { + if (el.classList.contains("hidden")) continue; + return true; + } + return false; +} + +function show_options(doShow) { + can_show_options = doShow; + let show = doShow;// && visible_options_present(); + $el("#option-container").classList.toggle("hidden", !show); + $el("#main-grid").setAttribute("hide-options", !show); + + if (show) { + const action = actions_data[current_action + 1]; + if (!action) return; + + create_options({ + id: current_action+1, + action: action + }); + } +} + function create_options(action) { //Set all options before the next chunk to hidden - if (action.id != current_action+1) { + if (action.id != current_action+1) { return; } - var option_chunk = document.getElementById("Select Options"); - - //first, let's clear out our existing data - while (option_chunk.firstChild) { - option_chunk.removeChild(option_chunk.firstChild); + + if (!can_show_options) { + return; } - + + // First, let's clear out our existing data. Note: use querySelectorAll to + // iterate for deletion because other methods resize the list during iteration + for (const option of document.querySelectorAll(".sequence_row")) { + option.remove(); + } + //Let's check if we only have a single redo option. 
In that case we din't show as the user can use the redo button - seen_prev_selection = false; - show_options = false; - for (item of action.action.Options) { + let seen_prev_selection = false; + let show_options = false; + for (const item of action.action.Options) { if (!(item['Previous Selection']) && !(item['Edited'])) { show_options = true; break; @@ -389,100 +512,46 @@ function create_options(action) { } } } - if (!(show_options)) { - document.getElementById('main-grid').setAttribute('option_length', 0); + + const mainGrid = $el("#main-grid"); + const optionContainer = $el("#option-container"); + + if (!show_options) { + mainGrid.setAttribute("hide-options", true); + optionContainer.classList.add("hidden"); return; } - - document.getElementById('main-grid').setAttribute('option_length', action.action.Options.length); - - var table = document.createElement("div"); - table.classList.add("sequences"); - //Add Redo options - let added_options=0; - i=0; - for (item of action.action.Options) { - if ((item['Previous Selection']) && (item.text != "")) { - var row = document.createElement("div"); - row.classList.add("sequence_row"); - var textcell = document.createElement("span"); - textcell.textContent = item.text; - textcell.classList.add("sequence"); - textcell.setAttribute("option_id", i); - textcell.setAttribute("option_chunk", action.id); - var iconcell = document.createElement("span"); - iconcell.setAttribute("option_id", i); - iconcell.setAttribute("option_chunk", action.id); - iconcell.classList.add("sequnce_icon"); - var icon = document.createElement("span"); - icon.id = "Pin_"+i; - icon.classList.add("material-icons-outlined"); - icon.classList.add("option_icon"); - icon.classList.add("cursor"); - icon.textContent = "cached"; - iconcell.append(icon); - delete_icon = $e("span", iconcell, {"classes": ["material-icons-outlined", "cursor", 'option_icon'], - "tooltip": "Delete Option", 'option_id': i, - 'option_chunk': action.id, 'textContent': 'delete'}); - delete_icon.onclick = function () { - socket.emit("delete_option", {"chunk": this.getAttribute("option_chunk"), "option": this.getAttribute("option_id")}); - }; - textcell.onclick = function () { - socket.emit("Use Option Text", {"chunk": this.getAttribute("option_chunk"), "option": this.getAttribute("option_id")}); - }; - row.append(textcell); - row.append(iconcell); - table.append(row); - added_options+=1; - } - i+=1; + + // mainGrid.setAttribute("hide-options", false); + // optionContainer.classList.toggle("hidden", action.action.Options.length < 1); + + // Gens + let optionId = 0; + for (const item of action.action.Options) { + if (!item.text) continue; + if (item.Edited) continue; + if (item["Previous Selection"]) continue; + + create_option_element(item.text, optionId, action.id, "gen", item.Pinned); + optionId++; } - //Add general options - i=0; - for (item of action.action.Options) { - if (!(item.Edited) && !(item['Previous Selection']) && (item.text != "")) { - var row = document.createElement("div"); - row.classList.add("sequence_row"); - var textcell = document.createElement("span"); - textcell.textContent = item.text; - textcell.classList.add("sequence"); - textcell.setAttribute("option_id", i); - textcell.setAttribute("option_chunk", action.id); - var iconcell = document.createElement("span"); - iconcell.setAttribute("option_id", i); - iconcell.setAttribute("option_chunk", action.id); - iconcell.classList.add("sequnce_icon"); - var icon = document.createElement("span"); - icon.id = "Pin_"+i; - 
icon.classList.add("material-icons-outlined"); - icon.classList.add("option_icon"); - icon.classList.add("cursor"); - icon.classList.add("pin"); - icon.textContent = "push_pin"; - if (!(item.Pinned)) { - icon.setAttribute('style', "filter: brightness(50%);"); - } else { - icon.classList.add('rotate_45'); - } - iconcell.append(icon); - iconcell.onclick = function () { - socket.emit("Pinning", {"chunk": this.getAttribute("option_chunk"), "option": this.getAttribute("option_id")}); - }; - textcell.onclick = function () { - socket.emit("Use Option Text", {"chunk": this.getAttribute("option_chunk"), "option": this.getAttribute("option_id")}); - }; - row.append(textcell); - row.append(iconcell); - table.append(row); - added_options+=1; - } - i+=1; + + + // History + optionId = 0; + for (const item of action.action.Options) { + if (!item.text) continue; + if (!item["Previous Selection"]) continue; + + create_option_element(item.text, optionId, action.id, "history"); + optionId++; } - if (added_options > 0) { - option_chunk.append(table); - } - - + + let anyOptions = visible_options_present(); + + $el("#option-container").classList.toggle("hidden", !anyOptions); + $el("#main-grid").setAttribute("hide-options", !anyOptions); + //make sure our last updated chunk is in view //option_chunk.scrollIntoView(); } @@ -520,7 +589,8 @@ function process_actions_data(data) { //update action_type = "update"; } - for (action of actions) { + + for (const action of actions) { actions_data[parseInt(action.id)] = action.action; do_story_text_updates(action); create_options(action); @@ -1029,20 +1099,8 @@ function var_changed(data) { // Change_Theme(getCookie("theme", "Monochrome")); //} - //Set all options before the next chunk to hidden if ((data.classname == "actions") && (data.name == "Action Count")) { - var option_container = document.getElementById("Select Options"); - var current_chunk = parseInt(document.getElementById("action_count").textContent)+1; - - var children = option_container.children; - for (var i = 0; i < children.length; i++) { - var chunk = children[i]; - if (chunk.id == "Select Options Chunk " + current_chunk) { - chunk.classList.remove("hidden"); - } else { - chunk.classList.add("hidden"); - } - } + action_count_changed(); } diff --git a/templates/index_new.html b/templates/index_new.html index 25dee500..50fb0281 100644 --- a/templates/index_new.html +++ b/templates/index_new.html @@ -44,7 +44,7 @@ -
+

Disconnected

@@ -59,7 +59,7 @@ -
+