From 33f9f2dc822f7ac21dd448447c1c2ddda1e6032e Mon Sep 17 00:00:00 2001
From: Gnome Ann <>
Date: Sun, 16 Jan 2022 21:09:10 -0500
Subject: [PATCH 1/5] Show message when TPU backend is compiling

---
 aiserver.py           | 29 ++++++++++++++++++++++++++++-
 static/application.js | 11 ++++++++---
 templates/index.html  |  2 +-
 tpu_mtj_backend.py    | 18 +++++++++++++++++-
 4 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 2be0811f..7bf3230f 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -14,6 +14,7 @@ os.environ['EVENTLET_THREADPOOL_SIZE'] = '50'
 from eventlet import tpool
 
 from os import path, getcwd
+import time
 import re
 import json
 import collections
@@ -127,6 +128,8 @@ class vars:
     lua_edited  = set()  # Set of chunk numbers that were edited from a Lua generation modifier
     lua_deleted = set()  # Set of chunk numbers that were deleted from a Lua generation modifier
     generated_tkns = 0   # If using a backend that supports Lua generation modifiers, how many tokens have already been generated, otherwise 0
+    compiling   = False  # If using a TPU Colab, this will be set to True when the TPU backend starts compiling and then set to False again
+    checking    = False  # Whether we are actively checking to see if the TPU backend is compiling
     spfilename  = ""     # Filename of soft prompt to load, or an empty string if not using a soft prompt
     userscripts = []     # List of userscripts to load
     last_userscripts = []  # List of previous userscript filenames from the previous time userscripts were sent via usstatitems
@@ -638,7 +641,7 @@ log.setLevel(logging.ERROR)
 
 # Start flask & SocketIO
 print("{0}Initializing Flask... {1}".format(colors.PURPLE, colors.END), end="")
-from flask import Flask, render_template, Response, request
+from flask import Flask, render_template, Response, request, copy_current_request_context
 from flask_socketio import SocketIO, emit
 app = Flask(__name__)
 app.config['SECRET KEY'] = 'secret!'
@@ -1054,6 +1057,13 @@ else:
                 break
         return excluded_world_info, regeneration_required, halt
 
+    def tpumtjgenerate_compiling_callback() -> None:
+        print(colors.GREEN + "TPU backend compilation triggered" + colors.END)
+        vars.compiling = True
+
+    def tpumtjgenerate_stopped_compiling_callback() -> None:
+        vars.compiling = False
+
     # If we're running Colab or OAI, we still need a tokenizer.
     if(vars.model == "Colab"):
         from transformers import GPT2TokenizerFast
@@ -1068,6 +1078,8 @@ else:
         import tpu_mtj_backend
         tpu_mtj_backend.warper_callback = tpumtjgenerate_warper_callback
         tpu_mtj_backend.stopping_callback = tpumtjgenerate_stopping_callback
+        tpu_mtj_backend.compiling_callback = tpumtjgenerate_compiling_callback
+        tpu_mtj_backend.stopped_compiling_callback = tpumtjgenerate_stopped_compiling_callback
         tpu_mtj_backend.load_model(vars.custmodpth)
         vars.allowsp = True
         vars.modeldim = int(tpu_mtj_backend.params["d_model"])
@@ -1645,6 +1657,7 @@ def execute_genmod():
     vars.lua_koboldbridge.execute_genmod()
 
 def execute_outmod():
+    emit('from_server', {'cmd': 'hidemsg', 'data': ''}, broadcast=True)
     try:
         tpool.execute(vars.lua_koboldbridge.execute_outmod)
     except lupa.LuaError as e:
@@ -2251,6 +2264,18 @@ def settingschanged():
 #==================================================================#
 # Take input text from SocketIO and decide what to do with it
 #==================================================================#
+
+def check_for_backend_compilation():
+    if(vars.checking):
+        return
+    vars.checking = True
+    for _ in range(31):
+        time.sleep(0.06276680299820175)
+        if(vars.compiling):
+            emit('from_server', {'cmd': 'warnmsg', 'data': 'Compiling TPU backend—this usually takes 1–2 minutes...'}, broadcast=True)
+            break
+    vars.checking = False
+
 def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, disable_recentrng=False):
     # Ignore new submissions if the AI is currently busy
     if(vars.aibusy):
@@ -2966,6 +2991,8 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None):
 
     global past
 
+    socketio.start_background_task(copy_current_request_context(check_for_backend_compilation))
+
     if(vars.dynamicscan or (not vars.nogenmod and vars.has_genmod)):
         context = np.tile(np.uint32(txt), (vars.numseqs, 1))
diff --git a/static/application.js b/static/application.js
index 403a9d8b..de703be1 100644
--- a/static/application.js
+++ b/static/application.js
@@ -663,9 +663,9 @@ function showMessage(msg) {
 	message_text.html(msg);
 }
 
-function errMessage(msg) {
+function errMessage(msg, type="error") {
 	message_text.removeClass();
-	message_text.addClass("color_red");
+	message_text.addClass(type == "warn" ? "color_orange" : "color_red");
 	message_text.html(msg);
 }
@@ -1932,7 +1932,12 @@ $(document).ready(function(){
 			}
 		} else if(msg.cmd == "errmsg") {
 			// Send error message
-			errMessage(msg.data);
+			errMessage(msg.data, "error");
+		} else if(msg.cmd == "warnmsg") {
+			// Send warning message
+			errMessage(msg.data, "warn");
+		} else if(msg.cmd == "hidemsg") {
+			hideMessage();
 		} else if(msg.cmd == "texteffect") {
 			// Apply color highlight to line of text
 			newTextHighlight($("#n"+msg.data))
diff --git a/templates/index.html b/templates/index.html
index 8bab5ea6..dec0bcae 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -17,7 +17,7 @@
-
+
diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py
index c68af60c..1afabb27 100644
--- a/tpu_mtj_backend.py
+++ b/tpu_mtj_backend.py
@@ -26,6 +26,15 @@ def warper_callback(logits) -> np.array:
 def stopping_callback(generated, n_generated, excluded_world_info) -> Tuple[List[set], bool, bool]:
     raise NotImplementedError("`tpu_mtj_backend.stopping_callback()` needs to be defined")
 
+def started_compiling_callback() -> None:
+    pass
+
+def stopped_compiling_callback() -> None:
+    pass
+
+def compiling_callback() -> None:
+    pass
+
 def show_spinner():
     bar = progressbar.ProgressBar(max_value=progressbar.UnknownLength, widgets=[progressbar.Timer(), ' ', progressbar.BouncingBar(left='[', right=']', marker='█')])
@@ -358,6 +367,7 @@ class PenalizingCausalTransformer(CausalTransformer):
         # Initialize
         super().__init__(config)
         def generate_static(state, key, ctx, ctx_length, gen_length, numseqs_aux, sampler_options, soft_embeddings=None):
+            compiling_callback()
             numseqs = numseqs_aux.shape[0]
             # These are the tokens that we don't want the AI to ever write
             self.badwords = jnp.array([6880, 50256, 42496, 4613, 17414, 22039, 16410, 27, 29, 38430, 37922, 15913, 24618, 28725, 58, 47175, 36937, 26700, 12878, 16471, 37981, 5218, 29795, 13412, 45160, 3693, 49778, 4211, 20598, 36475, 33409, 44167, 32406, 29847, 29342, 42669, 685, 25787, 7359, 3784, 5320, 33994, 33490, 34516, 43734, 17635, 24293, 9959, 23785, 21737, 28401, 18161, 26358, 32509, 1279, 38155, 18189, 26894, 6927, 14610, 23834, 11037, 14631, 26933, 46904, 22330, 25915, 47934, 38214, 1875, 14692, 41832, 13163, 25970, 29565, 44926, 19841, 37250, 49029, 9609, 44438, 16791, 17816, 30109, 41888, 47527, 42924, 23984, 49074, 33717, 31161, 49082, 30138, 31175, 12240, 14804, 7131, 26076, 33250, 3556, 38381, 36338, 32756, 46581, 17912, 49146])
@@ -452,6 +462,7 @@ class PenalizingCausalTransformer(CausalTransformer):
             axis_resources={'shard': 'mp', 'batch': 'dp'},
         )
         def generate_initial(state, key, ctx, ctx_length, numseqs_aux, soft_embeddings=None):
+            compiling_callback()
            numseqs = numseqs_aux.shape[0]
             @hk.transform
             def generate_initial_inner(context, ctx_length):
@@ -552,6 +563,7 @@ class PenalizingCausalTransformer(CausalTransformer):
         n_generated = 0
         regeneration_required = False
         halt = False
+        started_compiling_callback()
         generate_data, sample_key = self.generate_initial_xmap(self.state, jnp.array(key.take(batch_size)), ctx, ctx_length, numseqs_aux, soft_embeddings)
         sample_key = np.asarray(sample_key[0, 0])
         while True:
@@ -574,13 +586,15 @@ class PenalizingCausalTransformer(CausalTransformer):
                     break
             else:
                 break
+        stopped_compiling_callback()
         return sample_data, n_generated, regeneration_required, halt
 
     def generate_static(self, ctx, ctx_length, gen_length, numseqs, sampler_options, return_logits=False, soft_embeddings=None):
         assert not return_logits
         key = hk.PRNGSequence(random.randint(0, 2 ** 60))
         batch_size = ctx.shape[0]
         self.batch_size = batch_size
-        return self.generate_static_xmap(
+        started_compiling_callback()
+        result = self.generate_static_xmap(
             self.state,
             jnp.array(key.take(batch_size)),
             ctx,
@@ -590,6 +604,8 @@ class PenalizingCausalTransformer(CausalTransformer):
             sampler_options,
             soft_embeddings,
         )
+        stopped_compiling_callback()
+        return result
 
     def infer_dynamic(

From bc3141b2e61abcae7881cd998f716cc7d3cccecc Mon Sep 17 00:00:00 2001
From: henk717
Date: Mon, 17 Jan 2022 05:16:03 +0100
Subject: [PATCH 2/5] Easier Dropdown support for Git

Colab does not allow defining labels for the modes, so to keep things
simple for a version selector, United is now capitalized and Stable has
been added, which behaves the same as leaving the field empty. This
potentially breaks all the dev Colabs, so if you forked one, make sure
it uses United with a capital U for maximum compatibility.
---
 colabkobold.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/colabkobold.sh b/colabkobold.sh
index 2f43f240..33b8fc73 100644
--- a/colabkobold.sh
+++ b/colabkobold.sh
@@ -89,7 +89,10 @@ if [ "$init" != "skip" ]; then
   else
     cd /content
     if [ ! -z ${git+x} ]; then
-      if [ "$git" == "united" ]; then
+      if [ "$git" == "Stable" ]; then
+        git clone https://github.com/koboldai/KoboldAI-Client
+      fi
+      if [ "$git" == "United" ]; then
         git clone https://github.com/henk717/KoboldAI-Client
       fi
       git clone $git

From e69c76c397b13fe6e7ea193a237be3199099d97d Mon Sep 17 00:00:00 2001
From: henk717
Date: Mon, 17 Jan 2022 05:22:29 +0100
Subject: [PATCH 3/5] Update colabkobold.sh

---
 colabkobold.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/colabkobold.sh b/colabkobold.sh
index 33b8fc73..56c46e41 100644
--- a/colabkobold.sh
+++ b/colabkobold.sh
@@ -89,12 +89,15 @@ if [ "$init" != "skip" ]; then
   else
     cd /content
     if [ ! -z ${git+x} ]; then
-      if [ "$git" == "Stable" ]; then
+      if [ "$git" == "Official" ]; then
         git clone https://github.com/koboldai/KoboldAI-Client
       fi
       if [ "$git" == "United" ]; then
         git clone https://github.com/henk717/KoboldAI-Client
       fi
+      if [ "$git" == "united" ]; then
+        git clone https://github.com/henk717/KoboldAI-Client
+      fi
       git clone $git
     else
       git clone https://github.com/koboldai/KoboldAI-Client

From 01b4e0f945d91799e12bd05f478f762ba3c18de2 Mon Sep 17 00:00:00 2001
From: henk717
Date: Mon, 17 Jan 2022 06:12:53 +0100
Subject: [PATCH 4/5] No File Allocation

Why allocate space for a file we are downloading when there is no
chance we will run out of space?
---
 colabkobold.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/colabkobold.sh b/colabkobold.sh
index 56c46e41..3c789199 100644
--- a/colabkobold.sh
+++ b/colabkobold.sh
@@ -159,7 +159,7 @@ fi
 #Download routine for Aria2c scripts
 if [ ! -z ${aria2+x} ]; then
   apt install aria2 -y
-  curl -L $aria2 | aria2c -c -i- -d$dloc --user-agent=KoboldAI
+  curl -L $aria2 | aria2c -c -i- -d$dloc --user-agent=KoboldAI --file-allocation=none
 fi
 
 #Extract the model with 7z

From 1ea70e4bf69ac71276304c883aa1e9a59797c1d1 Mon Sep 17 00:00:00 2001
From: henk717
Date: Mon, 17 Jan 2022 15:06:05 +0100
Subject: [PATCH 5/5] GPU colab

---
 colab/GPU.ipynb | 80 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 colab/GPU.ipynb

diff --git a/colab/GPU.ipynb b/colab/GPU.ipynb
new file mode 100644
index 00000000..24f9bcd9
--- /dev/null
+++ b/colab/GPU.ipynb
@@ -0,0 +1,80 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "ColabKobold GPU",
+      "private_outputs": true,
+      "provenance": [],
+      "collapsed_sections": [],
+      "authorship_tag": "ABX9TyPLrXLvdQI0tqdi6Nz8QBbj",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "\"Open"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "kX9y5koxa58q"
+      },
+      "source": [
+        "# Welcome to KoboldAI on Google Colab, GPU Edition!\n",
+        "This version is still a work in progress; a better description is pending.\n",
+        "\n",
+        "For the best results, use the United version until 0.17 is released."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ewkXkyiFP2Hq"
+      },
+      "source": [
+        "#@title <-- Tap this if you play on Mobile { display-mode: \"form\" }\n",
+        "%%html\n",
+        "Press play on the music player to keep the tab alive, then start KoboldAI below (Uses only 13MB of data)\n
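
Two notes on how PATCH 1/5 works. First, the hook contract: tpu_mtj_backend.py exposes module-level no-op callbacks (compiling_callback, started_compiling_callback, stopped_compiling_callback), and aiserver.py overrides two of them with ordinary functions before calling load_model. Because compiling_callback() is invoked inside the functions handed to JAX's xmap, its Python body runs only while those functions are being traced, which is presumably what lets a plain function call act as a compilation detector. A minimal sketch of wiring the hooks, assuming the patch above is applied; the on_* names, print bodies, and model path are illustrative, not part of the patch:

    import tpu_mtj_backend

    def on_compiling() -> None:
        # Runs while JAX traces the generate functions, i.e. during compilation
        print("TPU backend compilation triggered")

    def on_stopped_compiling() -> None:
        # Runs once generation returns, after any compilation has finished
        print("TPU backend finished compiling")

    # Assign before load_model(), as aiserver.py does in the patch
    tpu_mtj_backend.compiling_callback = on_compiling
    tpu_mtj_backend.stopped_compiling_callback = on_stopped_compiling
    tpu_mtj_backend.load_model("models/my-model")  # hypothetical model path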
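Second, the poller: tpumtjgenerate() starts check_for_backend_compilation with socketio.start_background_task(copy_current_request_context(...)). The wrapper matters because flask_socketio's emit() resolves the sending client from the active request context, which a bare background task lacks; copying the context in lets the task broadcast the warnmsg to connected clients. A reduced sketch of the same pattern; the "submit" event name and payload are placeholders, not KoboldAI's:

    from flask import Flask, copy_current_request_context
    from flask_socketio import SocketIO, emit

    app = Flask(__name__)
    socketio = SocketIO(app)

    @socketio.on("submit")  # placeholder event name
    def on_submit(data):
        @copy_current_request_context
        def poll():
            # emit() works here because the request context was copied in;
            # broadcast=True sends to every connected client, as in the patch
            emit("from_server", {"cmd": "warnmsg", "data": "..."}, broadcast=True)
        socketio.start_background_task(poll)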