diff --git a/aiserver.py b/aiserver.py
index cbffe5e6..4ed27799 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -14,6 +14,7 @@ os.environ['EVENTLET_THREADPOOL_SIZE'] = '50'
 from eventlet import tpool
 
 from os import path, getcwd
+import time
 import re
 import json
 import collections
@@ -128,6 +129,8 @@ class vars:
     lua_deleted = set() # Set of chunk numbers that were deleted from a Lua generation modifier
     generated_tkns = 0 # If using a backend that supports Lua generation modifiers, how many tokens have already been generated, otherwise 0
     abort = False # Whether or not generation was aborted by clicking on the submit button during generation
+    compiling = False # If using a TPU Colab, this will be set to True when the TPU backend starts compiling and then set to False again
+    checking = False # Whether or not we are actively checking to see if TPU backend is compiling or not
     spfilename = "" # Filename of soft prompt to load, or an empty string if not using a soft prompt
     userscripts = [] # List of userscripts to load
     last_userscripts = [] # List of previous userscript filenames from the previous time userscripts were send via usstatitems
@@ -639,7 +642,7 @@ log.setLevel(logging.ERROR)
 
 # Start flask & SocketIO
 print("{0}Initializing Flask... {1}".format(colors.PURPLE, colors.END), end="")
-from flask import Flask, render_template, Response, request
+from flask import Flask, render_template, Response, request, copy_current_request_context
 from flask_socketio import SocketIO, emit
 app = Flask(__name__)
 app.config['SECRET KEY'] = 'secret!'
@@ -1052,6 +1055,13 @@ else:
                     break
             return excluded_world_info, regeneration_required, halt
 
+    def tpumtjgenerate_compiling_callback() -> None:
+        print(colors.GREEN + "TPU backend compilation triggered" + colors.END)
+        vars.compiling = True
+
+    def tpumtjgenerate_stopped_compiling_callback() -> None:
+        vars.compiling = False
+
     # If we're running Colab or OAI, we still need a tokenizer.
     if(vars.model == "Colab"):
         from transformers import GPT2TokenizerFast
@@ -1066,6 +1076,8 @@ else:
         import tpu_mtj_backend
         tpu_mtj_backend.warper_callback = tpumtjgenerate_warper_callback
         tpu_mtj_backend.stopping_callback = tpumtjgenerate_stopping_callback
+        tpu_mtj_backend.compiling_callback = tpumtjgenerate_compiling_callback
+        tpu_mtj_backend.stopped_compiling_callback = tpumtjgenerate_stopped_compiling_callback
         tpu_mtj_backend.load_model(vars.custmodpth)
         vars.allowsp = True
         vars.modeldim = int(tpu_mtj_backend.params["d_model"])
@@ -1643,6 +1655,7 @@ def execute_genmod():
     vars.lua_koboldbridge.execute_genmod()
 
 def execute_outmod():
+    emit('from_server', {'cmd': 'hidemsg', 'data': ''}, broadcast=True)
     try:
         tpool.execute(vars.lua_koboldbridge.execute_outmod)
     except lupa.LuaError as e:
@@ -2259,6 +2272,18 @@ def settingschanged():
 #==================================================================#
 # Take input text from SocketIO and decide what to do with it
 #==================================================================#
+
+def check_for_backend_compilation():
+    if(vars.checking):
+        return
+    vars.checking = True
+    for _ in range(31):
+        time.sleep(0.06276680299820175)
+        if(vars.compiling):
+            emit('from_server', {'cmd': 'warnmsg', 'data': 'Compiling TPU backend—this usually takes 1–2 minutes...'}, broadcast=True)
+            break
+    vars.checking = False
+
 def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, disable_recentrng=False):
     # Ignore new submissions if the AI is currently busy
     if(vars.aibusy):
@@ -2972,6 +2997,8 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None):
 
     global past
 
+    socketio.start_background_task(copy_current_request_context(check_for_backend_compilation))
+
     if(vars.dynamicscan or (not vars.nogenmod and vars.has_genmod)):
         context = np.tile(np.uint32(txt), (vars.numseqs, 1))
diff --git a/colab/GPU.ipynb b/colab/GPU.ipynb
new file mode 100644
index 00000000..24f9bcd9
--- /dev/null
+++ b/colab/GPU.ipynb
@@ -0,0 +1,80 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "ColabKobold GPU",
+      "private_outputs": true,
+      "provenance": [],
+      "collapsed_sections": [],
+      "authorship_tag": "ABX9TyPLrXLvdQI0tqdi6Nz8QBbj",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "\"Open"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "kX9y5koxa58q"
+      },
+      "source": [
+        "# Welcome to KoboldAI on Google Colab, GPU Edition!\n",
+        "This version is still a work in progress; a better description is pending.\n",
+        "\n",
+        "For the best results, use the United version until 0.17 is released."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ewkXkyiFP2Hq"
+      },
+      "source": [
+        "#@title <-- Tap this if you play on Mobile { display-mode: \"form\" }\n",
+        "%%html\n",
+        "Press play on the music player to keep the tab alive, then start KoboldAI below (Uses only 13MB of data)\n",
+        "