diff --git a/aiserver.py b/aiserver.py
index aae9c84b..00ca38ec 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -6,7 +6,12 @@
 #==================================================================#
 
 # External packages
+import eventlet
+eventlet.monkey_patch()
 import os
+os.environ['EVENTLET_THREADPOOL_SIZE'] = '1'
+from eventlet import tpool
+
 from os import path, getcwd
 import re
 import tkinter as tk
@@ -559,7 +564,7 @@ from flask import Flask, render_template, Response, request
 from flask_socketio import SocketIO, emit
 app = Flask(__name__)
 app.config['SECRET KEY'] = 'secret!'
-socketio = SocketIO(app)
+socketio = SocketIO(app, async_method="eventlet")
 print("{0}OK!{1}".format(colors.GREEN, colors.END))
 
 # Start transformers and create pipeline
@@ -990,8 +995,8 @@ def load_lua_scripts():
 
     try:
         vars.lua_koboldbridge.obliterate_multiverse()
-        vars.lua_koboldbridge.load_corescript("default.lua")
-        vars.lua_koboldbridge.load_userscripts(filenames, modulenames, descriptions)
+        tpool.execute(vars.lua_koboldbridge.load_corescript, "default.lua")
+        tpool.execute(vars.lua_koboldbridge.load_userscripts, filenames, modulenames, descriptions)
     except lupa.LuaError as e:
         vars.lua_koboldbridge.obliterate_multiverse()
         if(vars.serverstarted):
@@ -1293,7 +1298,7 @@ def lua_is_custommodel():
 def execute_inmod():
     vars.lua_logname = ...
     try:
-        vars.lua_koboldbridge.execute_inmod()
+        tpool.execute(vars.lua_koboldbridge.execute_inmod)
     except lupa.LuaError as e:
         vars.lua_koboldbridge.obliterate_multiverse()
         emit('from_server', {'cmd': 'errmsg', 'data': 'Lua script error, please check console.'}, broadcast=True)
@@ -1307,7 +1312,7 @@ def execute_genmod():
 
 def execute_outmod():
     try:
-        vars.lua_koboldbridge.execute_outmod()
+        tpool.execute(vars.lua_koboldbridge.execute_outmod)
     except lupa.LuaError as e:
         vars.lua_koboldbridge.obliterate_multiverse()
         emit('from_server', {'cmd': 'errmsg', 'data': 'Lua script error, please check console.'}, broadcast=True)
@@ -1315,6 +1320,9 @@ def execute_outmod():
         print("{0}{1}{2}".format(colors.RED, str(e).replace("\033", ""), colors.END), file=sys.stderr)
         print("{0}{1}{2}".format(colors.YELLOW, "Lua engine stopped; please open 'Userscripts' and press Load to reinitialize scripts.", colors.END), file=sys.stderr)
         set_aibusy(0)
+    if(vars.lua_koboldbridge.resend_settings_required):
+        vars.lua_koboldbridge.resend_settings_required = False
+        lua_resend_settings()
 
 #==================================================================#
 # Lua runtime startup
@@ -1348,7 +1356,6 @@ bridged = {
     "has_setting": lua_has_setting,
     "get_setting": lua_get_setting,
     "set_setting": lua_set_setting,
-    "resend_settings": lua_resend_settings,
     "set_chunk": lua_set_chunk,
     "get_modeltype": lua_get_modeltype,
     "get_modelbackend": lua_get_modelbackend,
@@ -2157,100 +2164,106 @@ def calcsubmit(txt):
 #==================================================================#
 # Send text to generator and deal with output
 #==================================================================#
+
+def _generate(txt, minimum, maximum, found_entries):
+    gen_in = tokenizer.encode(txt, return_tensors="pt", truncation=True).long()
+    if(vars.sp is not None):
+        soft_tokens = torch.arange(
+            model.config.vocab_size,
+            model.config.vocab_size + vars.sp.shape[0],
+        )
+        gen_in = torch.cat((soft_tokens[None], gen_in), dim=-1)
+
+    if(vars.hascuda and vars.usegpu):
+        gen_in = gen_in.to(0)
+    elif(vars.hascuda and vars.breakmodel):
+        gen_in = gen_in.to(breakmodel.primary_device)
+    else:
+        gen_in = gen_in.to('cpu')
+
+    model.kai_scanner_head_length = gen_in.shape[-1]
+    model.kai_scanner_excluded_world_info = found_entries
+
+    actions = vars.actions
+    if(vars.dynamicscan):
+        actions = actions.copy()
+
+    with torch.no_grad():
+        already_generated = 0
+        numseqs = vars.numseqs
+        while True:
+            genout = generator(
+                gen_in,
+                do_sample=True,
+                min_length=minimum,
+                max_length=maximum-already_generated,
+                repetition_penalty=vars.rep_pen,
+                bad_words_ids=vars.badwordsids,
+                use_cache=True,
+                num_return_sequences=numseqs
+                )
+            already_generated += len(genout[0]) - len(gen_in[0])
+            if(model.kai_scanner.halt or not model.kai_scanner.regeneration_required):
+                break
+            assert genout.ndim >= 2
+            assert genout.shape[0] == vars.numseqs
+            if(already_generated != vars.lua_koboldbridge.generated_cols):
+                raise RuntimeError("WI scanning error")
+            for r in range(vars.numseqs):
+                for c in range(already_generated):
+                    assert vars.lua_koboldbridge.generated[r+1][c+1] is not None
+                    genout[r][genout.shape[-1] - already_generated - c] = vars.lua_koboldbridge.generated[r+1][c+1]
+            encoded = []
+            for i in range(vars.numseqs):
+                txt = tokenizer.decode(genout[i, -already_generated:])
+                winfo, mem, anotetxt, _found_entries = calcsubmitbudgetheader(txt, force_use_txt=True)
+                found_entries[i].update(_found_entries)
+                txt, _, _ = calcsubmitbudget(len(actions), winfo, mem, anotetxt, actions)
+                encoded.append(tokenizer.encode(txt, return_tensors="pt", truncation=True)[0].long().to(genout.device))
+            max_length = len(max(encoded, key=len))
+            encoded = torch.stack(tuple(torch.nn.functional.pad(e, (max_length - len(e), 0), value=model.config.pad_token_id or model.config.eos_token_id) for e in encoded))
+            genout = torch.cat(
+                (
+                    encoded,
+                    genout[..., -already_generated:],
+                ),
+                dim=-1
+            )
+            if(vars.sp is not None):
+                soft_tokens = torch.arange(
+                    model.config.vocab_size,
+                    model.config.vocab_size + vars.sp.shape[0],
+                    device=genout.device,
+                )
+                genout = torch.cat((soft_tokens.tile(vars.numseqs, 1), genout), dim=-1)
+            diff = genout.shape[-1] - gen_in.shape[-1]
+            minimum += diff
+            maximum += diff
+            gen_in = genout
+            model.kai_scanner_head_length = encoded.shape[-1]
+            numseqs = 1
+
+    return genout, already_generated
+
+
 
 def generate(txt, minimum, maximum, found_entries=None):
     if(found_entries is None):
         found_entries = set()
     found_entries = tuple(found_entries.copy() for _ in range(vars.numseqs))
     print("{0}Min:{1}, Max:{2}, Txt:{3}{4}".format(colors.YELLOW, minimum, maximum, txt, colors.END))
-    
+
     # Store context in memory to use it for comparison with generated content
     vars.lastctx = txt
-    
+
     # Clear CUDA cache if using GPU
    if(vars.hascuda and (vars.usegpu or vars.breakmodel)):
         gc.collect()
         torch.cuda.empty_cache()
-    
+
     # Submit input text to generator
     try:
-        gen_in = tokenizer.encode(txt, return_tensors="pt", truncation=True).long()
-        if(vars.sp is not None):
-            soft_tokens = torch.arange(
-                model.config.vocab_size,
-                model.config.vocab_size + vars.sp.shape[0],
-            )
-            gen_in = torch.cat((soft_tokens[None], gen_in), dim=-1)
-
-        if(vars.hascuda and vars.usegpu):
-            gen_in = gen_in.to(0)
-        elif(vars.hascuda and vars.breakmodel):
-            gen_in = gen_in.to(breakmodel.primary_device)
-        else:
-            gen_in = gen_in.to('cpu')
-
-        model.kai_scanner_head_length = gen_in.shape[-1]
-        model.kai_scanner_excluded_world_info = found_entries
-
-        actions = vars.actions
-        if(vars.dynamicscan):
-            actions = actions.copy()
-
-        with torch.no_grad():
-            already_generated = 0
-            numseqs = vars.numseqs
-            while True:
-                genout = generator(
-                    gen_in,
-                    do_sample=True,
-                    min_length=minimum,
-                    max_length=maximum-already_generated,
-                    repetition_penalty=vars.rep_pen,
-                    bad_words_ids=vars.badwordsids,
-                    use_cache=True,
-                    num_return_sequences=numseqs
-                    )
-                already_generated += len(genout[0]) - len(gen_in[0])
-                if(model.kai_scanner.halt or not model.kai_scanner.regeneration_required):
-                    break
-                assert genout.ndim >= 2
-                assert genout.shape[0] == vars.numseqs
-                if(already_generated != vars.lua_koboldbridge.generated_cols):
-                    raise RuntimeError("WI scanning error")
-                for r in range(vars.numseqs):
-                    for c in range(already_generated):
-                        assert vars.lua_koboldbridge.generated[r+1][c+1] is not None
-                        genout[r][genout.shape[-1] - already_generated - c] = vars.lua_koboldbridge.generated[r+1][c+1]
-                encoded = []
-                for i in range(vars.numseqs):
-                    txt = tokenizer.decode(genout[i, -already_generated:])
-                    winfo, mem, anotetxt, _found_entries = calcsubmitbudgetheader(txt, force_use_txt=True)
-                    found_entries[i].update(_found_entries)
-                    txt, _, _ = calcsubmitbudget(len(actions), winfo, mem, anotetxt, actions)
-                    encoded.append(tokenizer.encode(txt, return_tensors="pt", truncation=True)[0].long().to(genout.device))
-                max_length = len(max(encoded, key=len))
-                encoded = torch.stack(tuple(torch.nn.functional.pad(e, (max_length - len(e), 0), value=model.config.pad_token_id or model.config.eos_token_id) for e in encoded))
-                genout = torch.cat(
-                    (
-                        encoded,
-                        genout[..., -already_generated:],
-                    ),
-                    dim=-1
-                )
-                if(vars.sp is not None):
-                    soft_tokens = torch.arange(
-                        model.config.vocab_size,
-                        model.config.vocab_size + vars.sp.shape[0],
-                        device=genout.device,
-                    )
-                    genout = torch.cat((soft_tokens.tile(vars.numseqs, 1), genout), dim=-1)
-                diff = genout.shape[-1] - gen_in.shape[-1]
-                minimum += diff
-                maximum += diff
-                gen_in = genout
-                model.kai_scanner_head_length = encoded.shape[-1]
-                numseqs = 1
-
+        genout, already_generated = tpool.execute(_generate, txt, minimum, maximum, found_entries)
     except Exception as e:
         if(issubclass(type(e), lupa.LuaError)):
             vars.lua_koboldbridge.obliterate_multiverse()
@@ -2448,7 +2461,8 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None):
             dtype=np.uint32
         )
 
-        genout = tpu_mtj_backend.infer(
+        genout = tpool.execute(
+            tpu_mtj_backend.infer,
             txt,
             gen_len = maximum-minimum+1,
             temp=vars.temp,
@@ -3813,4 +3827,4 @@ if __name__ == "__main__":
         webbrowser.open_new('http://localhost:5000')
         print("{0}\nServer started!\nYou may now connect with a browser at http://127.0.0.1:5000/{1}".format(colors.GREEN, colors.END))
         vars.serverstarted = True
-        socketio.run(app)
+        socketio.run(app, port=5000)
diff --git a/bridge.lua b/bridge.lua
index c950ba90..bcb32203 100644
--- a/bridge.lua
+++ b/bridge.lua
@@ -1679,9 +1679,6 @@ return function(_python, _bridged)
             end
             end
         end
-        if koboldbridge.resend_settings_required then
-            bridged.resend_settings()
-        end
         koboldbridge.userstate = nil
         return r
     end
diff --git a/environments/finetuneanon.yml b/environments/finetuneanon.yml
index 6b549f9d..d3e2e8ae 100644
--- a/environments/finetuneanon.yml
+++ b/environments/finetuneanon.yml
@@ -10,11 +10,11 @@ dependencies:
   - cudatoolkit=11.1
   - tensorflow-gpu
   - python=3.8.*
-  - gevent-websocket
+  - eventlet
   - pip
   - git
   - pip:
     - git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b
     - flask-cloudflared
     - flask-ngrok
-    - lupa
\ No newline at end of file
+    - lupa==1.10
diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index 71f420c9..462a9e22 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -11,10 +11,10 @@ dependencies:
   - python=3.8.*
   - cudatoolkit=11.1
   - transformers
-  - gevent-websocket
+  - eventlet
   - pip
   - git
   - pip:
     - flask-cloudflared
     - flask-ngrok
-    - lupa
\ No newline at end of file
+    - lupa==1.10
diff --git a/environments/rocm-finetune.yml b/environments/rocm-finetune.yml
index dfc2d6d7..53dc26c2 100644
--- a/environments/rocm-finetune.yml
+++ b/environments/rocm-finetune.yml
@@ -6,7 +6,7 @@ dependencies:
   - colorama
   - flask-socketio
   - python=3.8.*
-  - gevent-websocket
+  - eventlet
   - pip
   - git
   - pip:
@@ -16,4 +16,4 @@ dependencies:
     - flask-cloudflared
     - git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b
     - flask-ngrok
-    - lupa
\ No newline at end of file
+    - lupa==1.10
diff --git a/environments/rocm.yml b/environments/rocm.yml
index 91d9522c..cae0c152 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -8,7 +8,7 @@ dependencies:
   - colorama
   - flask-socketio
   - python=3.8.*
-  - gevent-websocket
+  - eventlet
   - pip
   - git
   - pip:
@@ -17,4 +17,4 @@ dependencies:
     - torchvision==0.11.1
     - flask-cloudflared
     - flask-ngrok
-    - lupa
\ No newline at end of file
+    - lupa==1.10
diff --git a/static/application.js b/static/application.js
index 2fb196e0..e23dffc4 100644
--- a/static/application.js
+++ b/static/application.js
@@ -1619,7 +1619,7 @@ $(document).ready(function(){
     seqselcontents = $("#seqselcontents");
 
     // Connect to SocketIO server
-    socket = io.connect(window.document.origin);
+    socket = io.connect(window.document.origin, {transports: ['websocket', 'polling']});
 
     socket.on('from_server', function(msg) {
         if(msg.cmd == "connected") {
diff --git a/templates/index.html b/templates/index.html
index e810eb8d..08980ef3 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -7,7 +7,7 @@
-
+