From 3c0638bc733886c80667024c9b225a81377fde26 Mon Sep 17 00:00:00 2001
From: KoboldAI Dev
Date: Thu, 13 May 2021 18:58:52 -0400
Subject: [PATCH] Added support for running model remotely on Google Colab

---
 aiserver.py | 85 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 77 insertions(+), 8 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index d6899488..28cd3ce5 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -42,7 +42,8 @@ modellist = [
     ["GPT-2 XL", "gpt2-xl", "16GB"],
     ["InferKit API (requires API key)", "InferKit", ""],
     ["Custom Neo (eg Neo-horni)", "NeoCustom", ""],
-    ["Custom GPT-2 (eg CloverEdition)", "GPT2Custom", ""]
+    ["Custom GPT-2 (eg CloverEdition)", "GPT2Custom", ""],
+    ["Google Colab", "Colab", ""]
 ]
 
 # Variables
@@ -69,6 +70,7 @@ class vars:
     mode     = "play"   # Whether the interface is in play, memory, or edit mode
     editln   = 0        # Which line was last selected in Edit Mode
     url      = "https://api.inferkit.com/v1/models/standard/generate" # InferKit API URL
+    colaburl = ""       # Ngrok url for Google Colab mode
     apikey   = ""       # API key to use for InferKit API calls
     savedir  = getcwd()+"\stories"
     hascuda  = False    # Whether torch has detected CUDA on the system
@@ -134,7 +136,7 @@ print("{0}Welcome to the KoboldAI Client!\nSelect an AI model to continue:{1}\n"
 getModelSelection()
 
 # If transformers model was selected & GPU available, ask to use CPU or GPU
-if(vars.model != "InferKit" and vars.hascuda):
+if((not vars.model in ["InferKit", "Colab"]) and vars.hascuda):
     print("{0}Use GPU or CPU for generation?: (Default GPU){1}\n".format(colors.CYAN, colors.END))
     print("    1 - GPU\n    2 - CPU\n")
     genselected = False
@@ -185,6 +187,11 @@ if(vars.model == "InferKit"):
         finally:
             file.close()
 
+# Ask for ngrok url if Google Colab was selected
+if(vars.model == "Colab"):
+    print("{0}Please enter the ngrok.io URL displayed in Google Colab:{1}\n".format(colors.CYAN, colors.END))
+    vars.colaburl = input("URL> ") + "/request"
+
 # Set logging level to reduce chatter from Flask
 import logging
 log = logging.getLogger('werkzeug')
@@ -200,7 +207,7 @@ socketio = SocketIO(app)
 print("{0}OK!{1}".format(colors.GREEN, colors.END))
 
 # Start transformers and create pipeline
-if(vars.model != "InferKit"):
+if(not vars.model in ["InferKit", "Colab"]):
     if(not vars.noai):
         print("{0}Initializing transformers, please wait...{1}".format(colors.PURPLE, colors.END))
         from transformers import pipeline, GPT2Tokenizer, GPT2LMHeadModel, GPTNeoForCausalLM
@@ -236,6 +243,10 @@ if(vars.model != "InferKit"):
 else:
     # Import requests library for HTTPS calls
     import requests
+    # If we're running Colab, we still need a tokenizer.
+    if(vars.model == "Colab"):
+        from transformers import GPT2Tokenizer
+        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
 
 # Set up Flask routes
 @app.route('/')
@@ -604,11 +615,19 @@ def calcsubmit(txt):
 
         # Send completed bundle to generator
         ln = len(tokens)
-        generate (
-            tokenizer.decode(tokens),
-            ln+1,
-            ln+vars.genamt
-            )
+
+        if(vars.model != "Colab"):
+            generate (
+                tokenizer.decode(tokens),
+                ln+1,
+                ln+vars.genamt
+                )
+        else:
+            sendtocolab(
+                tokenizer.decode(tokens),
+                ln+1,
+                ln+vars.genamt
+                )
 
     # For InferKit web API
     else:
@@ -685,6 +704,56 @@ def generate(txt, min, max):
 
     set_aibusy(0)
 
+#==================================================================#
+# Send transformers-style request to ngrok/colab host
+#==================================================================#
+def sendtocolab(txt, min, max):
+    # Log request to console
+    print("{0}Len:{1}, Txt:{2}{3}".format(colors.YELLOW, len(txt), txt, colors.END))
+
+    # Build request JSON data
+    reqdata = {
+        'text': txt,
+        'min': min,
+        'max': max,
+        'rep_pen': vars.rep_pen,
+        'temperature': vars.temp,
+        'top_p': vars.top_p
+    }
+
+    # Create request
+    req = requests.post(
+        vars.colaburl,
+        json = reqdata
+        )
+
+    # Deal with the response
+    if(req.status_code == 200):
+        genout = req.json()["data"]["text"]
+        print("{0}{1}{2}".format(colors.CYAN, genout, colors.END))
+
+        # Format output before continuing
+        genout = applyoutputformatting(getnewcontent(genout))
+
+        # Add formatted text to Actions array and refresh the game screen
+        vars.actions.append(genout)
+        refresh_story()
+        emit('from_server', {'cmd': 'texteffect', 'data': len(vars.actions)})
+
+        set_aibusy(0)
+    else:
+        # Send error message to web client
+        er = req.json()
+        if("error" in er):
+            code = er["error"]["extensions"]["code"]
+        else:
+            code = (er.get("errors") or [{}])[0].get("extensions", {}).get("code", "unknown")
+
+        errmsg = "Colab API Error: {0} - {1}".format(req.status_code, code)
+        emit('from_server', {'cmd': 'errmsg', 'data': errmsg})
+        set_aibusy(0)
+
+
 #==================================================================#
 # Replaces returns and newlines with HTML breaks
 #==================================================================#
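
Note: the notebook half of this protocol is not part of the patch. For
reference, a compatible Colab-side endpoint could look roughly like the
sketch below. It is a sketch only, under stated assumptions: flask,
flask-ngrok and transformers installed in the Colab runtime, gpt-neo-2.7B
as the hosted model (matching the tokenizer the client loads), and a
/request route (matching the suffix appended to vars.colaburl). None of
these names come from the patch itself.

    # Hypothetical Colab-side counterpart to sendtocolab() -- illustration only.
    from flask import Flask, request, jsonify
    from flask_ngrok import run_with_ngrok   # prints a public ngrok.io URL on startup
    from transformers import pipeline

    app = Flask(__name__)
    run_with_ngrok(app)

    # Assumed model; it should share a vocabulary with the GPT2Tokenizer the
    # client loads, or the client's min/max token arithmetic will be off.
    generator = pipeline('text-generation', model='EleutherAI/gpt-neo-2.7B')

    @app.route('/request', methods=['POST'])
    def handle_request():
        # Field names mirror the reqdata dict built by sendtocolab()
        js = request.get_json()
        genout = generator(
            js['text'],
            do_sample=True,
            min_length=js['min'],
            max_length=js['max'],
            repetition_penalty=js['rep_pen'],
            temperature=js['temperature'],
            top_p=js['top_p']
        )[0]['generated_text']
        # sendtocolab() expects HTTP 200 with {"data": {"text": ...}}; returning
        # the full text (prompt included) mirrors the local generate() path,
        # since the client passes the result through getnewcontent().
        return jsonify({'data': {'text': genout}})

    app.run()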
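The wire format can also be exercised without the full client. Everything
below is a placeholder: the URL is whatever ngrok printed, and min/max stand
in for the ln+1 and ln+vars.genamt values that calcsubmit() computes.

    # Stand-alone smoke test of the request/response contract.
    import requests

    reqdata = {
        'text': 'You look around and see',   # prompt (normally tokenizer.decode(tokens))
        'min': 6,                             # ln+1, with ln = prompt length in tokens
        'max': 65,                            # ln+vars.genamt (example value)
        'rep_pen': 1.0,                       # example sampling settings
        'temperature': 0.9,
        'top_p': 1.0
    }
    req = requests.post('http://xxxxxxxx.ngrok.io/request', json=reqdata)
    if(req.status_code == 200):
        print(req.json()['data']['text'])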