Added support for running model remotely on Google Colab

KoboldAI Dev 2021-05-13 18:58:52 -04:00
parent 0b113a75b4
commit 3c0638bc73
1 changed file with 77 additions and 8 deletions

@@ -42,7 +42,8 @@ modellist = [
     ["GPT-2 XL", "gpt2-xl", "16GB"],
     ["InferKit API (requires API key)", "InferKit", ""],
     ["Custom Neo (eg Neo-horni)", "NeoCustom", ""],
-    ["Custom GPT-2 (eg CloverEdition)", "GPT2Custom", ""]
+    ["Custom GPT-2 (eg CloverEdition)", "GPT2Custom", ""],
+    ["Google Colab", "Colab", ""]
     ]
 
 # Variables
@@ -69,6 +70,7 @@ class vars:
     mode     = "play"   # Whether the interface is in play, memory, or edit mode
     editln   = 0        # Which line was last selected in Edit Mode
     url      = "https://api.inferkit.com/v1/models/standard/generate" # InferKit API URL
+    colaburl = ""       # Ngrok url for Google Colab mode
     apikey   = ""       # API key to use for InferKit API calls
     savedir  = getcwd()+"\stories"
     hascuda  = False    # Whether torch has detected CUDA on the system
@@ -134,7 +136,7 @@ print("{0}Welcome to the KoboldAI Client!\nSelect an AI model to continue:{1}\n"
 getModelSelection()
 
 # If transformers model was selected & GPU available, ask to use CPU or GPU
-if(vars.model != "InferKit" and vars.hascuda):
+if((not vars.model in ["InferKit", "Colab"]) and vars.hascuda):
     print("{0}Use GPU or CPU for generation?: (Default GPU){1}\n".format(colors.CYAN, colors.END))
     print("    1 - GPU\n    2 - CPU\n")
     genselected = False
@@ -185,6 +187,11 @@ if(vars.model == "InferKit"):
         finally:
             file.close()
 
+# Ask for ngrok url if Google Colab was selected
+if(vars.model == "Colab"):
+    print("{0}Please enter the ngrok.io URL displayed in Google Colab:{1}\n".format(colors.CYAN, colors.END))
+    vars.colaburl = input("URL> ") + "/request"
+
 # Set logging level to reduce chatter from Flask
 import logging
 log = logging.getLogger('werkzeug')
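
Note: the URL entered here is the public tunnel address that the Colab notebook prints when it starts ngrok; the client appends "/request" to form the endpoint that sendtocolab() (added later in this commit) will POST to. The subdomain below is illustrative, not from the commit:

    # Hypothetical session; the actual subdomain is random per tunnel
    # URL> https://abcd1234.ngrok.io
    # vars.colaburl -> "https://abcd1234.ngrok.io/request"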
@@ -200,7 +207,7 @@ socketio = SocketIO(app)
 print("{0}OK!{1}".format(colors.GREEN, colors.END))
 
 # Start transformers and create pipeline
-if(vars.model != "InferKit"):
+if(not vars.model in ["InferKit", "Colab"]):
     if(not vars.noai):
         print("{0}Initializing transformers, please wait...{1}".format(colors.PURPLE, colors.END))
         from transformers import pipeline, GPT2Tokenizer, GPT2LMHeadModel, GPTNeoForCausalLM
@@ -236,6 +243,10 @@ if(vars.model != "InferKit"):
 else:
     # Import requests library for HTTPS calls
     import requests
+    # If we're running Colab, we still need a tokenizer.
+    if(vars.model == "Colab"):
+        from transformers import GPT2Tokenizer
+        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
 
 # Set up Flask routes
 @app.route('/')
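
Why load a tokenizer when generation happens remotely? The client still measures and trims the prompt in token space before shipping plain text to the notebook, which is why calcsubmit (next hunk) calls tokenizer.decode(tokens). A minimal sketch of that pattern, assuming the same GPT-Neo tokenizer; the function and parameter names here are illustrative, not from the commit:

    from transformers import GPT2Tokenizer

    tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")

    def budget_prompt(story_text, max_ctx=1024, genamt=60):
        # Keep only the newest tokens that fit the context window,
        # leaving headroom for the requested generation length.
        ids = tokenizer.encode(story_text)
        ids = ids[-(max_ctx - genamt):]
        return tokenizer.decode(ids), len(ids)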
@@ -604,11 +615,19 @@ def calcsubmit(txt):
         
         # Send completed bundle to generator
         ln = len(tokens)
-        generate (
-            tokenizer.decode(tokens),
-            ln+1,
-            ln+vars.genamt
-            )
+        
+        if(vars.model != "Colab"):
+            generate (
+                tokenizer.decode(tokens),
+                ln+1,
+                ln+vars.genamt
+                )
+        else:
+            sendtocolab(
+                tokenizer.decode(tokens),
+                ln+1,
+                ln+vars.genamt
+                )
     
     # For InferKit web API
     else:
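
With this dispatch, sendtocolab() takes the same (txt, min, max) arguments as the local generate(). Based on the hunk below, the wire format works out to roughly the following; the numbers assume a 60-token prompt with genamt = 60, and all sampling values are illustrative:

    # POST vars.colaburl (the ngrok URL + "/request")
    # Request JSON:
    #   {"text": "...prompt...", "min": 61, "max": 120,
    #    "rep_pen": 1.1, "temperature": 0.5, "top_p": 0.9}
    # Expected 200 response:
    #   {"data": {"text": "...generated continuation..."}}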
@@ -685,6 +704,56 @@ def generate(txt, min, max):
     set_aibusy(0)
 
+#==================================================================#
+#  Send transformers-style request to ngrok/colab host
+#==================================================================#
+def sendtocolab(txt, min, max):
+    # Log request to console
+    print("{0}Len:{1}, Txt:{2}{3}".format(colors.YELLOW, len(txt), txt, colors.END))
+    
+    # Build request JSON data
+    reqdata = {
+        'text': txt,
+        'min': min,
+        'max': max,
+        'rep_pen': vars.rep_pen,
+        'temperature': vars.temp,
+        'top_p': vars.top_p
+        }
+    
+    # Create request
+    req = requests.post(
+        vars.colaburl,
+        json = reqdata
+        )
+    
+    # Deal with the response
+    if(req.status_code == 200):
+        genout = req.json()["data"]["text"]
+        print("{0}{1}{2}".format(colors.CYAN, genout, colors.END))
+        
+        # Format output before continuing
+        genout = applyoutputformatting(getnewcontent(genout))
+        
+        # Add formatted text to Actions array and refresh the game screen
+        vars.actions.append(genout)
+        refresh_story()
+        emit('from_server', {'cmd': 'texteffect', 'data': len(vars.actions)})
+        
+        set_aibusy(0)
+    else:
+        # Send error message to web client
+        er = req.json()
+        if("error" in er):
+            code = er["error"]["extensions"]["code"]
+        elif("errors" in er):
+            code = er["errors"][0]["extensions"]["code"]
+        
+        errmsg = "Colab API Error: {0} - {1}".format(req.status_code, code)
+        emit('from_server', {'cmd': 'errmsg', 'data': errmsg})
+        set_aibusy(0)
+
 #==================================================================#
 #  Replaces returns and newlines with HTML breaks
 #==================================================================#
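
The notebook side of this protocol is not part of the commit. For orientation only, a minimal Flask handler compatible with sendtocolab() might look like the sketch below; the route name follows from the "/request" suffix above, but the model choice, sampling call, and handler name are assumptions, not KoboldAI's actual Colab code:

    # Hypothetical Colab-side endpoint; NOT part of this commit.
    from flask import Flask, request, jsonify
    from transformers import pipeline

    app = Flask(__name__)
    generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")  # assumed model

    @app.route("/request", methods=["POST"])
    def handle_request():
        js = request.get_json()
        out = generator(
            js["text"],
            do_sample=True,
            min_length=js["min"],
            max_length=js["max"],
            repetition_penalty=js["rep_pen"],
            temperature=js["temperature"],
            top_p=js["top_p"]
        )[0]["generated_text"]
        # Reply in the shape the client reads: req.json()["data"]["text"]
        return jsonify({"data": {"text": out}})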