Added support for running model remotely on Google Colab
This commit is contained in:
parent
0b113a75b4
commit
3c0638bc73
85
aiserver.py
85
aiserver.py
|
@ -42,7 +42,8 @@ modellist = [
|
||||||
["GPT-2 XL", "gpt2-xl", "16GB"],
|
["GPT-2 XL", "gpt2-xl", "16GB"],
|
||||||
["InferKit API (requires API key)", "InferKit", ""],
|
["InferKit API (requires API key)", "InferKit", ""],
|
||||||
["Custom Neo (eg Neo-horni)", "NeoCustom", ""],
|
["Custom Neo (eg Neo-horni)", "NeoCustom", ""],
|
||||||
["Custom GPT-2 (eg CloverEdition)", "GPT2Custom", ""]
|
["Custom GPT-2 (eg CloverEdition)", "GPT2Custom", ""],
|
||||||
|
["Google Colab", "Colab", ""]
|
||||||
]
|
]
|
||||||
|
|
||||||
# Variables
|
# Variables
|
||||||
|
@ -69,6 +70,7 @@ class vars:
|
||||||
mode = "play" # Whether the interface is in play, memory, or edit mode
|
mode = "play" # Whether the interface is in play, memory, or edit mode
|
||||||
editln = 0 # Which line was last selected in Edit Mode
|
editln = 0 # Which line was last selected in Edit Mode
|
||||||
url = "https://api.inferkit.com/v1/models/standard/generate" # InferKit API URL
|
url = "https://api.inferkit.com/v1/models/standard/generate" # InferKit API URL
|
||||||
|
colaburl = "" # Ngrok url for Google Colab mode
|
||||||
apikey = "" # API key to use for InferKit API calls
|
apikey = "" # API key to use for InferKit API calls
|
||||||
savedir = getcwd()+"\stories"
|
savedir = getcwd()+"\stories"
|
||||||
hascuda = False # Whether torch has detected CUDA on the system
|
hascuda = False # Whether torch has detected CUDA on the system
|
||||||
|
@ -134,7 +136,7 @@ print("{0}Welcome to the KoboldAI Client!\nSelect an AI model to continue:{1}\n"
|
||||||
getModelSelection()
|
getModelSelection()
|
||||||
|
|
||||||
# If transformers model was selected & GPU available, ask to use CPU or GPU
|
# If transformers model was selected & GPU available, ask to use CPU or GPU
|
||||||
if(vars.model != "InferKit" and vars.hascuda):
|
if((not vars.model in ["InferKit", "Colab"]) and vars.hascuda):
|
||||||
print("{0}Use GPU or CPU for generation?: (Default GPU){1}\n".format(colors.CYAN, colors.END))
|
print("{0}Use GPU or CPU for generation?: (Default GPU){1}\n".format(colors.CYAN, colors.END))
|
||||||
print(" 1 - GPU\n 2 - CPU\n")
|
print(" 1 - GPU\n 2 - CPU\n")
|
||||||
genselected = False
|
genselected = False
|
||||||
|
@ -185,6 +187,11 @@ if(vars.model == "InferKit"):
|
||||||
finally:
|
finally:
|
||||||
file.close()
|
file.close()
|
||||||
|
|
||||||
|
# Ask for ngrok url if Google Colab was selected
|
||||||
|
if(vars.model == "Colab"):
|
||||||
|
print("{0}Please enter the ngrok.io URL displayed in Google Colab:{1}\n".format(colors.CYAN, colors.END))
|
||||||
|
vars.colaburl = input("URL> ") + "/request"
|
||||||
|
|
||||||
# Set logging level to reduce chatter from Flask
|
# Set logging level to reduce chatter from Flask
|
||||||
import logging
|
import logging
|
||||||
log = logging.getLogger('werkzeug')
|
log = logging.getLogger('werkzeug')
|
||||||
|
@ -200,7 +207,7 @@ socketio = SocketIO(app)
|
||||||
print("{0}OK!{1}".format(colors.GREEN, colors.END))
|
print("{0}OK!{1}".format(colors.GREEN, colors.END))
|
||||||
|
|
||||||
# Start transformers and create pipeline
|
# Start transformers and create pipeline
|
||||||
if(vars.model != "InferKit"):
|
if(not vars.model in ["InferKit", "Colab"]):
|
||||||
if(not vars.noai):
|
if(not vars.noai):
|
||||||
print("{0}Initializing transformers, please wait...{1}".format(colors.PURPLE, colors.END))
|
print("{0}Initializing transformers, please wait...{1}".format(colors.PURPLE, colors.END))
|
||||||
from transformers import pipeline, GPT2Tokenizer, GPT2LMHeadModel, GPTNeoForCausalLM
|
from transformers import pipeline, GPT2Tokenizer, GPT2LMHeadModel, GPTNeoForCausalLM
|
||||||
|
@ -236,6 +243,10 @@ if(vars.model != "InferKit"):
|
||||||
else:
|
else:
|
||||||
# Import requests library for HTTPS calls
|
# Import requests library for HTTPS calls
|
||||||
import requests
|
import requests
|
||||||
|
# If we're running Colab, we still need a tokenizer.
|
||||||
|
if(vars.model == "Colab"):
|
||||||
|
from transformers import GPT2Tokenizer
|
||||||
|
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
|
||||||
|
|
||||||
# Set up Flask routes
|
# Set up Flask routes
|
||||||
@app.route('/')
|
@app.route('/')
|
||||||
|
@ -604,11 +615,19 @@ def calcsubmit(txt):
|
||||||
|
|
||||||
# Send completed bundle to generator
|
# Send completed bundle to generator
|
||||||
ln = len(tokens)
|
ln = len(tokens)
|
||||||
generate (
|
|
||||||
tokenizer.decode(tokens),
|
if(vars.model != "Colab"):
|
||||||
ln+1,
|
generate (
|
||||||
ln+vars.genamt
|
tokenizer.decode(tokens),
|
||||||
)
|
ln+1,
|
||||||
|
ln+vars.genamt
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
sendtocolab(
|
||||||
|
tokenizer.decode(tokens),
|
||||||
|
ln+1,
|
||||||
|
ln+vars.genamt
|
||||||
|
)
|
||||||
# For InferKit web API
|
# For InferKit web API
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
@ -685,6 +704,56 @@ def generate(txt, min, max):
|
||||||
|
|
||||||
set_aibusy(0)
|
set_aibusy(0)
|
||||||
|
|
||||||
|
#==================================================================#
# Send transformers-style request to ngrok/colab host
#==================================================================#
def sendtocolab(txt, min, max):
    """Submit a generation request to the remote Colab host and handle the reply.

    Args:
        txt: Prompt text to generate from.
        min: Minimum token count for the generation (prompt length + 1).
        max: Maximum token count (prompt length + vars.genamt).

    Side effects: appends the formatted generation to vars.actions and
    refreshes the client screen on success; emits an error message to the
    web client on failure. Always clears the AI-busy flag before returning.
    (Parameter names `min`/`max` shadow builtins but are kept for
    interface compatibility with generate().)
    """
    # Log request to console
    print("{0}Len:{1}, Txt:{2}{3}".format(colors.YELLOW, len(txt), txt, colors.END))
    
    # Build request JSON data; mirrors the transformers-style generate() args
    reqdata = {
        'text': txt,
        'min': min,
        'max': max,
        'rep_pen': vars.rep_pen,
        'temperature': vars.temp,
        'top_p': vars.top_p
    }
    
    # Create request against the ngrok URL collected at startup
    req = requests.post(
        vars.colaburl,
        json = reqdata
    )
    
    # Deal with the response
    if(req.status_code == 200):
        genout = req.json()["data"]["text"]
        print("{0}{1}{2}".format(colors.CYAN, genout, colors.END))
        
        # Format output before continuing
        genout = applyoutputformatting(getnewcontent(genout))
        
        # Add formatted text to Actions array and refresh the game screen
        vars.actions.append(genout)
        refresh_story()
        emit('from_server', {'cmd': 'texteffect', 'data': len(vars.actions)})
        
        set_aibusy(0)
    else:
        # Send error message to web client.
        # Default the code so an unexpected error payload (neither "error"
        # nor "errors" present) still reports instead of raising NameError.
        code = "unknown"
        er = req.json()
        if("error" in er):
            code = er["error"]["extensions"]["code"]
        elif("errors" in er):
            code = er["errors"][0]["extensions"]["code"]
        
        # This endpoint is the Colab host, not InferKit — label accordingly
        errmsg = "Colab API Error: {0} - {1}".format(req.status_code, code)
        emit('from_server', {'cmd': 'errmsg', 'data': errmsg})
        set_aibusy(0)
||||||
|
|
||||||
|
|
||||||
#==================================================================#
|
#==================================================================#
|
||||||
# Replaces returns and newlines with HTML breaks
|
# Replaces returns and newlines with HTML breaks
|
||||||
#==================================================================#
|
#==================================================================#
|
||||||
|
|
Loading…
Reference in New Issue