diff --git a/aiserver.py b/aiserver.py
index 047f0868..028b20ab 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -66,6 +66,8 @@ class vars:
     andepth = 3 # How far back in history to append author's note
     actions = []
     worldinfo = []
+    badwords = []
+    badwordsids = []
     deletewi = -1 # Temporary storage for index to delete
     mode = "play" # Whether the interface is in play, memory, or edit mode
     editln = 0 # Which line was last selected in Edit Mode
@@ -114,6 +116,16 @@ def getModelSelection():
         print("{0}Select an AI model to continue:{1}\n".format(colors.CYAN, colors.END))
         getModelSelection()
 
+#==================================================================#
+# Return all keys in tokenizer dictionary containing char
+#==================================================================#
+def gettokenids(char):
+    keys = []
+    for key in vocab_keys:
+        if(key.find(char) != -1):
+            keys.append(key)
+    return keys
+
 #==================================================================#
 # Startup
 #==================================================================#
@@ -238,6 +250,13 @@ if(not vars.model in ["InferKit", "Colab"]):
     else:
         generator = pipeline('text-generation', model=vars.model)
 
+    # Suppress Author's Note by flagging square brackets
+    vocab = tokenizer.get_vocab()
+    vocab_keys = vocab.keys()
+    vars.badwords = gettokenids("[")
+    for key in vars.badwords:
+        vars.badwordsids.append([vocab[key]])
+
     print("{0}OK! {1} pipeline created!{2}".format(colors.GREEN, vars.model, colors.END))
 else:
     # If we're running Colab, we still need a tokenizer.
@@ -512,6 +531,7 @@ def actionsubmit(data):
         vars.prompt = data
         # Clear the startup text from game screen
         emit('from_server', {'cmd': 'updatescreen', 'data': 'Please wait, generating story...'})
+        calcsubmit(data) # Run the first action through the generator
     else:
         # Dont append submission if it's a blank/continue action
@@ -528,7 +548,6 @@
 # Take submitted text and build the text to be given to generator
 #==================================================================#
 def calcsubmit(txt):
-    vars.lastact = txt # Store most recent action in memory (is this still needed?)
     anotetxt = "" # Placeholder for Author's Note text
     lnanote = 0 # Placeholder for Author's Note length
     forceanote = False # In case we don't have enough actions to hit A.N. depth
@@ -608,13 +627,15 @@
         # Did we get to add the A.N.? If not, do it here
         if(anotetxt != ""):
             if((not anoteadded) or forceanote):
-                tokens = memtokens + anotetkns + prompttkns + tokens
+                tokens = memtokens + witokens + anotetkns + prompttkns + tokens
             else:
-                tokens = memtokens + prompttkns + tokens
+                tokens = memtokens + witokens + prompttkns + tokens
         else:
             # Prepend Memory, WI, and Prompt before action tokens
             tokens = memtokens + witokens + prompttkns + tokens
+
+
         # Send completed bundle to generator
         ln = len(tokens)
@@ -680,11 +701,6 @@ def generate(txt, min, max):
     if(vars.hascuda and vars.usegpu):
         torch.cuda.empty_cache()
 
-    # Suppress Author's Note by flagging square brackets
-    bad_words = []
-    bad_words.append(tokenizer("[", add_prefix_space=True).input_ids)
-    bad_words.append(tokenizer("[", add_prefix_space=False).input_ids)
-
     # Submit input text to generator
     genout = generator(
         txt,
@@ -694,7 +710,8 @@ def generate(txt, min, max):
         repetition_penalty=vars.rep_pen,
         top_p=vars.top_p,
         temperature=vars.temp,
-        bad_words_ids=bad_words
+        bad_words_ids=vars.badwordsids,
+        use_cache=True
         )[0]["generated_text"]
 
     print("{0}{1}{2}".format(colors.CYAN, genout, colors.END))
@@ -748,6 +765,11 @@ def sendtocolab(txt, min, max):
         refresh_story()
         emit('from_server', {'cmd': 'texteffect', 'data': len(vars.actions)})
+        set_aibusy(0)
+    elif(req.status_code == 500):
+        errmsg = "Colab API Error: Failed to get a reply from the server. Please check the colab console."
+        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
+        emit('from_server', {'cmd': 'errmsg', 'data': errmsg})
         set_aibusy(0)
     else:
         # Send error message to web client
@@ -756,8 +778,9 @@ def sendtocolab(txt, min, max):
             code = er["error"]["extensions"]["code"]
         elif("errors" in er):
             code = er["errors"][0]["extensions"]["code"]
-
+
         errmsg = "Colab API Error: {0} - {1}".format(req.status_code, code)
+        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
         emit('from_server', {'cmd': 'errmsg', 'data': errmsg})
         set_aibusy(0)
@@ -774,12 +797,11 @@ def formatforhtml(txt):
 def getnewcontent(txt):
     ln = len(vars.actions)
     if(ln == 0):
-        delim = vars.prompt
+        lastact = tokenizer.encode(vars.prompt)
     else:
-        delim = vars.actions[-1]
+        lastact = tokenizer.encode(vars.actions[-1])
 
-    # Fix issue with tokenizer replacing space+period with period
-    delim = delim.replace(" .", ".")
+    delim = tokenizer.decode(lastact)
 
     split = txt.split(delim)
@@ -1216,6 +1238,7 @@ def importRequest():
         file = open(importpath, "rb")
         vars.importjs = json.load(file)
 
+        # If a bundle file is being imported, select just the Adventures object
        if type(vars.importjs) is dict and "stories" in vars.importjs:
            vars.importjs = vars.importjs["stories"]
@@ -1259,6 +1282,7 @@ def importgame():
        # Copy game contents to vars
        vars.gamestarted = True
 
+        # Support for different versions of export script
        if("actions" in ref):
            if(len(ref["actions"]) > 0):
                vars.prompt = ref["actions"][0]["text"]
diff --git a/readme.txt b/readme.txt
index fd1ae916..5e67a05d 100644
--- a/readme.txt
+++ b/readme.txt
@@ -1,24 +1,22 @@
-Thanks for checking out the KoboldAI Client! Keep up with news and updates on the subreddit:
+Thanks for checking out the KoboldAI Client! Get support and updates on the subreddit:
 https://www.reddit.com/r/KoboldAI/
 
 [ABOUT]
 
-This is a browser front-end for playing with multiple local & remote AI models.
-The purpose is to provide a smoother, web-based UI experience than the various command-line AI apps.
-I'm pushing this out now that the major quality-of-life fearures have been roughed in (generate,
-undo, edit-by-line, memory, save/load, etc), which means there will probably be bugs.
+This is a browser-based front-end for AI-assisted writing with multiple local & remote AI models.
+It offers the standard array of tools, including Memory, Author's Note, World Info, Save & Load,
+adjustable AI settings, formatting options, and the ability to import existing AI Dungeon adventures.
+Current UI Snapshot: https://imgur.com/mjk5Yre
 
-This application uses Transformers (https://huggingface.co/transformers/) to interact with the AI models
-via Tensorflow. Tensorflow has CUDA/GPU support for shorter generation times, but I do not have anything
-in this test release to set up CUDA/GPU support on your system. If you have a high-end GPU with
-sufficient VRAM to run your model of choice, see (https://www.tensorflow.org/install/gpu) for
-instructions on enabling GPU support.
+For local generation, KoboldAI uses Transformers (https://huggingface.co/transformers/) to interact
+with the AI models. This can be done either on CPU, or on a GPU with sufficient hardware. If you
+have a high-end GPU with sufficient VRAM to run your model of choice, see
+(https://www.tensorflow.org/install/gpu) for instructions on enabling GPU support.
 
 Transformers/Tensorflow can still be used on CPU if you do not have high-end hardware, but generation
-times will be much longer. Alternatively, KoboldAI also supports InferKit (https://inferkit.com/).
-This will allow you to send requests to a remotely hosted Megatron-11b model for fast generation times
-on any hardware. This is a paid service, but signing up for a free account will let you generate up
-to 40,000 characters, and the free account will work with KoboldAI.
+times will be much longer. Alternatively, KoboldAI also supports utilizing remotely hosted models.
+The currently supported remote APIs are InferKit and Google Colab; see the dedicated sections below
+for more info on these.
 
 [SETUP]
 
@@ -33,17 +31,6 @@ to 40,000 characters, and the free account will work with KoboldAI.
 5. Select a model from the list. Flask will start and give you a message that it's ready to connect.
 6. Open a web browser and enter http://127.0.0.1:5000/
 
-[FOR INFERKIT INTEGRATION]
-
-If you would like to use InferKit's Megatron-11b model, sign up for a free account on their website.
-https://inferkit.com/
-After verifying your email address, sign in and click on your profile picture in the top right.
-In the drop down menu, click "API Key".
-On the API Key page, click "Reveal API Key" and copy it. When starting KoboldAI and selecting the
-InferKit API model, you will be asked to paste your API key into the terminal. After entering,
-the API key will be stored in the client.settings file for future use.
-You can see your remaining budget for generated characters on their website under "Billing & Usage".
-
 [ENABLE COLORS IN WINDOWS 10 COMMAND LINE]
 
 If you see strange numeric tags in the console output, then your console of choice does not have
@@ -79,4 +66,15 @@ Select an Adventure and click the Accept button.
 If your computer does not have an 8GB GPU to run GPT-Neo locally, you can now run a Google Colab
 notebook hosting a GPT-Neo-2.7B model remotely and connect to it using the KoboldAI client.
 See the instructions on the Colab at the link below:
-https://colab.research.google.com/drive/1uGe9f4ruIQog3RLxfUsoThakvLpHjIkX?usp=sharing
\ No newline at end of file
+https://colab.research.google.com/drive/1uGe9f4ruIQog3RLxfUsoThakvLpHjIkX?usp=sharing
+
+[FOR INFERKIT INTEGRATION]
+
+If you would like to use InferKit's Megatron-11b model, sign up for a free account on their website.
+https://inferkit.com/
+After verifying your email address, sign in and click on your profile picture in the top right.
+In the drop down menu, click "API Key".
+On the API Key page, click "Reveal API Key" and copy it. When starting KoboldAI and selecting the
+InferKit API model, you will be asked to paste your API key into the terminal. After entering,
+the API key will be stored in the client.settings file for future use.
+You can see your remaining budget for generated characters on their website under "Billing & Usage".
\ No newline at end of file
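
A note on the bad-words change above: scanning the whole vocabulary bans every BPE token that
merely contains a bracket (" [", "[[", and so on), whereas the removed tokenizer("[") calls only
flagged the two exact encodings of "[" itself. A minimal sketch of the same idea, assuming the
stock GPT-2 tokenizer; the "gpt2" model name and the prompt are illustrative, not the client's
defaults:

    from transformers import GPT2Tokenizer, pipeline

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

    # One single-token ban entry per vocab key containing "[", mirroring
    # gettokenids() plus the vars.badwordsids loop in the patch.
    vocab = tokenizer.get_vocab()
    badwordsids = [[vocab[key]] for key in vocab.keys() if key.find("[") != -1]

    # The precomputed list is passed straight to generation, as in generate().
    generator = pipeline('text-generation', model='gpt2')
    genout = generator(
        "You are a knight. You enter the cave.",
        do_sample=True,
        max_length=80,
        bad_words_ids=badwordsids,
        use_cache=True
        )[0]["generated_text"]
    print(genout)

Because every banned sequence is a single token, no generated token can contain "[" at all, so
the ban cannot be bypassed by a longer merge that happens to include a bracket.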
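The calcsubmit() hunks also fix an omission where World Info tokens were dropped from the
context whenever an Author's Note was present. An illustrative sketch of the assembled bundle,
again assuming a GPT-2 tokenizer; all the strings are placeholders standing in for what the
client builds from vars:

    from transformers import GPT2Tokenizer

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

    memtokens  = tokenizer.encode("You are a knight of the realm. ")
    witokens   = tokenizer.encode("Dragons are vulnerable under the chin. ")
    anotetkns  = tokenizer.encode("[Author's note: grim and atmospheric.] ")
    prompttkns = tokenizer.encode("You enter the cave. ")
    tokens     = tokenizer.encode("The dragon stirs.")

    # Post-patch order on every path: Memory, then World Info, then the
    # Author's Note (when present), then Prompt, then recent actions.
    tokens = memtokens + witokens + anotetkns + prompttkns + tokens
    txt = tokenizer.decode(tokens)  # the single string handed to the generator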
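Finally, the getnewcontent() change replaces the hard-coded " ." -> "." workaround with an
encode/decode round-trip, so the delimiter used for the split is exactly the string the
tokenizer itself would produce for the last action; any normalization it applies is then
consistent with the generated text (for GPT-2's lossless byte-level vocabulary the round-trip
is an identity). A small sketch under the same tokenizer assumption, with placeholder text:

    from transformers import GPT2Tokenizer

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

    lastact = "The knight drew his sword ."  # last stored action
    delim = tokenizer.decode(tokenizer.encode(lastact))

    # The generator echoes its input context, so splitting on the round-tripped
    # delimiter isolates only the newly generated continuation.
    txt = delim + " It gleamed in the torchlight."
    newcontent = txt.split(delim)[-1]
    print(newcontent)  # -> " It gleamed in the torchlight."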