Bugfixes:
- Expanded bad_word flagging for square brackets to combat Author's Note leakage
- World Info should now work properly if you have an Author's Note defined
- Set generator to use cache to improve performance of custom Neo models
- Added error handling for Colab disconnections
- Now using tokenized & detokenized version of last action to parse out new content
- Updated readme
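
A note on the bad_word change: instead of flagging only the two tokenizations of "[", the client now collects every vocabulary entry containing "[" and passes the matching token ids to the generator as bad_words_ids, which also catches merged BPE tokens such as "[[". A minimal standalone sketch of the technique, assuming a GPT-2-style tokenizer (the "gpt2" model name and the prompt are illustrative placeholders, not what the client loads):

    from transformers import GPT2Tokenizer, pipeline

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    vocab = tokenizer.get_vocab()

    # Flag every vocab entry containing "[" so bracketed text can't be generated
    badwordsids = [[vocab[key]] for key in vocab if key.find("[") != -1]

    generator = pipeline('text-generation', model='gpt2')
    genout = generator(
        "You enter the dark cave.",
        do_sample=True,
        max_length=60,
        bad_words_ids=badwordsids, # suppress bracketed Author's Note leakage
        use_cache=True             # reuse past key/values between decode steps
    )[0]["generated_text"]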
commit 3d070f057e
parent 2721a5e64a

aiserver.py (50 changed lines)
@@ -66,6 +66,8 @@ class vars:
     andepth = 3 # How far back in history to append author's note
     actions = []
     worldinfo = []
+    badwords = []
+    badwordsids = []
     deletewi = -1 # Temporary storage for index to delete
     mode = "play" # Whether the interface is in play, memory, or edit mode
     editln = 0 # Which line was last selected in Edit Mode
@@ -114,6 +116,16 @@ def getModelSelection():
         print("{0}Select an AI model to continue:{1}\n".format(colors.CYAN, colors.END))
         getModelSelection()
 
+#==================================================================#
+# Return all keys in tokenizer dictionary containing char
+#==================================================================#
+def gettokenids(char):
+    keys = []
+    for key in vocab_keys:
+        if(key.find(char) != -1):
+            keys.append(key)
+    return keys
+
 #==================================================================#
 # Startup
 #==================================================================#
@@ -238,6 +250,13 @@ if(not vars.model in ["InferKit", "Colab"]):
     else:
         generator = pipeline('text-generation', model=vars.model)
+
+    # Suppress Author's Note by flagging square brackets
+    vocab = tokenizer.get_vocab()
+    vocab_keys = vocab.keys()
+    vars.badwords = gettokenids("[")
+    for key in vars.badwords:
+        vars.badwordsids.append([vocab[key]])
 
     print("{0}OK! {1} pipeline created!{2}".format(colors.GREEN, vars.model, colors.END))
 else:
     # If we're running Colab, we still need a tokenizer.
@@ -512,6 +531,7 @@ def actionsubmit(data):
         vars.prompt = data
         # Clear the startup text from game screen
         emit('from_server', {'cmd': 'updatescreen', 'data': 'Please wait, generating story...'})
+
         calcsubmit(data) # Run the first action through the generator
     else:
         # Dont append submission if it's a blank/continue action
@@ -528,7 +548,6 @@ def actionsubmit(data):
 # Take submitted text and build the text to be given to generator
 #==================================================================#
 def calcsubmit(txt):
-    vars.lastact = txt # Store most recent action in memory (is this still needed?)
     anotetxt = "" # Placeholder for Author's Note text
     lnanote = 0 # Placeholder for Author's Note length
     forceanote = False # In case we don't have enough actions to hit A.N. depth
@@ -608,13 +627,15 @@ def calcsubmit(txt):
         # Did we get to add the A.N.? If not, do it here
         if(anotetxt != ""):
             if((not anoteadded) or forceanote):
-                tokens = memtokens + anotetkns + prompttkns + tokens
+                tokens = memtokens + witokens + anotetkns + prompttkns + tokens
             else:
-                tokens = memtokens + prompttkns + tokens
+                tokens = memtokens + witokens + prompttkns + tokens
         else:
             # Prepend Memory, WI, and Prompt before action tokens
             tokens = memtokens + witokens + prompttkns + tokens
 
+
+
         # Send completed bundle to generator
         ln = len(tokens)
 
@@ -680,11 +701,6 @@ def generate(txt, min, max):
     if(vars.hascuda and vars.usegpu):
         torch.cuda.empty_cache()
 
-    # Suppress Author's Note by flagging square brackets
-    bad_words = []
-    bad_words.append(tokenizer("[", add_prefix_space=True).input_ids)
-    bad_words.append(tokenizer("[", add_prefix_space=False).input_ids)
-
     # Submit input text to generator
     genout = generator(
         txt,
@@ -694,7 +710,8 @@ def generate(txt, min, max):
         repetition_penalty=vars.rep_pen,
         top_p=vars.top_p,
         temperature=vars.temp,
-        bad_words_ids=bad_words
+        bad_words_ids=vars.badwordsids,
+        use_cache=True
         )[0]["generated_text"]
     print("{0}{1}{2}".format(colors.CYAN, genout, colors.END))
 
@@ -748,6 +765,11 @@ def sendtocolab(txt, min, max):
         refresh_story()
         emit('from_server', {'cmd': 'texteffect', 'data': len(vars.actions)})
 
+        set_aibusy(0)
+    elif(req.status_code == 500):
+        errmsg = "Colab API Error: Failed to get a reply from the server. Please check the colab console."
+        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
+        emit('from_server', {'cmd': 'errmsg', 'data': errmsg})
         set_aibusy(0)
     else:
         # Send error message to web client
@@ -758,6 +780,7 @@ def sendtocolab(txt, min, max):
             code = er["errors"][0]["extensions"]["code"]
 
         errmsg = "Colab API Error: {0} - {1}".format(req.status_code, code)
+        print("{0}{1}{2}".format(colors.RED, errmsg, colors.END))
         emit('from_server', {'cmd': 'errmsg', 'data': errmsg})
         set_aibusy(0)
 
@@ -774,12 +797,11 @@ def formatforhtml(txt):
 def getnewcontent(txt):
     ln = len(vars.actions)
     if(ln == 0):
-        delim = vars.prompt
+        lastact = tokenizer.encode(vars.prompt)
     else:
-        delim = vars.actions[-1]
+        lastact = tokenizer.encode(vars.actions[-1])
 
-    # Fix issue with tokenizer replacing space+period with period
-    delim = delim.replace(" .", ".")
+    delim = tokenizer.decode(lastact)
 
     split = txt.split(delim)
 
@@ -1216,6 +1238,7 @@ def importRequest():
         file = open(importpath, "rb")
         vars.importjs = json.load(file)
 
+        # If a bundle file is being imported, select just the Adventures object
         if type(vars.importjs) is dict and "stories" in vars.importjs:
             vars.importjs = vars.importjs["stories"]
 
@@ -1259,6 +1282,7 @@ def importgame():
         # Copy game contents to vars
         vars.gamestarted = True
 
+        # Support for different versions of export script
        if("actions" in ref):
             if(len(ref["actions"]) > 0):
                 vars.prompt = ref["actions"][0]["text"]
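
Note on the getnewcontent() change above: the generator's output has been through tokenizer.decode(), whose default cleanup rewrites patterns like " ." to ".", so a raw action string may no longer appear verbatim in the generated text. Round-tripping the last action through the tokenizer normalizes it the same way before it is used as a split delimiter. A rough illustration, assuming the GPT-2 tokenizer with its default clean_up_tokenization_spaces behavior (the strings are made up for the example):

    from transformers import GPT2Tokenizer

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

    lastaction = "The knight drew his sword ."               # as stored in vars.actions
    generated = "The knight drew his sword. He charged in."  # as returned by the pipeline

    # A raw split fails: " ." was cleaned up to "." in the generated text
    assert generated.split(lastaction)[-1] == generated

    # Encode+decode applies the same cleanup, so the delimiter matches
    delim = tokenizer.decode(tokenizer.encode(lastaction))
    newcontent = generated.split(delim)[-1]  # " He charged in."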
readme.txt (48 changed lines)
@@ -1,24 +1,22 @@
-Thanks for checking out the KoboldAI Client! Keep up with news and updates on the subreddit:
+Thanks for checking out the KoboldAI Client! Get support and updates on the subreddit:
 https://www.reddit.com/r/KoboldAI/
 
 [ABOUT]
 
-This is a browser front-end for playing with multiple local & remote AI models.
-The purpose is to provide a smoother, web-based UI experience than the various command-line AI apps.
-I'm pushing this out now that the major quality-of-life fearures have been roughed in (generate,
-undo, edit-by-line, memory, save/load, etc), which means there will probably be bugs.
+This is a browser-based front-end for AI-assisted writing with multiple local & remote AI models.
+It offers the standard array of tools, including Memory, Author's Note, World Info, Save & Load,
+adjustable AI settings, formatting options, and the ability to import existing AI Dungeon adventures.
+Current UI Snapshot: https://imgur.com/mjk5Yre
 
-This application uses Transformers (https://huggingface.co/transformers/) to interact with the AI models
-via Tensorflow. Tensorflow has CUDA/GPU support for shorter generation times, but I do not have anything
-in this test release to set up CUDA/GPU support on your system. If you have a high-end GPU with
-sufficient VRAM to run your model of choice, see (https://www.tensorflow.org/install/gpu) for
-instructions on enabling GPU support.
+For local generation, KoboldAI uses Transformers (https://huggingface.co/transformers/) to interact
+with the AI models. This can be done either on CPU, or GPU with sufficient hardware. If you have a
+high-end GPU with sufficient VRAM to run your model of choice, see
+(https://www.tensorflow.org/install/gpu) for instructions on enabling GPU support.
 
 Transformers/Tensorflow can still be used on CPU if you do not have high-end hardware, but generation
-times will be much longer. Alternatively, KoboldAI also supports InferKit (https://inferkit.com/).
-This will allow you to send requests to a remotely hosted Megatron-11b model for fast generation times
-on any hardware. This is a paid service, but signing up for a free account will let you generate up
-to 40,000 characters, and the free account will work with KoboldAI.
+times will be much longer. Alternatively, KoboldAI also supports utilizing remotely-hosted models.
+The currently supported remote APIs are InferKit and Google Colab, see the dedicated sections below
+for more info on these.
 
 [SETUP]
 
@@ -33,17 +31,6 @@ to 40,000 characters, and the free account will work with KoboldAI.
 5. Select a model from the list. Flask will start and give you a message that it's ready to connect.
 6. Open a web browser and enter http://127.0.0.1:5000/
 
-[FOR INFERKIT INTEGRATION]
-
-If you would like to use InferKit's Megatron-11b model, sign up for a free account on their website.
-https://inferkit.com/
-After verifying your email address, sign in and click on your profile picture in the top right.
-In the drop down menu, click "API Key".
-On the API Key page, click "Reveal API Key" and copy it. When starting KoboldAI and selecting the
-InferKit API model, you will be asked to paste your API key into the terminal. After entering,
-the API key will be stored in the client.settings file for future use.
-You can see your remaining budget for generated characters on their website under "Billing & Usage".
-
 [ENABLE COLORS IN WINDOWS 10 COMMAND LINE]
 
 If you see strange numeric tags in the console output, then your console of choice does not have
@@ -80,3 +67,14 @@ If your computer does not have an 8GB GPU to run GPT-Neo locally, you can now run a Google Colab
 notebook hosting a GPT-Neo-2.7B model remotely and connect to it using the KoboldAI client.
 See the instructions on the Colab at the link below:
 https://colab.research.google.com/drive/1uGe9f4ruIQog3RLxfUsoThakvLpHjIkX?usp=sharing
+
+[FOR INFERKIT INTEGRATION]
+
+If you would like to use InferKit's Megatron-11b model, sign up for a free account on their website.
+https://inferkit.com/
+After verifying your email address, sign in and click on your profile picture in the top right.
+In the drop down menu, click "API Key".
+On the API Key page, click "Reveal API Key" and copy it. When starting KoboldAI and selecting the
+InferKit API model, you will be asked to paste your API key into the terminal. After entering,
+the API key will be stored in the client.settings file for future use.
+You can see your remaining budget for generated characters on their website under "Billing & Usage".