Merge branch 'united' into accelerate
commit e143963161
aiserver.py | 73
diff --git a/aiserver.py b/aiserver.py
@@ -80,6 +80,17 @@ def new_init(self, *args, **kwargs):
         self.ncols = 99
 tqdm.__init__ = new_init
 
+# Fix some issues with the OPT tokenizer
+from transformers import PreTrainedTokenizerBase
+old_pretrainedtokenizerbase_from_pretrained = PreTrainedTokenizerBase.from_pretrained.__func__
+@classmethod
+def new_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs):
+    tokenizer = old_pretrainedtokenizerbase_from_pretrained(cls, *args, **kwargs)
+    tokenizer._koboldai_header = tokenizer.encode("")
+    tokenizer.add_bos_token = False
+    tokenizer.add_prefix_space = False
+    return tokenizer
+PreTrainedTokenizerBase.from_pretrained = new_pretrainedtokenizerbase_from_pretrained
 
 #==================================================================#
 # Variables & Storage
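Note: the hunk above monkey-patches PreTrainedTokenizerBase.from_pretrained so every loaded tokenizer records the tokens it force-prepends to an empty string. For OPT models that is the </s> BOS token; capturing it once as _koboldai_header and disabling add_bos_token keeps it from being re-inserted on every encode call. A minimal sketch of the same idea (not part of the diff), assuming an OPT checkpoint such as facebook/opt-125m:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
    header = tokenizer.encode("")       # just the forced BOS, e.g. [2] for </s>
    tokenizer.add_bos_token = False     # stop prepending it on later encodes
    body = tokenizer.encode("Hello")    # header-free token ids from now on
    prompt_ids = header + body          # header re-attached exactly once, up front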
@@ -323,11 +334,12 @@ class vars:
     actionmode  = 1
     dynamicscan = False
     host        = False
+    flaskwebgui = False
     nopromptgen = False
     rngpersist  = False
     nogenmod    = False
     welcome     = False # Custom Welcome Text (False is default)
-    newlinemode = "n"
+    newlinemode = "ns"
     quiet       = False # If set will suppress any story text from being printed to the console (will only be seen on the client web page)
     debug       = False # If set to true, will send debug information to the client for display
     lazy_load   = True # Whether or not to use torch_lazy_loader.py for transformers models in order to reduce CPU memory usage
@@ -649,7 +661,7 @@ def loadmodelsettings():
         js   = {}
     if vars.model_type == "xglm" or js.get("compat", "j") == "fairseq_lm":
         vars.newlinemode = "s"  # Default to </s> newline mode if using XGLM
-    if vars.model_type == "opt":
+    if vars.model_type == "opt" or vars.model_type == "bloom":
         vars.newlinemode = "ns"  # Handle </s> but don't convert newlines if using Fairseq models that have newlines trained in them
     vars.modelconfig = js
     if("badwordsids" in js):
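Note: newlinemode controls how literal newlines and </s> are translated between story text and model tokens: "n" leaves newlines alone, "s" swaps them for </s> (XGLM), and the new default "ns" recognizes </s> without touching newlines (OPT, BLOOM, and Fairseq models with newlines trained in). A hedged sketch of what the three values imply; the real logic lives in this repo's utils newline helpers:

    def encodenewlines(txt, newlinemode):
        if newlinemode == "s":            # XGLM: model saw </s> where newlines belong
            return txt.replace("\n", "</s>")
        return txt                        # "n" and "ns": newlines pass through

    def decodenewlines(txt, newlinemode):
        if newlinemode == "s":
            return txt.replace("</s>", "\n")
        if newlinemode == "ns":           # OPT/BLOOM: drop stray </s>, keep real newlines
            return txt.replace("</s>", "")
        return txt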
@@ -1800,6 +1812,10 @@ def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model="
             if(os.path.isdir(vars.custmodpth)):
                 try:
                     tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+                except Exception as e:
+                    pass
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
                 except Exception as e:
                     try:
                         tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@@ -1814,6 +1830,10 @@ def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model="
             elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
                 try:
                     tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
+                except Exception as e:
+                    pass
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
                 except Exception as e:
                     try:
                         tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@@ -1841,6 +1861,10 @@ def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model="
 
             try:
                 tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
+            except Exception as e:
+                pass
+            try:
+                tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
             except Exception as e:
                 try:
                     tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
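Note: the three hunks above all add the same two-step retry, and the pattern is mirrored in the TPU backend further down. The second try runs even when the first succeeds, so a working slow (use_fast=False) tokenizer takes precedence, with GPT2TokenizerFast as the last resort. A hedged sketch of the chain as a helper (names are illustrative, not from the diff):

    from transformers import AutoTokenizer, GPT2TokenizerFast

    def load_tokenizer(path, revision=None):
        tokenizer = None
        try:
            tokenizer = AutoTokenizer.from_pretrained(path, revision=revision, cache_dir="cache")
        except Exception:
            pass                                  # fast tokenizer is optional here
        try:                                      # preferred: slow tokenizer, avoids the OPT issues
            return AutoTokenizer.from_pretrained(path, revision=revision, cache_dir="cache", use_fast=False)
        except Exception:
            try:                                  # last resort: plain GPT-2 vocabulary
                return GPT2TokenizerFast.from_pretrained(path, revision=revision, cache_dir="cache")
            except Exception:
                return tokenizer                  # fall back to the fast one, if any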
@@ -2061,7 +2085,7 @@ def index():
     if 'new_ui' in request.args:
         return render_template('index_new.html', hide_ai_menu=args.noaimenu)
     else:
-        return render_template('index.html', hide_ai_menu=args.noaimenu)
+        return render_template('index.html', hide_ai_menu=args.noaimenu, flaskwebgui=vars.flaskwebgui)
 @app.route('/favicon.ico')
 def favicon():
     return send_from_directory(app.root_path,
@@ -2759,6 +2783,8 @@ def do_connect():
     emit('from_server', {'cmd': 'connected', 'smandelete': vars.smandelete, 'smanrename': vars.smanrename, 'modelname': getmodelname()})
     if(vars.host):
         emit('from_server', {'cmd': 'runs_remotely'})
+    if(vars.flaskwebgui):
+        emit('from_server', {'cmd': 'flaskwebgui'})
     if(vars.allowsp):
         emit('from_server', {'cmd': 'allowsp', 'data': vars.allowsp})
 
@@ -3594,24 +3620,26 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
         global tokenizer
         tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
 
+    lnheader = len(tokenizer._koboldai_header)
+
     # Calculate token budget
     prompttkns = tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', vars.prompt)), max_length=int(2e9), truncation=True)
     lnprompt   = len(prompttkns)
 
     memtokens = tokenizer.encode(utils.encodenewlines(mem), max_length=int(2e9), truncation=True)
     lnmem     = len(memtokens)
-    if(lnmem > vars.max_length - lnsp - vars.genamt - budget_deduction):
+    if(lnmem > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
         raise OverflowError("The memory in your story is too long. Please either write a shorter memory text or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
 
     witokens = tokenizer.encode(utils.encodenewlines(winfo), max_length=int(2e9), truncation=True)
     lnwi     = len(witokens)
-    if(lnmem + lnwi > vars.max_length - lnsp - vars.genamt - budget_deduction):
+    if(lnmem + lnwi > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
         raise OverflowError("The current active world info keys take up too many tokens. Please either write shorter world info, decrease World Info Depth or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
 
     if(anotetxt != ""):
         anotetkns = tokenizer.encode(utils.encodenewlines(anotetxt), max_length=int(2e9), truncation=True)
         lnanote   = len(anotetkns)
-        if(lnmem + lnwi + lnanote > vars.max_length - lnsp - vars.genamt - budget_deduction):
+        if(lnmem + lnwi + lnanote > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
             raise OverflowError("The author's note in your story is too long. Please either write a shorter author's note or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt.")
 
     if(vars.useprompt):
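Note: the new lnheader term simply reserves room for the tokenizer header in every budget check. A worked example with hypothetical numbers:

    max_length, genamt, budget_deduction = 2048, 80, 0   # context size, generation reserve
    lnsp, lnheader = 20, 1                               # soft prompt tokens, OPT </s> header
    budget = max_length - lnheader - lnsp - genamt - budget_deduction
    assert budget == 1947   # room left for memory, world info, A.N., prompt and actions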
@@ -3622,14 +3650,14 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
         lnsubmission = len(tokenizer.encode(utils.encodenewlines(vars.comregex_ai.sub('', submission)), max_length=int(2e9), truncation=True)) if submission is not None else 0
         maybe_lnprompt = lnprompt if vars.useprompt and actionlen > 0 else 0
 
-        if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnsp - vars.genamt - budget_deduction):
+        if(lnmem + lnwi + lnanote + maybe_lnprompt + lnsubmission > vars.max_length - lnheader - lnsp - vars.genamt - budget_deduction):
             raise OverflowError("Your submission is too long. Please either write a shorter submission or increase the Max Tokens setting. If you are using a soft prompt, additionally consider using a smaller soft prompt. If you are using the Always Add Prompt setting, turning it off may help.")
 
     assert budget >= 0
 
     if(actionlen == 0):
         # First/Prompt action
-        tokens = memtokens + witokens + anotetkns + prompttkns
+        tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns
         assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
         ln = len(tokens) + lnsp
         return tokens, ln+1, ln+vars.genamt
@@ -3677,12 +3705,12 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
         # Did we get to add the A.N.? If not, do it here
         if(anotetxt != ""):
             if((not anoteadded) or forceanote):
-                tokens = memtokens + witokens + anotetkns + prompttkns + tokens
+                tokens = tokenizer._koboldai_header + memtokens + witokens + anotetkns + prompttkns + tokens
             else:
-                tokens = memtokens + witokens + prompttkns + tokens
+                tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens
         else:
             # Prepend Memory, WI, and Prompt before action tokens
-            tokens = memtokens + witokens + prompttkns + tokens
+            tokens = tokenizer._koboldai_header + memtokens + witokens + prompttkns + tokens
 
         # Send completed bundle to generator
         assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction
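Note: every branch above now prepends the header exactly once, at the very front of the final bundle, so the model still sees the BOS it expects. Schematically (locals as in the diff):

    tokens = (tokenizer._koboldai_header   # forced BOS captured at load time
              + memtokens                  # story memory
              + witokens                   # active world info
              + anotetkns                  # author's note, when present
              + prompttkns                 # story prompt
              + tokens)                    # most recent action tokens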
@@ -5892,15 +5920,26 @@ if __name__ == "__main__":
             vars.serverstarted = True
             socketio.run(app, host='0.0.0.0', port=port)
     else:
-        import webbrowser
-        webbrowser.open_new('http://localhost:{0}'.format(port))
-        print("{0}Server started!\nYou may now connect with a browser at http://127.0.0.1:{1}/{2}"
-              .format(colors.GREEN, port, colors.END))
-        vars.serverstarted = True
         if args.unblock:
+            import webbrowser
+            webbrowser.open_new('http://localhost:{0}'.format(port))
+            print("{0}Server started!\nYou may now connect with a browser at http://127.0.0.1:{1}/{2}"
+                  .format(colors.GREEN, port, colors.END))
+            vars.serverstarted = True
             socketio.run(app, port=port, host='0.0.0.0')
         else:
-            socketio.run(app, port=port)
+            try:
+                from flaskwebgui import FlaskUI
+                vars.serverstarted = True
+                vars.flaskwebgui = True
+                FlaskUI(app, socketio=socketio, start_server="flask-socketio", maximized=True, close_server_on_exit=True).run()
+            except:
+                import webbrowser
+                webbrowser.open_new('http://localhost:{0}'.format(port))
+                print("{0}Server started!\nYou may now connect with a browser at http://127.0.0.1:{1}/{2}"
+                      .format(colors.GREEN, port, colors.END))
+                vars.serverstarted = True
+                socketio.run(app, port=port)
 
 else:
     general_startup()
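Note: with this change, a local start (no --host, no --unblock) first tries to open the UI in a native flaskwebgui window; if the import or launch fails, it degrades to the old behavior of opening a browser tab and running plain socketio. The optional-dependency pattern in isolation (a sketch built from the same calls as the hunk above):

    try:
        from flaskwebgui import FlaskUI    # optional dependency: native window wrapper
        FlaskUI(app, socketio=socketio, start_server="flask-socketio",
                maximized=True, close_server_on_exit=True).run()
    except Exception:                      # missing package or failed launch
        import webbrowser
        webbrowser.open_new('http://localhost:{0}'.format(port))
        socketio.run(app, port=port)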
diff --git a/static/flask_web_gui.js b/static/flask_web_gui.js
@@ -0,0 +1,20 @@
+async function getRequest(url='') {
+    const response = await fetch(url, {
+        method: 'GET',
+        cache: 'no-cache'
+    })
+}
+
+document.addEventListener('DOMContentLoaded', function() {
+
+    let url = document.location
+    let route = "/flaskwebgui-keep-server-alive";
+    let interval_request = 3 * 1000; //sec
+
+    function keep_alive_server(){
+        getRequest(url + route);
+    }
+
+    setInterval(keep_alive_server, interval_request);
+
+})
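Note: this new script pings the server every 3 seconds so flaskwebgui knows the window is still open. The server side of the handshake is provided by the flaskwebgui package itself; a hedged Python sketch of the protocol it implements (route name taken from the script above, everything else illustrative):

    import time
    from flask import Flask

    app = Flask(__name__)
    last_ping = time.time()

    @app.route('/flaskwebgui-keep-server-alive')
    def keep_alive():
        global last_ping
        last_ping = time.time()   # browser still open; push the shutdown deadline back
        return 'ok'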
diff --git a/templates/index.html b/templates/index.html
@@ -19,6 +19,9 @@
     <script src="static/rangy-core.min.js"></script>
     <script src="static/application.js?ver=1.18.1b"></script>
     <script src="static/favicon.js"></script>
+    {% if flaskwebgui %}
+    <script src="static/flask_web_gui.js"></script>
+    {% endif %}
 </head>
 <body>
     <input type="file" id="remote-save-select" accept="application/json" style="display:none">
diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py
@@ -1324,6 +1324,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
     if(os.path.isdir(vars.custmodpth)):
         try:
             tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+        except Exception as e:
+            pass
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
         except Exception as e:
             try:
                 tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@@ -1336,6 +1340,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
     elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
         try:
             tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
+        except Exception as e:
+            pass
+        try:
+            tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
         except Exception as e:
             try:
                 tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@@ -1348,6 +1356,10 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
     else:
         try:
             tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
+        except Exception as e:
+            pass
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
         except Exception as e:
             try:
                 tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")