Merge branch 'united' into api

vfbd 2022-08-12 13:57:50 -04:00
commit a7fb2c8414
5 changed files with 185 additions and 129 deletions

View File: aiserver.py

@@ -755,7 +755,10 @@ def device_config(config):
global breakmodel, generator
import breakmodel
n_layers = utils.num_layers(config)
if(args.breakmodel_gpulayers is not None or (utils.HAS_ACCELERATE and args.breakmodel_disklayers is not None)):
if args.cpu:
breakmodel.gpu_blocks = [0]*n_layers
return
elif(args.breakmodel_gpulayers is not None or (utils.HAS_ACCELERATE and args.breakmodel_disklayers is not None)):
try:
if(not args.breakmodel_gpulayers):
breakmodel.gpu_blocks = []
@@ -1428,6 +1431,8 @@ def get_model_info(model, directory=""):
url = True
elif not utils.HAS_ACCELERATE and not torch.cuda.is_available():
pass
elif args.cpu:
pass
else:
layer_count = get_layer_count(model, directory=directory)
if layer_count is None:
@@ -1952,8 +1957,12 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
time.sleep(0.1)
if gpu_layers is not None:
args.breakmodel_gpulayers = gpu_layers
elif initial_load:
gpu_layers = args.breakmodel_gpulayers
if disk_layers is not None:
args.breakmodel_disklayers = int(disk_layers)
elif initial_load:
disk_layers = args.breakmodel_disklayers
#We need to wipe out the existing model and refresh the cuda cache
model = None
@@ -2064,41 +2073,19 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
else:
print("{0}NOT FOUND!{1}".format(colors.YELLOW, colors.END))
if args.model:
if(vars.hascuda):
genselected = True
if args.cpu:
vars.usegpu = False
gpu_layers = None
disk_layers = None
vars.breakmodel = False
elif vars.hascuda:
if(vars.bmsupported):
vars.usegpu = False
vars.breakmodel = True
else:
vars.breakmodel = False
vars.usegpu = True
vars.breakmodel = utils.HAS_ACCELERATE
if(vars.bmsupported):
vars.usegpu = False
vars.breakmodel = True
if(args.cpu):
vars.usegpu = False
vars.breakmodel = utils.HAS_ACCELERATE
elif(vars.hascuda):
if(vars.bmsupported):
genselected = True
vars.usegpu = False
vars.breakmodel = True
else:
genselected = False
else:
genselected = False
if(vars.hascuda):
if(use_gpu):
if(vars.bmsupported):
vars.breakmodel = True
vars.usegpu = False
genselected = True
else:
vars.breakmodel = False
vars.usegpu = True
genselected = True
else:
vars.breakmodel = utils.HAS_ACCELERATE
vars.usegpu = False
genselected = True
# Ask for API key if InferKit was selected
if(vars.model == "InferKit"):
@@ -2317,7 +2304,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
# If we're using torch_lazy_loader, we need to get breakmodel config
# early so that it knows where to load the individual model tensors
if(utils.HAS_ACCELERATE or vars.lazy_load and vars.hascuda and vars.breakmodel):
if (utils.HAS_ACCELERATE or vars.lazy_load and vars.hascuda and vars.breakmodel) and not vars.nobreakmodel:
print(1)
device_config(model_config)
# Download model from Huggingface if it does not exist, otherwise load locally
@@ -2448,6 +2436,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
elif(vars.breakmodel): # Use both RAM and VRAM (breakmodel)
vars.modeldim = get_hidden_size_from_model(model)
if(not vars.lazy_load):
print(2)
device_config(model.config)
move_model_to_devices(model)
elif(utils.HAS_ACCELERATE and __import__("breakmodel").disk_blocks > 0):
@@ -3694,7 +3683,7 @@ def get_message(msg):
else:
filename = "settings/{}.breakmodel".format(vars.model.replace('/', '_'))
f = open(filename, "w")
f.write(msg['gpu_layers'] + '\n' + msg['disk_layers'])
f.write(str(msg['gpu_layers']) + '\n' + str(msg['disk_layers']))
f.close()
vars.colaburl = msg['url'] + "/request"
load_model(use_gpu=msg['use_gpu'], gpu_layers=msg['gpu_layers'], disk_layers=msg['disk_layers'], online_model=msg['online_model'])
@@ -3842,6 +3831,40 @@ def get_message(msg):
emit('from_server', {'cmd': 'set_debug', 'data': msg['data']}, broadcast=True)
if vars.debug:
send_debug()
elif(msg['cmd'] == 'getfieldbudget'):
unencoded = msg["data"]["unencoded"]
field = msg["data"]["field"]
# Tokenizer may be undefined here when a model has not been chosen.
if "tokenizer" not in globals():
# We don't have a tokenizer, just return nulls.
emit(
'from_server',
{'cmd': 'showfieldbudget', 'data': {"length": None, "max": None, "field": field}},
broadcast=True
)
return
header_length = len(tokenizer._koboldai_header)
max_tokens = vars.max_length - header_length - vars.sp_length - vars.genamt
if not unencoded:
# Unencoded is empty, just return 0
emit(
'from_server',
{'cmd': 'showfieldbudget', 'data': {"length": 0, "max": max_tokens, "field": field}},
broadcast=True
)
else:
if field == "anoteinput":
unencoded = buildauthorsnote(unencoded, msg["data"]["anotetemplate"])
tokens_length = len(tokenizer.encode(unencoded))
emit(
'from_server',
{'cmd': 'showfieldbudget', 'data': {"length": tokens_length, "max": max_tokens, "field": field}},
broadcast=True
)
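
For reference, the budget arithmetic in the new getfieldbudget handler is a single subtraction over the context size. A minimal standalone sketch follows, with hypothetical numbers standing in for vars.max_length, vars.sp_length, vars.genamt, and the tokenizer header:

# Hypothetical values; the real ones come from vars and the loaded tokenizer.
max_length = 2048    # vars.max_length: total context the model accepts
header_length = 1    # len(tokenizer._koboldai_header)
sp_length = 20       # vars.sp_length: tokens consumed by the soft prompt
genamt = 80          # vars.genamt: tokens reserved for the generation itself

# Tokens left for the user-editable fields (memory, Author's Note, input):
max_tokens = max_length - header_length - sp_length - genamt
print(max_tokens)  # 1947

# A field's usage is then just the encoded length of its text:
# tokens_length = len(tokenizer.encode(unencoded))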
#==================================================================#
# Send userscripts list to client
@@ -4326,6 +4349,12 @@ def actionredo():
#==================================================================#
#
#==================================================================#
def buildauthorsnote(authorsnote, template):
# Build Author's Note if set
if authorsnote == "":
return ""
return ("\n" + template + "\n").replace("<|>", authorsnote)
def calcsubmitbudgetheader(txt, **kwargs):
# Scan for WorldInfo matches
winfo, found_entries = checkworldinfo(txt, **kwargs)
@@ -4336,11 +4365,7 @@ def calcsubmitbudgetheader(txt, **kwargs):
else:
mem = vars.memory
# Build Author's Note if set
if(vars.authornote != ""):
anotetxt = ("\n" + vars.authornotetemplate + "\n").replace("<|>", vars.authornote)
else:
anotetxt = ""
anotetxt = buildauthorsnote(vars.authornote, vars.authornotetemplate)
return winfo, mem, anotetxt, found_entries
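
Since the extracted buildauthorsnote helper is a pure function, its contract is easy to verify in isolation. A quick self-contained check (the note and template strings are illustrative only):

def buildauthorsnote(authorsnote, template):
    # Substitute the note into the template's <|> placeholder, padded with
    # newlines; an empty note produces an empty string.
    if authorsnote == "":
        return ""
    return ("\n" + template + "\n").replace("<|>", authorsnote)

assert buildauthorsnote("", "[Author's Note: <|>]") == ""
assert (buildauthorsnote("The hero is exhausted.", "[Author's Note: <|>]")
        == "\n[Author's Note: The hero is exhausted.]\n")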

View File: colab/vscode.ipynb

@@ -1,76 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "ColabKobold Code",
"provenance": [],
"authorship_tag": "ABX9TyOuIHmyxj4U9dipAib4hfIi",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "TPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/henk717/KoboldAI/blob/united/colab/vscode.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# ColabKobold VSCode Edition\n",
"This is a special edition of ColabKobold aimed at developers, it will not start a KoboldAI instance for you to play KoboldAI and instead will launch a fully functional copy of VSCode for easy development.\n",
"\n",
"Few things of note:\n",
"1. Make sure the desired (or no) accelertor is selected on Colab, you do not want a TPU ban for not using it.\n",
"1. The Version can be replaced with your github URL and appended with -b for the branch for example \"https://github.com/henk717/koboldai -b united\" dependencies will automatically be installed from requirements.txt or requirements_mtj.txt.\n",
"1. With the args you can specify launch options for the KoboldAI Deployment Script, this way you can easily preinstall models to your development instance so you have a model to test with. To install TPU requirements specify the -m TPUMeshTransformerGPTJ argument.\n",
"1. You will need an Ngrok auth token which you can obtain here : https://dashboard.ngrok.com/get-started/your-authtoken\n",
"1. KoboldAI is installed in /content/koboldai-client opening this folder is enough to automatically get full git history and revision support. Also keep in mind that it mounts your Google Drive, be careful comitting directly from this instance.\n",
"1. With Ctrl + Shift + ` you can get a terminal to launch KoboldAI with your own parameters, launching with --colab is recommended.\n",
"\n",
"# [If you are not a developer and are looking to use KoboldAI click here](https://henk.tech/colabkobold)"
],
"metadata": {
"id": "hMRnGz42Xsy3"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "40B1QvI3Xv02"
},
"outputs": [],
"source": [
"#@title VSCode Server\n",
"Version = \"United\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
"Args = \"-m TPUMeshTransformerGPTJ -a https://api.wandb.ai/files/ve-forbryderne/skein/files/gpt-j-6b-skein-jax/aria2.txt\" #@param {type:\"string\"}\n",
"Authtoken = \"\" #@param {type:\"string\"}\n",
"\n",
"from google.colab import drive\n",
"drive.mount('/content/drive/')\n",
"\n",
"!wget https://henk.tech/ckds -O - | bash /dev/stdin -g $Version -i only $Args\n",
"\n",
"!pip install colabcode\n",
"!pip install 'flask>=2.1.0'\n",
"from colabcode import ColabCode\n",
"ColabCode(authtoken=Authtoken)"
]
}
]
}

View File: static/application.js

@@ -512,6 +512,16 @@ function addWiLine(ob) {
$(".wisortable-excluded-dynamic").removeClass("wisortable-excluded-dynamic");
$(this).parent().css("max-height", "").find(".wicomment").find(".form-control").css("max-height", "");
});
for (const wientry of document.getElementsByClassName("wientry")) {
// If we are uninitialized, skip.
if ($(wientry).closest(".wilistitem-uninitialized").length) continue;
// add() will not add if the class is already present
wientry.classList.add("tokens-counted");
}
registerTokenCounters();
}
function addWiFolder(uid, ob) {
@@ -835,6 +845,7 @@ function exitMemoryMode() {
button_actmem.html("Memory");
show([button_actback, button_actfwd, button_actretry, button_actwi]);
input_text.val("");
updateInputBudget(input_text[0]);
// Hide Author's Note field
anote_menu.slideUp("fast");
}
@@ -1078,7 +1089,7 @@ function buildLoadModelList(ar, menu, breadcrumbs, showdelete) {
html = html + "<span class=\"loadlisticon loadmodellisticon-folder oi oi-folder allowed\" aria-hidden=\"true\"></span>"
} else {
//this is a model
html = html + "<div class=\"loadlistpadding\"></div>"
html = html + "<div class=\"loadlisticon oi oi-caret-right allowed\"></div>"
}
//now let's do the delete icon if applicable
@@ -1096,6 +1107,7 @@ function buildLoadModelList(ar, menu, breadcrumbs, showdelete) {
</div>"
loadmodelcontent.append(html);
//If this is a menu
console.log(ar[i]);
if(ar[i][3]) {
$("#loadmodel"+i).off("click").on("click", (function () {
return function () {
@@ -1105,15 +1117,27 @@
})(i));
//Normal load
} else {
$("#loadmodel"+i).off("click").on("click", (function () {
return function () {
$("#use_gpu_div").addClass("hidden");
$("#modelkey").addClass("hidden");
$("#modellayers").addClass("hidden");
socket.send({'cmd': 'selectmodel', 'data': $(this).attr("name")});
highlightLoadLine($(this));
}
})(i));
if (['NeoCustom', 'GPT2Custom'].includes(menu)) {
$("#loadmodel"+i).off("click").on("click", (function () {
return function () {
$("#use_gpu_div").addClass("hidden");
$("#modelkey").addClass("hidden");
$("#modellayers").addClass("hidden");
socket.send({'cmd': 'selectmodel', 'data': $(this).attr("name"), 'path': $(this).attr("pretty_name")});
highlightLoadLine($(this));
}
})(i));
} else {
$("#loadmodel"+i).off("click").on("click", (function () {
return function () {
$("#use_gpu_div").addClass("hidden");
$("#modelkey").addClass("hidden");
$("#modellayers").addClass("hidden");
socket.send({'cmd': 'selectmodel', 'data': $(this).attr("name")});
highlightLoadLine($(this));
}
})(i));
}
}
}
}
@@ -2139,6 +2163,37 @@ function interpolateRGB(color0, color1, t) {
]
}
function updateInputBudget(inputElement) {
let data = {"unencoded": inputElement.value, "field": inputElement.id};
if (inputElement.id === "anoteinput") {
data["anotetemplate"] = $("#anotetemplate").val();
}
socket.send({"cmd": "getfieldbudget", "data": data});
}
function registerTokenCounters() {
// Add token counters to all input containers with the class of "tokens-counted",
// if a token counter is not already a child of said container.
for (const el of document.getElementsByClassName("tokens-counted")) {
if (el.getElementsByClassName("input-token-usage").length) continue;
let span = document.createElement("span");
span.classList.add("input-token-usage");
span.innerText = "?/? Tokens";
el.appendChild(span);
let inputElement = el.querySelector("input, textarea");
inputElement.addEventListener("input", function() {
updateInputBudget(this);
});
updateInputBudget(inputElement);
}
}
//=================================================================//
// READY/RUNTIME
//=================================================================//
@@ -2482,6 +2537,7 @@ $(document).ready(function(){
memorytext = msg.data;
input_text.val(msg.data);
}
updateInputBudget(input_text[0]);
} else if(msg.cmd == "setmemory") {
memorytext = msg.data;
if(memorymode) {
@@ -2603,6 +2659,7 @@ $(document).ready(function(){
} else if(msg.cmd == "setanote") {
// Set contents of Author's Note field
anote_input.val(msg.data);
updateInputBudget(anote_input[0]);
} else if(msg.cmd == "setanotetemplate") {
// Set contents of Author's Note Template field
$("#anotetemplate").val(msg.data);
@@ -2842,6 +2899,8 @@ $(document).ready(function(){
if (msg.key) {
$("#modelkey").removeClass("hidden");
$("#modelkey")[0].value = msg.key_value;
//if we're in the API list, disable the load button until the model is selected (after the API Key is entered)
disableButtons([load_model_accept]);
} else {
$("#modelkey").addClass("hidden");
@@ -2879,6 +2938,7 @@ $(document).ready(function(){
}
} else if(msg.cmd == 'oai_engines') {
$("#oaimodel").removeClass("hidden")
enableButtons([load_model_accept]);
selected_item = 0;
length = $("#oaimodel")[0].options.length;
for (let i = 0; i < length; i++) {
@@ -2915,7 +2975,18 @@ $(document).ready(function(){
opt.innerHTML = engine[1];
$("#oaimodel")[0].appendChild(opt);
}
} else if(msg.cmd == 'showfieldbudget') {
let inputElement = document.getElementById(msg.data.field);
let tokenBudgetElement = inputElement.parentNode.getElementsByClassName("input-token-usage")[0];
if (msg.data.max === null) {
tokenBudgetElement.innerText = "";
} else {
let tokenLength = msg.data.length ?? "?";
let tokenMax = msg.data.max ?? "?";
tokenBudgetElement.innerText = `${tokenLength}/${tokenMax} Tokens`;
}
}
enableButtons([load_model_accept]);
});
socket.on('disconnect', function() {
@@ -3383,6 +3454,15 @@ $(document).ready(function(){
if (handled) ev.preventDefault();
});
$("#anotetemplate").on("input", function() {
updateInputBudget(anote_input[0]);
})
registerTokenCounters();
updateInputBudget(input_text[0]);
});
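
Taken together with the server-side handler in aiserver.py, the token counters amount to one request/response pair over the existing socket. A sketch of the two payloads as the code above constructs them, with hypothetical field text and counts:

# Client -> server: sent by updateInputBudget() whenever a counted field
# changes; "anotetemplate" is attached only for the Author's Note input.
request = {
    "cmd": "getfieldbudget",
    "data": {"unencoded": "You enter the cave.", "field": "input_text"},
}

# Server -> client: the showfieldbudget branch writes this into the
# "N/M Tokens" span; length and max are None while no tokenizer is loaded.
response = {
    "cmd": "showfieldbudget",
    "data": {"length": 6, "max": 1947, "field": "input_text"},
}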

View File: static/custom.css

@@ -1695,3 +1695,30 @@ body.connected .popupfooter, .popupfooter.always-available {
overflow-x: auto;
white-space: nowrap;
}
.tokens-counted {
position: relative;
}
.input-token-usage {
color: white;
position: absolute;
font-size: 10px;
bottom: 2px;
right: 5px;
-webkit-user-select: none;
-moz-user-select: none;
-ms-user-select: none;
user-select: none;
}
/* Override needed here due to the 10px right padding on inputrowleft; add 10 px. */
#inputrowleft > .input-token-usage {
right: 15px;
bottom: 1px;
}
.wientry > .input-token-usage {
bottom: 8px;
}

View File: templates/index.html

@@ -157,7 +157,7 @@
<div id="inputrowmode">
<button type="button" class="btn btn-secondary hidden" id="btnmode">Mode:<br/><b id="btnmode_label">Story</b></button>
</div>
<div id="inputrowleft">
<div id="inputrowleft" class="tokens-counted">
<textarea class="form-control" id="input_text" placeholder="Enter text here"></textarea>
</div>
<div id="inputrowright">
@@ -170,7 +170,7 @@
<div class="anotelabel no-padding">
Author's Note
</div>
<div class="anotefield">
<div class="anotefield tokens-counted">
<textarea class="form-control" placeholder="Author's Note" id="anoteinput"></textarea>
</div>
</div>