Merge branch 'united' into api

vfbd 2022-08-12 13:57:50 -04:00
commit a7fb2c8414
5 changed files with 185 additions and 129 deletions

View File: aiserver.py

@@ -755,7 +755,10 @@ def device_config(config):
global breakmodel, generator
import breakmodel
n_layers = utils.num_layers(config)
if(args.breakmodel_gpulayers is not None or (utils.HAS_ACCELERATE and args.breakmodel_disklayers is not None)):
if args.cpu:
breakmodel.gpu_blocks = [0]*n_layers
return
elif(args.breakmodel_gpulayers is not None or (utils.HAS_ACCELERATE and args.breakmodel_disklayers is not None)):
try:
if(not args.breakmodel_gpulayers):
breakmodel.gpu_blocks = []
@@ -1428,6 +1431,8 @@ def get_model_info(model, directory=""):
url = True
elif not utils.HAS_ACCELERATE and not torch.cuda.is_available():
pass
elif args.cpu:
pass
else:
layer_count = get_layer_count(model, directory=directory)
if layer_count is None:
@@ -1952,8 +1957,12 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
time.sleep(0.1)
if gpu_layers is not None:
args.breakmodel_gpulayers = gpu_layers
elif initial_load:
gpu_layers = args.breakmodel_gpulayers
if disk_layers is not None:
args.breakmodel_disklayers = int(disk_layers)
elif initial_load:
disk_layers = args.breakmodel_disklayers
#We need to wipe out the existing model and refresh the cuda cache
model = None
@@ -2064,41 +2073,19 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
else:
print("{0}NOT FOUND!{1}".format(colors.YELLOW, colors.END))
if args.model:
if(vars.hascuda):
genselected = True
if args.cpu:
vars.usegpu = False
gpu_layers = None
disk_layers = None
vars.breakmodel = False
elif vars.hascuda:
if(vars.bmsupported):
vars.usegpu = False
vars.breakmodel = True
else:
vars.breakmodel = False
vars.usegpu = True
vars.breakmodel = utils.HAS_ACCELERATE
if(vars.bmsupported):
vars.usegpu = False
vars.breakmodel = True
if(args.cpu):
vars.usegpu = False
vars.breakmodel = utils.HAS_ACCELERATE
elif(vars.hascuda):
if(vars.bmsupported):
genselected = True
vars.usegpu = False
vars.breakmodel = True
else:
genselected = False
else:
genselected = False
if(vars.hascuda):
if(use_gpu):
if(vars.bmsupported):
vars.breakmodel = True
vars.usegpu = False
genselected = True
else:
vars.breakmodel = False
vars.usegpu = True
genselected = True
else:
vars.breakmodel = utils.HAS_ACCELERATE
vars.usegpu = False
genselected = True
# Ask for API key if InferKit was selected
if(vars.model == "InferKit"):
@@ -2317,7 +2304,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
# If we're using torch_lazy_loader, we need to get breakmodel config
# early so that it knows where to load the individual model tensors
if(utils.HAS_ACCELERATE or vars.lazy_load and vars.hascuda and vars.breakmodel):
if (utils.HAS_ACCELERATE or vars.lazy_load and vars.hascuda and vars.breakmodel) and not vars.nobreakmodel:
print(1)
device_config(model_config)
# Download model from Huggingface if it does not exist, otherwise load locally
@@ -2448,6 +2436,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
elif(vars.breakmodel): # Use both RAM and VRAM (breakmodel)
vars.modeldim = get_hidden_size_from_model(model)
if(not vars.lazy_load):
print(2)
device_config(model.config)
move_model_to_devices(model)
elif(utils.HAS_ACCELERATE and __import__("breakmodel").disk_blocks > 0):
@@ -3694,7 +3683,7 @@ def get_message(msg):
else:
filename = "settings/{}.breakmodel".format(vars.model.replace('/', '_'))
f = open(filename, "w")
f.write(msg['gpu_layers'] + '\n' + msg['disk_layers'])
f.write(str(msg['gpu_layers']) + '\n' + str(msg['disk_layers']))
f.close()
vars.colaburl = msg['url'] + "/request"
load_model(use_gpu=msg['use_gpu'], gpu_layers=msg['gpu_layers'], disk_layers=msg['disk_layers'], online_model=msg['online_model'])
@@ -3842,6 +3831,40 @@ def get_message(msg):
emit('from_server', {'cmd': 'set_debug', 'data': msg['data']}, broadcast=True)
if vars.debug:
send_debug()
elif(msg['cmd'] == 'getfieldbudget'):
unencoded = msg["data"]["unencoded"]
field = msg["data"]["field"]
# Tokenizer may be undefined here when a model has not been chosen.
if "tokenizer" not in globals():
# We don't have a tokenizer, just return nulls.
emit(
'from_server',
{'cmd': 'showfieldbudget', 'data': {"length": None, "max": None, "field": field}},
broadcast=True
)
return
header_length = len(tokenizer._koboldai_header)
max_tokens = vars.max_length - header_length - vars.sp_length - vars.genamt
if not unencoded:
# Unencoded is empty, just return 0
emit(
'from_server',
{'cmd': 'showfieldbudget', 'data': {"length": 0, "max": max_tokens, "field": field}},
broadcast=True
)
else:
if field == "anoteinput":
unencoded = buildauthorsnote(unencoded, msg["data"]["anotetemplate"])
tokens_length = len(tokenizer.encode(unencoded))
emit(
'from_server',
{'cmd': 'showfieldbudget', 'data': {"length": tokens_length, "max": max_tokens, "field": field}},
broadcast=True
)
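
For reference, the budget arithmetic in the new getfieldbudget handler is a single subtraction over the context size. A minimal standalone sketch follows, with hypothetical numbers standing in for vars.max_length, vars.sp_length, vars.genamt, and the tokenizer header:

# Hypothetical values; the real ones come from vars and the loaded tokenizer.
max_length = 2048    # vars.max_length: total context the model accepts
header_length = 1    # len(tokenizer._koboldai_header)
sp_length = 20       # vars.sp_length: tokens consumed by the soft prompt
genamt = 80          # vars.genamt: tokens reserved for the generation itself

# Tokens left for the user-editable fields (memory, Author's Note, input):
max_tokens = max_length - header_length - sp_length - genamt
print(max_tokens)  # 1947

# A field's usage is then just the encoded length of its text:
# tokens_length = len(tokenizer.encode(unencoded))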
#==================================================================#
# Send userscripts list to client
@@ -4326,6 +4349,12 @@ def actionredo():
#==================================================================#
#
#==================================================================#
def buildauthorsnote(authorsnote, template):
# Build Author's Note if set
if authorsnote == "":
return ""
return ("\n" + template + "\n").replace("<|>", authorsnote)
def calcsubmitbudgetheader(txt, **kwargs):
# Scan for WorldInfo matches
winfo, found_entries = checkworldinfo(txt, **kwargs)
@@ -4336,11 +4365,7 @@ def calcsubmitbudgetheader(txt, **kwargs):
else:
mem = vars.memory
# Build Author's Note if set
if(vars.authornote != ""):
anotetxt = ("\n" + vars.authornotetemplate + "\n").replace("<|>", vars.authornote)
else:
anotetxt = ""
anotetxt = buildauthorsnote(vars.authornote, vars.authornotetemplate)
return winfo, mem, anotetxt, found_entries
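
Since the extracted buildauthorsnote helper is a pure function, its contract is easy to verify in isolation. A quick self-contained check (the note and template strings are illustrative only):

def buildauthorsnote(authorsnote, template):
    # Substitute the note into the template's <|> placeholder, padded with
    # newlines; an empty note produces an empty string.
    if authorsnote == "":
        return ""
    return ("\n" + template + "\n").replace("<|>", authorsnote)

assert buildauthorsnote("", "[Author's Note: <|>]") == ""
assert (buildauthorsnote("The hero is exhausted.", "[Author's Note: <|>]")
        == "\n[Author's Note: The hero is exhausted.]\n")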

View File: colab/vscode.ipynb

@@ -1,76 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "ColabKobold Code",
"provenance": [],
"authorship_tag": "ABX9TyOuIHmyxj4U9dipAib4hfIi",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "TPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/henk717/KoboldAI/blob/united/colab/vscode.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# ColabKobold VSCode Edition\n",
"This is a special edition of ColabKobold aimed at developers, it will not start a KoboldAI instance for you to play KoboldAI and instead will launch a fully functional copy of VSCode for easy development.\n",
"\n",
"Few things of note:\n",
"1. Make sure the desired (or no) accelertor is selected on Colab, you do not want a TPU ban for not using it.\n",
"1. The Version can be replaced with your github URL and appended with -b for the branch for example \"https://github.com/henk717/koboldai -b united\" dependencies will automatically be installed from requirements.txt or requirements_mtj.txt.\n",
"1. With the args you can specify launch options for the KoboldAI Deployment Script, this way you can easily preinstall models to your development instance so you have a model to test with. To install TPU requirements specify the -m TPUMeshTransformerGPTJ argument.\n",
"1. You will need an Ngrok auth token which you can obtain here : https://dashboard.ngrok.com/get-started/your-authtoken\n",
"1. KoboldAI is installed in /content/koboldai-client opening this folder is enough to automatically get full git history and revision support. Also keep in mind that it mounts your Google Drive, be careful comitting directly from this instance.\n",
"1. With Ctrl + Shift + ` you can get a terminal to launch KoboldAI with your own parameters, launching with --colab is recommended.\n",
"\n",
"# [If you are not a developer and are looking to use KoboldAI click here](https://henk.tech/colabkobold)"
],
"metadata": {
"id": "hMRnGz42Xsy3"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "40B1QvI3Xv02"
},
"outputs": [],
"source": [
"#@title VSCode Server\n",
"Version = \"United\" #@param [\"Official\", \"United\"] {allow-input: true}\n",
"Args = \"-m TPUMeshTransformerGPTJ -a https://api.wandb.ai/files/ve-forbryderne/skein/files/gpt-j-6b-skein-jax/aria2.txt\" #@param {type:\"string\"}\n",
"Authtoken = \"\" #@param {type:\"string\"}\n",
"\n",
"from google.colab import drive\n",
"drive.mount('/content/drive/')\n",
"\n",
"!wget https://henk.tech/ckds -O - | bash /dev/stdin -g $Version -i only $Args\n",
"\n",
"!pip install colabcode\n",
"!pip install 'flask>=2.1.0'\n",
"from colabcode import ColabCode\n",
"ColabCode(authtoken=Authtoken)"
]
}
]
}

View File: static/application.js

@@ -512,6 +512,16 @@ function addWiLine(ob) {
$(".wisortable-excluded-dynamic").removeClass("wisortable-excluded-dynamic");
$(this).parent().css("max-height", "").find(".wicomment").find(".form-control").css("max-height", "");
});
for (const wientry of document.getElementsByClassName("wientry")) {
// If we are uninitialized, skip.
if ($(wientry).closest(".wilistitem-uninitialized").length) continue;
// add() will not add if the class is already present
wientry.classList.add("tokens-counted");
}
registerTokenCounters();
}
function addWiFolder(uid, ob) {
@@ -835,6 +845,7 @@ function exitMemoryMode() {
button_actmem.html("Memory");
show([button_actback, button_actfwd, button_actretry, button_actwi]);
input_text.val("");
updateInputBudget(input_text[0]);
// Hide Author's Note field
anote_menu.slideUp("fast");
}
@@ -1078,7 +1089,7 @@ function buildLoadModelList(ar, menu, breadcrumbs, showdelete) {
html = html + "<span class=\"loadlisticon loadmodellisticon-folder oi oi-folder allowed\" aria-hidden=\"true\"></span>"
} else {
//this is a model
html = html + "<div class=\"loadlistpadding\"></div>"
html = html + "<div class=\"loadlisticon oi oi-caret-right allowed\"></div>"
}
//now let's do the delete icon if applicable
@@ -1096,6 +1107,7 @@ function buildLoadModelList(ar, menu, breadcrumbs, showdelete) {
</div>"
loadmodelcontent.append(html);
//If this is a menu
console.log(ar[i]);
if(ar[i][3]) {
$("#loadmodel"+i).off("click").on("click", (function () {
return function () {
@@ -1105,15 +1117,27 @@
})(i));
//Normal load
} else {
$("#loadmodel"+i).off("click").on("click", (function () {
return function () {
$("#use_gpu_div").addClass("hidden");
$("#modelkey").addClass("hidden");
$("#modellayers").addClass("hidden");
socket.send({'cmd': 'selectmodel', 'data': $(this).attr("name")});
highlightLoadLine($(this));
}
})(i));
if (['NeoCustom', 'GPT2Custom'].includes(menu)) {
$("#loadmodel"+i).off("click").on("click", (function () {
return function () {
$("#use_gpu_div").addClass("hidden");
$("#modelkey").addClass("hidden");
$("#modellayers").addClass("hidden");
socket.send({'cmd': 'selectmodel', 'data': $(this).attr("name"), 'path': $(this).attr("pretty_name")});
highlightLoadLine($(this));
}
})(i));
} else {
$("#loadmodel"+i).off("click").on("click", (function () {
return function () {
$("#use_gpu_div").addClass("hidden");
$("#modelkey").addClass("hidden");
$("#modellayers").addClass("hidden");
socket.send({'cmd': 'selectmodel', 'data': $(this).attr("name")});
highlightLoadLine($(this));
}
})(i));
}
}
}
}
@@ -2139,6 +2163,37 @@ function interpolateRGB(color0, color1, t) {
]
}
function updateInputBudget(inputElement) {
let data = {"unencoded": inputElement.value, "field": inputElement.id};
if (inputElement.id === "anoteinput") {
data["anotetemplate"] = $("#anotetemplate").val();
}
socket.send({"cmd": "getfieldbudget", "data": data});
}
function registerTokenCounters() {
// Add token counters to all input containers with the class of "tokens-counted",
// if a token counter is not already a child of said container.
for (const el of document.getElementsByClassName("tokens-counted")) {
if (el.getElementsByClassName("input-token-usage").length) continue;
let span = document.createElement("span");
span.classList.add("input-token-usage");
span.innerText = "?/? Tokens";
el.appendChild(span);
let inputElement = el.querySelector("input, textarea");
inputElement.addEventListener("input", function() {
updateInputBudget(this);
});
updateInputBudget(inputElement);
}
}
//=================================================================//
// READY/RUNTIME
//=================================================================//
@@ -2482,6 +2537,7 @@ $(document).ready(function(){
memorytext = msg.data;
input_text.val(msg.data);
}
updateInputBudget(input_text[0]);
} else if(msg.cmd == "setmemory") {
memorytext = msg.data;
if(memorymode) {
@@ -2603,6 +2659,7 @@ $(document).ready(function(){
} else if(msg.cmd == "setanote") {
// Set contents of Author's Note field
anote_input.val(msg.data);
updateInputBudget(anote_input[0]);
} else if(msg.cmd == "setanotetemplate") {
// Set contents of Author's Note Template field
$("#anotetemplate").val(msg.data);
@@ -2842,6 +2899,8 @@ $(document).ready(function(){
if (msg.key) {
$("#modelkey").removeClass("hidden");
$("#modelkey")[0].value = msg.key_value;
//if we're in the API list, disable the load button until the model is selected (after the API Key is entered)
disableButtons([load_model_accept]);
} else {
$("#modelkey").addClass("hidden");
@@ -2879,6 +2938,7 @@ $(document).ready(function(){
}
} else if(msg.cmd == 'oai_engines') {
$("#oaimodel").removeClass("hidden")
enableButtons([load_model_accept]);
selected_item = 0;
length = $("#oaimodel")[0].options.length;
for (let i = 0; i < length; i++) {
@@ -2915,7 +2975,18 @@ $(document).ready(function(){
opt.innerHTML = engine[1];
$("#oaimodel")[0].appendChild(opt);
}
} else if(msg.cmd == 'showfieldbudget') {
let inputElement = document.getElementById(msg.data.field);
let tokenBudgetElement = inputElement.parentNode.getElementsByClassName("input-token-usage")[0];
if (msg.data.max === null) {
tokenBudgetElement.innerText = "";
} else {
let tokenLength = msg.data.length ?? "?";
let tokenMax = msg.data.max ?? "?";
tokenBudgetElement.innerText = `${tokenLength}/${tokenMax} Tokens`;
}
}
enableButtons([load_model_accept]);
});
socket.on('disconnect', function() {
@@ -3383,6 +3454,15 @@ $(document).ready(function(){
if (handled) ev.preventDefault();
});
$("#anotetemplate").on("input", function() {
updateInputBudget(anote_input[0]);
})
registerTokenCounters();
updateInputBudget(input_text[0]);
});
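
Taken together with the server-side handler in aiserver.py, the token counters amount to one request/response pair over the existing socket. A sketch of the two payloads as the code above constructs them, with hypothetical field text and counts:

# Client -> server: sent by updateInputBudget() whenever a counted field
# changes; "anotetemplate" is attached only for the Author's Note input.
request = {
    "cmd": "getfieldbudget",
    "data": {"unencoded": "You enter the cave.", "field": "input_text"},
}

# Server -> client: the showfieldbudget branch writes this into the
# "N/M Tokens" span; length and max are None while no tokenizer is loaded.
response = {
    "cmd": "showfieldbudget",
    "data": {"length": 6, "max": 1947, "field": "input_text"},
}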

View File: static/custom.css

@@ -1695,3 +1695,30 @@ body.connected .popupfooter, .popupfooter.always-available {
overflow-x: auto;
white-space: nowrap;
}
.tokens-counted {
position: relative;
}
.input-token-usage {
color: white;
position: absolute;
font-size: 10px;
bottom: 2px;
right: 5px;
-webkit-user-select: none;
-moz-user-select: none;
-ms-user-select: none;
user-select: none;
}
/* Override needed here due to the 10px right padding on inputrowleft; add 10 px. */
#inputrowleft > .input-token-usage {
right: 15px;
bottom: 1px;
}
.wientry > .input-token-usage {
bottom: 8px;
}

View File: templates/index.html

@@ -157,7 +157,7 @@
<div id="inputrowmode">
<button type="button" class="btn btn-secondary hidden" id="btnmode">Mode:<br/><b id="btnmode_label">Story</b></button>
</div>
<div id="inputrowleft">
<div id="inputrowleft" class="tokens-counted">
<textarea class="form-control" id="input_text" placeholder="Enter text here"></textarea>
</div>
<div id="inputrowright">
@@ -170,7 +170,7 @@
<div class="anotelabel no-padding">
Author's Note
</div>
<div class="anotefield">
<div class="anotefield tokens-counted">
<textarea class="form-control" placeholder="Author's Note" id="anoteinput"></textarea>
</div>
</div>