Merge branch 'united' of https://github.com/ebolam/KoboldAI into united
commit 68d143b80c
.gitignore
@@ -25,6 +25,8 @@ softprompts
 models
 !models/models go here.txt
 Uninstall
 flask_session
 accelerate-disk-cache
 .ipynb_checkpoints
+
+# Ignore PyCharm project files.
aiserver.py
@@ -224,7 +224,7 @@ class vars:
     model_type  = ""     # Model Type (Automatically taken from the model config)
     noai        = False  # Runs the script without starting up the transformers pipeline
     aibusy      = False  # Stops submissions while the AI is working
-    max_length  = 2048   # Maximum number of tokens to submit per action
+    max_length  = 1024   # Maximum number of tokens to submit per action
     ikmax       = 3000   # Maximum number of characters to submit to InferKit
     genamt      = 80     # Amount of text for each action to generate
     ikgen       = 200    # Number of characters for InferKit to generate
@@ -646,6 +646,11 @@ def move_model_to_devices(model):
     import breakmodel

+    if(utils.HAS_ACCELERATE):
+        import accelerate.utils
+        for key, value in model.state_dict().items():
+            target_dtype = torch.float32 if breakmodel.primary_device == "cpu" else torch.float16
+            if(value.dtype is not target_dtype):
+                accelerate.utils.set_module_tensor_to_device(model, key, target_dtype)
     disk_blocks = breakmodel.disk_blocks
     gpu_blocks = breakmodel.gpu_blocks
     ram_blocks = len(utils.layers_module_names) - sum(gpu_blocks)
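The new block above normalizes weight dtypes before the layers are split across devices: float32 when breakmodel's primary device is the CPU, float16 otherwise. A minimal pure-torch sketch of the same idea (illustrative only; the committed code routes the cast through accelerate.utils.set_module_tensor_to_device):

import torch
import torch.nn as nn

def normalize_dtypes(model: nn.Module, primary_device: str) -> nn.Module:
    # Cast every floating-point parameter to the dtype that matches the
    # primary device, mirroring the check added in move_model_to_devices().
    target_dtype = torch.float32 if primary_device == "cpu" else torch.float16
    for module in model.modules():
        for name, param in list(module.named_parameters(recurse=False)):
            if param.is_floating_point() and param.dtype is not target_dtype:
                setattr(module, name, nn.Parameter(param.to(target_dtype), requires_grad=param.requires_grad))
    return model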
@@ -5544,9 +5549,6 @@ def loadRequest(loadpath, filename=None):
-            ln = len(vars.actions[vars.actions.get_last_key()].rstrip())
-            footer += vars.actions[vars.actions.get_last_key()][ln:]
-            vars.actions[vars.actions.get_last_key()] = vars.actions[vars.actions.get_last_key()][:ln]
         if(len(vars.actions) == 0):
             vars.gamestarted = False

         # Try not to break older save files
         if("authorsnote" in js):
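For reference, the deleted lines split the trailing whitespace of the last story action off into a separate footer. A standalone sketch of that computation (illustrative values, not repository code):

last_action = "You open the door.  \n"
ln = len(last_action.rstrip())
footer = last_action[ln:]   # the trailing whitespace: "  \n"
trimmed = last_action[:ln]  # "You open the door."
assert trimmed + footer == last_action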
(conda environment file)
@@ -6,6 +6,7 @@ channels:
 dependencies:
   - colorama
   - flask-socketio
+  - flask-session
   - pytorch
   - cudatoolkit=11.1
   - tensorflow-gpu
(conda environment file)
@@ -6,6 +6,7 @@ channels:
 dependencies:
   - colorama
   - flask-socketio
+  - flask-session
   - pytorch=1.11.*
   - python=3.8.*
   - cudatoolkit=11.1
(conda environment file)
@@ -5,6 +5,7 @@ channels:
 dependencies:
   - colorama
   - flask-socketio
+  - flask-session
   - python=3.8.*
   - eventlet
   - markdown
(conda environment file)
@@ -5,6 +5,7 @@ channels:
 dependencies:
   - colorama
   - flask-socketio
+  - flask-session
   - python=3.8.*
   - eventlet
   - markdown
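Each of the four conda environment files gains the same flask-session dependency. For context, a minimal sketch of the usual Flask-Session setup with filesystem storage (assumed usage, not KoboldAI's actual initialization code); the session files land in a flask_session/ directory, which .gitignore already lists:

from flask import Flask
from flask_session import Session  # provided by the flask-session package added above

app = Flask(__name__)
app.config["SESSION_TYPE"] = "filesystem"  # server-side sessions stored in ./flask_session/
Session(app)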
gensettings.py
@@ -17,7 +17,7 @@ gensettingstf = [
 	"id": "settemp",
 	"min": 0.1,
 	"max": 2.0,
-	"step": 0.05,
+	"step": 0.01,
 	"default": 0.5,
 	"tooltip": "Randomness of sampling. High values can increase creativity but may make text less sensible. Lower values will make text more predictable but can become repetitious."
 	},
@@ -28,7 +28,7 @@ gensettingstf = [
 	"id": "settopp",
 	"min": 0.0,
 	"max": 1.0,
-	"step": 0.05,
+	"step": 0.01,
 	"default": 0.9,
 	"tooltip": "Used to discard unlikely text in the sampling process. Lower values will make text more predictable but can become repetitious. (Put this value on 1 to disable its effect)"
 	},
@@ -50,7 +50,7 @@ gensettingstf = [
 	"id": "settfs",
 	"min": 0.0,
 	"max": 1.0,
-	"step": 0.05,
+	"step": 0.01,
 	"default": 1.0,
 	"tooltip": "Alternative sampling method; it is recommended to disable top_p and top_k (set top_p to 1 and top_k to 0) if using this. 0.95 is thought to be a good value. (Put this value on 1 to disable its effect)"
 	},
@@ -61,7 +61,7 @@ gensettingstf = [
 	"id": "settypical",
 	"min": 0.0,
 	"max": 1.0,
-	"step": 0.05,
+	"step": 0.01,
 	"default": 1.0,
 	"tooltip": "Alternative sampling method described in the paper \"Typical Decoding for Natural Language Generation\" (10.48550/ARXIV.2202.00666). The paper suggests 0.2 as a good value for this setting. Set this setting to 1 to disable its effect."
 	},
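All four sampler sliders change from a 0.05 step to a finer 0.01 step. A throwaway sanity check (hypothetical helper, not part of gensettings.py) confirming a step still fits an entry's range:

def check_slider(entry):
    assert entry["min"] <= entry["default"] <= entry["max"]
    steps = (entry["max"] - entry["min"]) / entry["step"]
    assert abs(steps - round(steps)) < 1e-6  # the step divides the range evenly

check_slider({"id": "settemp", "min": 0.1, "max": 2.0, "step": 0.01, "default": 0.5})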
maps/bloom.json (new file)
@@ -0,0 +1,30 @@
+{
+    "mtj_compat": "bloom",
+    "mtj_pe": "alibi",
+    "mtj_config_map": {
+        "d_model": "n_embed",
+        "n_heads": "num_attention_heads",
+        "layers": "n_layer"
+    },
+    "static_weights": {
+        "word_embeddings.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}},
+        "word_embeddings_layernorm.weight": {"mtj": {"module": "embedding_shard/~/replicated_layer_norm", "param": "scale"}},
+        "word_embeddings_layernorm.bias": {"mtj": {"module": "embedding_shard/~/replicated_layer_norm", "param": "offset"}},
+        "ln_f.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}},
+        "ln_f.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}}
+    },
+    "layer_weights": {
+        "h.{layer}.self_attention.query_key_value.weight": {"mtj": {"module": "layer_{layer}/~/combined_qkv", "param": "w"}},
+        "h.{layer}.self_attention.query_key_value.bias": {"mtj": {"module": "layer_{layer}/~/combined_qkv", "param": "b"}},
+        "h.{layer}.self_attention.dense.weight": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "w"}},
+        "h.{layer}.self_attention.dense.bias": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "b", "transforms": ["divide_by_shards"]}},
+        "h.{layer}.mlp.dense_h_to_4h.weight": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "w"}},
+        "h.{layer}.mlp.dense_h_to_4h.bias": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "b"}},
+        "h.{layer}.mlp.dense_4h_to_h.weight": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "w"}},
+        "h.{layer}.mlp.dense_4h_to_h.bias": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "b", "transforms": ["divide_by_shards"]}},
+        "h.{layer}.input_layernorm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "scale"}},
+        "h.{layer}.input_layernorm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "offset"}},
+        "h.{layer}.post_attention_layernorm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "scale"}},
+        "h.{layer}.post_attention_layernorm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "offset"}}
+    }
+}
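The new map translates Hugging Face BLOOM checkpoint keys into mesh-transformer-jax module/parameter targets, with {layer} acting as a template slot for the layer number. A sketch of how such a template can be resolved (assumed lookup logic for illustration; the real consumer is tpu_mtj_backend.py):

import re

def resolve_mtj_target(model_spec: dict, hf_key: str):
    # Match the checkpoint key against each "layer_weights" template,
    # treating "{layer}" as a layer-number wildcard.
    for template, spec in model_spec.get("layer_weights", {}).items():
        pattern = "^" + re.escape(template).replace(re.escape("{layer}"), r"(\d+)") + "$"
        m = re.match(pattern, hf_key)
        if m:
            mtj = spec["mtj"]
            return mtj["module"].format(layer=m.group(1)), mtj["param"]
    return None

# e.g. "h.3.self_attention.dense.weight" -> ("layer_3/~/linear_3", "w")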
requirements_mtj.txt
@@ -5,6 +5,7 @@ requests
 optax >= 0.0.5, <= 0.0.9
 dm-haiku == 0.0.5
 jax == 0.2.21
 jaxlib >= 0.1.69, <= 0.3.7
 transformers >= 4.19
 progressbar2
 git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck
static/application.js
@@ -87,6 +87,7 @@ var wiscroll = 0;
 var editmode = false;
 var connected = false;
 var newly_loaded = true;
+var all_modified_chunks = new Set();
 var modified_chunks = new Set();
 var empty_chunks = new Set();
 var gametext_bound = false;
@@ -129,6 +130,7 @@ var adventure = false;
 var chatmode = false;

 var sliders_throttle = getThrottle(200);
+var submit_throttle = null;

 //=================================================================//
 //  METHODS
@@ -892,6 +894,17 @@ function dosubmit(disallow_abort) {
 		return;
 	}
 	chunkOnFocusOut("override");
+	// Wait for editor changes to be applied before submitting
+	submit_throttle = getThrottle(70);
+	submit_throttle.txt = txt;
+	submit_throttle.disallow_abort = disallow_abort;
+	submit_throttle(0, _dosubmit);
+}
+
+function _dosubmit() {
+	var txt = submit_throttle.txt;
+	var disallow_abort = submit_throttle.disallow_abort;
+	submit_throttle = null;
 	input_text.val("");
 	hideMessage();
 	hidegenseqs();
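dosubmit now defers the actual submission through a short throttle so that pending inline edits reach the server before _dosubmit fires. A rough Python analogue of that trailing-call pattern (illustrative only; getThrottle is KoboldAI's own JS helper):

import threading

def make_deferred(delay_ms: int):
    # Collapse rapid calls into a single trailing invocation after delay_ms,
    # much like stashing txt/disallow_abort on submit_throttle above.
    state = {"timer": None}
    def call(func, *args):
        if state["timer"] is not None:
            state["timer"].cancel()
        state["timer"] = threading.Timer(delay_ms / 1000.0, func, args)
        state["timer"].start()
    return call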
@@ -1523,14 +1536,30 @@ function chunkOnTextInput(event) {
 			r.deleteContents();
 		}

-		// In Chrome the added <br/> will go outside of the chunks if we press
+		// In Chrome and Safari the added <br/> will go outside of the chunks if we press
 		// enter at the end of the story in the editor, so this is here
 		// to put the <br/> back in the right place
 		var br = $("#_EDITOR_LINEBREAK_")[0];
 		if(br.parentNode === game_text[0]) {
+			var parent = br.previousSibling;
 			if(br.previousSibling.nodeType !== 1) {
+				parent = br.previousSibling.previousSibling;
 				br.previousSibling.previousSibling.appendChild(br.previousSibling);
 			}
+			if(parent.lastChild.tagName === "BR") {
+				parent.lastChild.remove();  // Chrome and Safari also insert an extra <br/> in this case for some reason so we need to remove it
+				if(using_webkit_patch) {
+					// Safari on iOS has a bug where it selects all text in the last chunk of the story when this happens so we collapse the selection to the end of the chunk in that case
+					setTimeout(function() {
+						var s = getSelection();
+						var r = s.getRangeAt(0);
+						r.selectNodeContents(parent);
+						r.collapse(false);
+						s.removeAllRanges();
+						s.addRange(r);
+					}, 2);
+				}
+			}
 			br.previousSibling.appendChild(br);
 			r.selectNodeContents(br.parentNode);
 			s.removeAllRanges();
@@ -1712,6 +1741,7 @@ function applyChunkDeltas(nodes) {
 	var chunks = Array.from(buildChunkSetFromNodeArray(nodes));
 	for(var i = 0; i < chunks.length; i++) {
 		modified_chunks.add(chunks[i]);
+		all_modified_chunks.add(chunks[i]);
 	}
 	setTimeout(function() {
 		var chunks = Array.from(modified_chunks);
@@ -1722,12 +1752,18 @@ function applyChunkDeltas(nodes) {
 			if(!selected_chunks.has(chunks[i])) {
 				modified_chunks.delete(chunks[i]);
 				socket.send({'cmd': 'inlineedit', 'chunk': chunks[i], 'data': formatChunkInnerText(chunk)});
+				if(submit_throttle !== null) {
+					submit_throttle(0, _dosubmit);
+				}
 			}
 			empty_chunks.delete(chunks[i]);
 		} else {
 			if(!selected_chunks.has(chunks[i])) {
 				modified_chunks.delete(chunks[i]);
 				socket.send({'cmd': 'inlineedit', 'chunk': chunks[i], 'data': formatChunkInnerText(chunk)});
+				if(submit_throttle !== null) {
+					submit_throttle(0, _dosubmit);
+				}
 			}
 			empty_chunks.add(chunks[i]);
 		}
@@ -1749,6 +1785,9 @@ function syncAllModifiedChunks(including_selected_chunks=false) {
 				empty_chunks.delete(chunks[i]);
 			}
 			socket.send({'cmd': 'inlineedit', 'chunk': chunks[i], 'data': data});
+			if(submit_throttle !== null) {
+				submit_throttle(0, _dosubmit);
+			}
 		}
 	}
 }
@@ -1801,10 +1840,16 @@ function restorePrompt() {
 		if(this.innerText.trim().length) {
 			saved_prompt = this.innerText.trim();
 			socket.send({'cmd': 'inlinedelete', 'data': this.getAttribute("n")});
+			if(submit_throttle !== null) {
+				submit_throttle(0, _dosubmit);
+			}
 			this.parentNode.removeChild(this);
 			return false;
 		}
 		socket.send({'cmd': 'inlinedelete', 'data': this.getAttribute("n")});
+		if(submit_throttle !== null) {
+			submit_throttle(0, _dosubmit);
+		}
 		this.parentNode.removeChild(this);
 	});
 }
@@ -1819,6 +1864,9 @@ function restorePrompt() {
 	modified_chunks.delete('0');
 	empty_chunks.delete('0');
 	socket.send({'cmd': 'inlineedit', 'chunk': '0', 'data': saved_prompt});
+	if(submit_throttle !== null) {
+		submit_throttle(0, _dosubmit);
+	}
 }

 function deleteEmptyChunks() {
@@ -1830,13 +1878,21 @@ function deleteEmptyChunks() {
 			restorePrompt();
 		} else {
 			socket.send({'cmd': 'inlinedelete', 'data': chunks[i]});
+			if(submit_throttle !== null) {
+				submit_throttle(0, _dosubmit);
+			}
 		}
 	}
 	if(modified_chunks.has('0')) {
 		modified_chunks.delete(chunks[i]);
 		socket.send({'cmd': 'inlineedit', 'chunk': chunks[i], 'data': formatChunkInnerText(document.getElementById("n0"))});
+		if(submit_throttle !== null) {
+			submit_throttle(0, _dosubmit);
+		}
 	}
-	saved_prompt = formatChunkInnerText($("#n0")[0]);
+	if(gamestarted) {
+		saved_prompt = formatChunkInnerText($("#n0")[0]);
+	}
 }

 function highlightEditingChunks() {
@@ -1860,11 +1916,29 @@ function highlightEditingChunks() {
 }

 function cleanupChunkWhitespace() {
 	unbindGametext();
+
+	var chunks = Array.from(all_modified_chunks);
+	for(var i = 0; i < chunks.length; i++) {
+		var original_chunk = document.getElementById("n" + chunks[i]);
+		if(original_chunk === null || original_chunk.innerText.trim().length === 0) {
+			all_modified_chunks.delete(chunks[i]);
+			modified_chunks.delete(chunks[i]);
+			empty_chunks.add(chunks[i]);
+		}
+	}
+
 	// Merge empty chunks with the next chunk
 	var chunks = Array.from(empty_chunks);
+	chunks.sort(function(e) {parseInt(e)});
 	for(var i = 0; i < chunks.length; i++) {
+		if(chunks[i] == "0") {
+			continue;
+		}
 		var original_chunk = document.getElementById("n" + chunks[i]);
+		if(original_chunk === null) {
+			continue;
+		}
 		var chunk = original_chunk.nextSibling;
 		while(chunk) {
 			if(chunk.tagName === "CHUNK") {
@@ -1874,11 +1948,14 @@ function cleanupChunkWhitespace() {
 		}
 		if(chunk) {
 			chunk.innerText = original_chunk.innerText + chunk.innerText;
+			if(original_chunk.innerText.length != 0 && !modified_chunks.has(chunk.getAttribute("n"))) {
+				modified_chunks.add(chunk.getAttribute("n"));
+			}
 		}
 		original_chunk.innerText = "";
 	}
 	// Move whitespace at the end of non-empty chunks into the beginning of the next non-empty chunk
-	var chunks = Array.from(modified_chunks);
+	var chunks = Array.from(all_modified_chunks);
 	chunks.sort(function(e) {parseInt(e)});
 	for(var i = 0; i < chunks.length; i++) {
 		var original_chunk = document.getElementById("n" + chunks[i]);
@@ -1892,9 +1969,14 @@ function cleanupChunkWhitespace() {
 		var ln = original_chunk.innerText.trimEnd().length;
-		chunk.innerText = original_chunk.innerText.substring(ln) + chunk.innerText;
+		if (chunk) {
+			chunk.innerText = original_chunk.innerText.substring(ln) + chunk.innerText;
+			if(ln != original_chunk.innerText.length && !modified_chunks.has(chunk.getAttribute("n"))) {
+				modified_chunks.add(chunk.getAttribute("n"));
+			}
+		}
 		original_chunk.innerText = original_chunk.innerText.substring(0, ln);
 	}

 	bindGametext();
 }

 // This gets run every time the text in a chunk is edited
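The cleanup pass merges empty chunks forward and then shifts trailing whitespace into the following chunk so chunk boundaries stay stable across edits. A toy pure-Python model of that second step (not the DOM code above):

def shift_trailing_whitespace(chunks):
    # Move whitespace at the end of each chunk into the beginning of the
    # next one, as cleanupChunkWhitespace() does for story chunks.
    for i in range(len(chunks) - 1):
        body = chunks[i].rstrip()
        ws = chunks[i][len(body):]
        if ws:
            chunks[i] = body
            chunks[i + 1] = ws + chunks[i + 1]
    return chunks

assert shift_trailing_whitespace(["foo  ", "bar"]) == ["foo", "  bar"]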
@@ -1976,6 +2058,7 @@ function chunkOnFocusOut(event) {
 		return;
 	}
 	cleanupChunkWhitespace();
+	all_modified_chunks = new Set();
 	syncAllModifiedChunks(true);
 	setTimeout(function() {
 		var blurred = game_text[0] !== document.activeElement;
@@ -2185,6 +2268,7 @@ $(document).ready(function(){
 		unbindGametext();
 		allowedit = gamestarted && $("#allowediting").prop('checked');
 		game_text.attr('contenteditable', allowedit);
+		all_modified_chunks = new Set();
 		modified_chunks = new Set();
 		empty_chunks = new Set();
 		game_text.html(msg.data);
@@ -2739,6 +2823,12 @@ $(document).ready(function(){
 		chunkOnFocusOut
 	);
 	mutation_observer = new MutationObserver(chunkOnDOMMutate);
+	$("#gamescreen").on('click', function(e) {
+		if(this !== e.target) {
+			return;
+		}
+		document.activeElement.blur();
+	});

 	// This is required for the editor to work correctly in Firefox on desktop
 	// because the gods of HTML and JavaScript say so
torch_lazy_loader.py
@@ -52,7 +52,7 @@ import pickle
 import torch
 import utils
 from torch.nn import Module
-from typing import Any, Callable, Dict, Optional, Tuple, Type, Union
+from typing import Any, Callable, Dict, Optional, Tuple, Union


 _EXTRA_STATE_KEY_SUFFIX = '_extra_state'
@@ -73,7 +73,7 @@ STORAGE_TYPE_MAP = {


 class LazyTensor:
-    def __init__(self, storage_type: Type[torch._StorageBase], key: str, location: str, dtype: Optional[torch.dtype] = None, seek_offset: Optional[int] = None, shape: Optional[Tuple[int, ...]] = None, stride: Optional[Tuple[int, ...]] = None, requires_grad=False, backward_hooks: Any = None):
+    def __init__(self, storage_type, key: str, location: str, dtype: Optional[torch.dtype] = None, seek_offset: Optional[int] = None, shape: Optional[Tuple[int, ...]] = None, stride: Optional[Tuple[int, ...]] = None, requires_grad=False, backward_hooks: Any = None):
         self.storage_type = storage_type
         self.key = key
         self.location = location
tpu_mtj_backend.py
@@ -1246,13 +1246,14 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
         if utils.num_shards is not None:
             utils.current_shard += 1
         for key in sorted(model_dict.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
+            model_spec_key = max((k for k in model_spec.keys() if key.endswith(k)), key=len, default=None)

             # Some model weights are used by transformers but not by MTJ.
             # We have to materialize these weights anyways because
             # transformers will throw a tantrum otherwise. To attain
             # the least possible memory usage, we create them as meta
             # tensors, which don't take up any actual CPU or TPU memory.
-            if key not in model_spec:
+            if model_spec_key is None:
                 model_dict[key] = torch.empty(model_dict[key].shape, dtype=model_dict[key].dtype, device="meta")
                 utils.bar.update(1)
                 continue
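The exact `key in model_spec` test becomes a longest-suffix match: a checkpoint key now only needs to end with a spec key, which tolerates prefixed state-dict keys (e.g. a hypothetical "transformer." prefix in some shards). A minimal illustration of the new lookup:

model_spec = {"word_embeddings.weight": 1, "ln_f.weight": 2}
key = "transformer.word_embeddings.weight"
model_spec_key = max((k for k in model_spec.keys() if key.endswith(k)), key=len, default=None)
assert model_spec_key == "word_embeddings.weight"  # longest matching suffix wins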
@@ -1267,7 +1268,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
             if current_offset != model_dict[key].seek_offset:
                 f.read(model_dict[key].seek_offset - current_offset)
                 current_offset = model_dict[key].seek_offset
-            spec = model_spec[key]
+            spec = model_spec[model_spec_key]
             transforms = set(spec.get("transforms", ()))
             if not isinstance(model_dict[key], torch_lazy_loader.LazyTensor):
                 error = f"Duplicate key {repr(key)}"
(Lua userscript)
@@ -183,8 +183,8 @@ function userscript.genmod()
 		max_overlap[i] = 0
 		local s = {}
 		local z = {[0] = 0}
-		local l = 1
-		local r = 1
+		local l = 0
+		local r = 0
 		local n_s = math.min(n_tokens, bias_entry.n_tokens)
 		local j = 0
 		for k = 1, n_s do
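The two initializers drop from 1 to 0: the script's Z-array is 0-indexed (z = {[0] = 0}), and the Z-algorithm's rightmost-match window [l, r) must start at 0 for its first bounds check to be valid. For reference, the standard 0-indexed Z-function this loop mirrors, in Python:

def z_function(s):
    # z[k] = length of the longest prefix of s that also occurs starting at k;
    # l and r bound the rightmost match found so far and both start at 0.
    n = len(s)
    z = [0] * n
    l = r = 0
    for k in range(1, n):
        if k < r:
            z[k] = min(r - k, z[k - l])
        while k + z[k] < n and s[z[k]] == s[k + z[k]]:
            z[k] += 1
        if k + z[k] > r:
            l, r = k, k + z[k]
    return z

assert z_function("aabaab") == [0, 1, 0, 3, 1, 0]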