Merge pull request #192 from one-some/ui2-token-view

Token view for context viewer
ebolam authored on 2022-10-11 09:01:02 -04:00, committed by GitHub
3 changed files with 143 additions and 46 deletions

View File

@@ -1,5 +1,6 @@
 from dataclasses import dataclass
 import os, re, time, threading, json, pickle, base64, copy, tqdm, datetime, sys
+from typing import Union
 from io import BytesIO
 from flask import has_request_context, session
 from flask_socketio import SocketIO, join_room, leave_room
@@ -180,6 +181,18 @@ class koboldai_vars(object):
     def reset_model(self):
         self._model_settings.reset_for_model_load()
 
+    def get_token_representation(self, text: Union[str, list, None]) -> list:
+        if not self.tokenizer or not text:
+            return []
+
+        if isinstance(text, str):
+            encoded = self.tokenizer.encode(text)
+        else:
+            encoded = text
+
+        # TODO: This might be inefficient, should we cache some of this?
+        return [[token, self.tokenizer.decode(token)] for token in encoded]
+
     def calc_ai_text(self, submitted_text="", return_text=False):
         #start_time = time.time()
         if self.alt_gen:
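
Note: a minimal sketch of the list shape the new get_token_representation helper feeds into each context block's "tokens" field. It assumes any Hugging Face tokenizer can stand in for self.tokenizer; the "gpt2" name and the sample sentence are illustrative only.

    from transformers import AutoTokenizer

    # Stand-in for koboldai_vars.tokenizer (assumption: any HF tokenizer behaves the same here).
    tokenizer = AutoTokenizer.from_pretrained("gpt2")

    def get_token_representation(text):
        # Mirrors the new method: accepts raw text or an already-encoded id list.
        if not text:
            return []
        encoded = tokenizer.encode(text) if isinstance(text, str) else text
        return [[token, tokenizer.decode(token)] for token in encoded]

    print(get_token_representation("You open the door."))
    # -> [[id, 'You'], [id, ' open'], [id, ' the'], [id, ' door'], [id, '.']]  (ids depend on the tokenizer)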
@@ -198,7 +211,7 @@ class koboldai_vars(object):
         # TODO: We may want to replace the "text" variable with a list-type
         #  class of context blocks, the class having a __str__ function.
         if self.sp_length > 0:
-            context.append({"type": "soft_prompt", "text": f"<{self.sp_length} tokens of Soft Prompt.>", "tokens": self.sp_length})
+            context.append({"type": "soft_prompt", "text": f"<{self.sp_length} tokens of Soft Prompt.>", "tokens": [-1] * self.sp_length})
         # Header is never used?
         # if koboldai_vars.model not in ("Colab", "API", "OAI") and self.tokenizer._koboldai_header:
         #     context.append({"type": "header", "text": f"{len(self.tokenizer._koboldai_header})
@@ -208,11 +221,16 @@ class koboldai_vars(object):
         #Add memory
         memory_length = self.max_memory_length if self.memory_length > self.max_memory_length else self.memory_length
         memory_text = self.memory
+        memory_encoded = None
         if memory_length+used_tokens <= token_budget:
             if self.tokenizer is not None and self.memory_length > self.max_memory_length:
-                memory_text = self.tokenizer.decode(self.tokenizer.encode(self.memory)[-self.max_memory_length-1:])
-            context.append({"type": "memory", "text": memory_text, "tokens": memory_length})
+                memory_encoded = self.tokenizer.encode(self.memory)[-self.max_memory_length-1:]
+                memory_text = self.tokenizer.decode(memory_encoded)
+            if not memory_encoded and self.tokenizer:
+                memory_encoded = self.tokenizer.encode(memory_text)
+            context.append({"type": "memory", "text": memory_text, "tokens": self.get_token_representation(memory_encoded)})
             text += memory_text
 
         #Add constant world info entries to memory
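
Note: the memory branch above now keeps the encoded ids around so the same per-token pairs can be built for trimmed memory. A rough, self-contained sketch of that trimming path, again with an assumed Hugging Face tokenizer standing in for self.tokenizer and made-up memory text and budget:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative stand-in
    memory = "The lighthouse keeper wrote one letter every night before the storm."
    max_memory_length = 8  # pretend token budget for memory

    # Keep only the tail of the encoded memory, then decode it back to text,
    # mirroring the memory_encoded / memory_text handling in calc_ai_text.
    memory_encoded = tokenizer.encode(memory)[-max_memory_length-1:]
    memory_text = tokenizer.decode(memory_encoded)

    # What the context viewer receives for this block: [token_id, decoded text] pairs.
    tokens = [[tok, tokenizer.decode(tok)] for tok in memory_encoded]
    print(memory_text)
    print(tokens[:3])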
@@ -223,7 +241,11 @@ class koboldai_vars(object):
                 used_world_info.append(wi['uid'])
                 self.worldinfo_v2.set_world_info_used(wi['uid'])
                 wi_text = wi['content']
-                context.append({"type": "world_info", "text": wi_text, "tokens": wi['token_length']})
+                context.append({
+                    "type": "world_info",
+                    "text": wi_text,
+                    "tokens": self.get_token_representation(wi_text),
+                })
                 text += wi_text
@@ -268,7 +290,7 @@ class koboldai_vars(object):
                 used_tokens+=0 if wi['token_length'] is None else wi['token_length']
                 used_world_info.append(wi['uid'])
                 wi_text = wi['content']
-                context.append({"type": "world_info", "text": wi_text, "tokens": wi['token_length']})
+                context.append({"type": "world_info", "text": wi_text, "tokens": self.get_token_representation(wi_text)})
                 text += wi_text
                 self.worldinfo_v2.set_world_info_used(wi['uid'])
@@ -288,31 +310,50 @@ class koboldai_vars(object):
         game_context = []
         authors_note_final = self.authornotetemplate.replace("<|>", self.authornote)
         used_all_tokens = False
         for action in range(len(self.actions)):
             self.actions.set_action_in_ai(action, used=False)
         for i in range(len(action_text_split)-1, -1, -1):
             if action_text_split[i][3] or action_text_split[i][1] == [-1]:
                 #We've hit an item we've already included or items that are only prompt. Stop
                 for action in action_text_split[i][1]:
                     if action >= 0:
                         self.actions.set_action_in_ai(action)
-                break;
+                break
             if len(action_text_split) - i - 1 == self.andepth and self.authornote != "":
                 game_text = "{}{}".format(authors_note_final, game_text)
-                game_context.insert(0, {"type": "authors_note", "text": authors_note_final, "tokens": self.authornote_length})
+                game_context.insert(0, {"type": "authors_note", "text": authors_note_final, "tokens": self.get_token_representation(authors_note_final)})
-            length = 0 if self.tokenizer is None else len(self.tokenizer.encode(action_text_split[i][0]))
+            encoded_action = [] if not self.tokenizer else self.tokenizer.encode(action_text_split[i][0])
+            length = len(encoded_action)
             if length+used_tokens <= token_budget and not used_all_tokens:
                 used_tokens += length
                 selected_text = action_text_split[i][0]
                 action_text_split[i][3] = True
                 game_text = "{}{}".format(selected_text, game_text)
                 if action_text_split[i][1] == [self.actions.action_count+1]:
-                    game_context.insert(0, {"type": "submit", "text": selected_text, "tokens": length, "action_ids": action_text_split[i][1]})
+                    game_context.insert(0, {
+                        "type": "submit",
+                        "text": selected_text,
+                        "tokens": self.get_token_representation(encoded_action),
+                        "action_ids": action_text_split[i][1]
+                    })
                 else:
-                    game_context.insert(0, {"type": "action", "text": selected_text, "tokens": length, "action_ids": action_text_split[i][1]})
+                    game_context.insert(0, {
+                        "type": "action",
+                        "text": selected_text,
+                        "tokens": self.get_token_representation(encoded_action),
+                        "action_ids": action_text_split[i][1]
+                    })
                 for action in action_text_split[i][1]:
                     if action >= 0:
                         self.actions.set_action_in_ai(action)
                 #Now we need to check for used world info entries
                 for wi in self.worldinfo_v2:
                     if wi['uid'] not in used_world_info:
@@ -336,12 +377,13 @@ class koboldai_vars(object):
                             used_tokens+=0 if wi['token_length'] is None else wi['token_length']
                             used_world_info.append(wi['uid'])
                             wi_text = wi["content"]
+                            encoded_wi = self.tokenizer.encode(wi_text)
                             if method == 1:
                                 text = "{}{}".format(wi_text, game_text)
-                                context.insert(0, {"type": "world_info", "text": wi_text, "tokens": wi['token_length']})
+                                context.insert(0, {"type": "world_info", "text": wi_text, "tokens": self.get_token_representation(encoded_wi)})
                             else:
                                 game_text = "{}{}".format(wi_text, game_text)
-                                game_context.insert(0, {"type": "world_info", "text": wi_text, "tokens": wi['token_length']})
+                                game_context.insert(0, {"type": "world_info", "text": wi_text, "tokens": self.get_token_representation(encoded_wi)})
                             self.worldinfo_v2.set_world_info_used(wi['uid'])
             else:
                 used_all_tokens = True
@@ -350,11 +392,11 @@ class koboldai_vars(object):
         #if we don't have enough actions to get to author's note depth then we just add it right before the game text
         if len(action_text_split) < self.andepth and self.authornote != "":
             game_text = "{}{}".format(authors_note_final, game_text)
-            game_context.insert(0, {"type": "authors_note", "text": authors_note_final, "tokens": authornote_length})
+            game_context.insert(0, {"type": "authors_note", "text": authors_note_final, "tokens": self.get_token_representation(authors_note_final)})
 
         if self.useprompt:
             text += prompt_text
-            context.append({"type": "prompt", "text": prompt_text, "tokens": prompt_length})
+            context.append({"type": "prompt", "text": prompt_text, "tokens": self.get_token_representation(prompt_text)})
         elif not used_all_tokens:
             prompt_length = 0
             prompt_text = ""
@@ -392,12 +434,12 @@ class koboldai_vars(object):
                     used_tokens+=0 if wi['token_length'] is None else wi['token_length']
                     used_world_info.append(wi['uid'])
                     wi_text = wi['content']
-                    context.append({"type": "world_info", "text": wi_text, "tokens": wi['token_length']})
+                    context.append({"type": "world_info", "text": wi_text, "tokens": self.get_token_representation(wi_text)})
                     text += wi_text
                     self.worldinfo_v2.set_world_info_used(wi['uid'])
             text += prompt_text
-            context.append({"type": "prompt", "text": prompt_text, "tokens": prompt_length})
+            context.append({"type": "prompt", "text": prompt_text, "tokens": self.get_token_representation(prompt_text)})
             self.prompt_in_ai = True
         else:
             self.prompt_in_ai = False

View File

@@ -1860,6 +1860,10 @@ body {
     height: 100%;
     flex-grow: 1;
     padding: 0px 10px;
+
+    /* HACK: This is a visually ugly hack to avoid cutting off token tooltips on
+    the first line. */
+    padding-top: 15px;
 }
 
 .context-symbol {
@@ -1874,10 +1878,30 @@ body {
     font-family: monospace;
 }
 
-.context-block:hover {
+.context-token {
+    position: relative;
+    background-color: inherit;
+}
+
+.context-token:hover {
     outline: 1px solid gray;
 }
+
+.context-token:hover::after {
+    content: attr(token-id);
+    position: absolute;
+    top: -120%;
+    left: 50%;
+    transform: translateX(-50%);
+    padding: 0px 2px;
+    background-color: rgba(0, 0, 0, 0.6);
+    pointer-events: none;
+    z-index: 9999999;
+}
 
 .context-sp {background-color: var(--context_colors_soft_prompt);}
 .context-prompt {background-color: var(--context_colors_prompt);}
 .context-wi {background-color: var(--context_colors_world_info);}
@@ -2632,6 +2656,7 @@ input[type='range'] {
     line-height: 1;
     position: relative;
 }
+
 [tooltip]::after {
     background-color: rgba(51, 51, 51, 0.9);
     border-radius: 0.3rem;
@@ -2649,8 +2674,10 @@ input[type='range'] {
     transition: opacity 0.2s;
     visibility: hidden;
     white-space: nowrap;
-    z-index: 1;
+    z-index: 9999;
+    pointer-events: none;
 }
 
 @media (max-width: 767px) {
     [tooltip].tooltip::before {
         display: none;

View File

@@ -2839,6 +2839,22 @@ function update_bias_slider_value(slider) {
     slider.parentElement.parentElement.querySelector(".bias_slider_cur").textContent = slider.value;
 }
 
+function distortColor(rgb) {
+    // rgb are 0..255, NOT NORMALIZED!!!!!!
+    const brightnessTamperAmplitude = 0.1;
+    const psuedoHue = 12;
+
+    let brightnessDistortion = Math.random() * (255 * brightnessTamperAmplitude);
+    rgb = rgb.map(x => x + brightnessDistortion);
+
+    // Cheap hack to imitate hue rotation
+    rgb = rgb.map(x => x += (Math.random() * psuedoHue * 2) - psuedoHue);
+
+    // Clamp and round
+    rgb = rgb.map(x => Math.round(Math.max(0, Math.min(255, x))));
+
+    return rgb;
+}
+
 function update_context(data) {
     $(".context-block").remove();
@@ -2856,7 +2872,6 @@ function update_context(data) {
     }
 
     for (const entry of data) {
-        //console.log(entry);
         let contextClass = "context-" + ({
             soft_prompt: "sp",
             prompt: "prompt",
@@ -2867,14 +2882,27 @@ function update_context(data) {
             submit: 'submit'
         }[entry.type]);
 
-        let el = document.createElement("span");
-        el.classList.add("context-block");
-        el.classList.add(contextClass);
-        el.innerText = entry.text;
-        el.title = entry.tokens + " tokens";
-        el.innerHTML = el.innerHTML.replaceAll("<br>", '<span class="material-icons-outlined context-symbol">keyboard_return</span>');
+        let el = $e(
+            "span",
+            $el("#context-container"),
+            {classes: ["context-block", contextClass]}
+        );
+
+        let rgb = window.getComputedStyle(el)["background-color"].match(/(\d+), (\d+), (\d+)/).slice(1, 4).map(Number);
+
+        for (const [tokenId, token] of entry.tokens) {
+            let tokenColor = distortColor(rgb);
+            tokenColor = "#" + (tokenColor.map((x) => x.toString(16)).join(""));
+
+            let tokenEl = $e("span", el, {
+                classes: ["context-token"],
+                "token-id": tokenId === -1 ? "Soft" : tokenId,
+                innerText: token,
+                "style.backgroundColor": tokenColor,
+            });
+            tokenEl.innerHTML = tokenEl.innerHTML.replaceAll("<br>", '<span class="material-icons-outlined context-symbol">keyboard_return</span>');
+        }
+
         document.getElementById("context-container").appendChild(el);
 
         switch (entry.type) {