mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-03-03 03:17:54 +01:00
Merge pull request #3001 from 50h100a/alt-spaces
Correctly interpret some alternate whitespaces in token names
This commit is contained in:
commit
9cc8830dd3
@ -89,7 +89,7 @@ function drawChunks(chunks, ids) {
|
|||||||
$('#tokenized_chunks_display').empty();
|
$('#tokenized_chunks_display').empty();
|
||||||
|
|
||||||
for (let i = 0; i < chunks.length; i++) {
|
for (let i = 0; i < chunks.length; i++) {
|
||||||
let chunk = chunks[i].replace(/▁/g, ' '); // This is a leading space in sentencepiece. More info: Lower one eighth block (U+2581)
|
let chunk = chunks[i].replace(/[▁Ġ]/g, ' '); // Leading-space markers: ▁ (U+2581, lower one eighth block) in SentencePiece, Ġ (U+0120) in GPT-2-style byte-level BPE
|
||||||
|
|
||||||
// If <0xHEX>, decode it
|
// If <0xHEX>, decode it
|
||||||
if (/^<0x[0-9A-F]+>$/i.test(chunk)) {
|
if (/^<0x[0-9A-F]+>$/i.test(chunk)) {
|
||||||
|
@ -160,7 +160,7 @@ function renderTopLogprobs() {
|
|||||||
let matched = false;
|
let matched = false;
|
||||||
for (const [token, probability, log] of candidates) {
|
for (const [token, probability, log] of candidates) {
|
||||||
const container = $('<button class="flex-container flexFlowColumn logprobs_top_candidate"></button>');
|
const container = $('<button class="flex-container flexFlowColumn logprobs_top_candidate"></button>');
|
||||||
const tokenNormalized = String(token).replace(/^▁/g, ' ');
|
const tokenNormalized = String(token).replace(/^[▁Ġ]/g, ' ');
|
||||||
|
|
||||||
if (token === selectedToken || tokenNormalized === selectedToken) {
|
if (token === selectedToken || tokenNormalized === selectedToken) {
|
||||||
matched = true;
|
matched = true;
|
||||||
@ -230,7 +230,7 @@ function onAlternativeClicked(tokenLogprobs, alternative) {
|
|||||||
const replaceIndex = messageLogprobs.findIndex(x => x === tokenLogprobs);
|
const replaceIndex = messageLogprobs.findIndex(x => x === tokenLogprobs);
|
||||||
|
|
||||||
const tokens = messageLogprobs.slice(0, replaceIndex + 1).map(({ token }) => token);
|
const tokens = messageLogprobs.slice(0, replaceIndex + 1).map(({ token }) => token);
|
||||||
tokens[replaceIndex] = String(alternative).replace(/^▁/g, ' ');
|
tokens[replaceIndex] = String(alternative).replace(/^[▁Ġ]/g, ' ').replace(/Ċ/g, '\n');
|
||||||
|
|
||||||
const prefix = continueFrom || '';
|
const prefix = continueFrom || '';
|
||||||
const prompt = prefix + tokens.join('');
|
const prompt = prefix + tokens.join('');
|
||||||
@ -343,7 +343,7 @@ function createSwipe(messageId, prompt) {
|
|||||||
* @returns {string}
|
* @returns {string}
|
||||||
*/
|
*/
|
||||||
function toVisibleWhitespace(input) {
|
function toVisibleWhitespace(input) {
|
||||||
return input.replace(/ /g, '·').replace(/▁/g, '·').replace(/\n/g, '↵');
|
return input.replace(/ /g, '·').replace(/[▁Ġ]/g, '·').replace(/[Ċ\n]/g, '↵');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -362,7 +362,7 @@ function withVirtualWhitespace(text, span) {
|
|||||||
if (text.match(/\s$/)) {
|
if (text.match(/\s$/)) {
|
||||||
result.push($(document.createTextNode('\u200b')));
|
result.push($(document.createTextNode('\u200b')));
|
||||||
}
|
}
|
||||||
if (text.match(/^▁/)) {
|
if (text.match(/^[▁Ġ]/)) {
|
||||||
result.unshift(document.createTextNode('\u200b'));
|
result.unshift(document.createTextNode('\u200b'));
|
||||||
}
|
}
|
||||||
// line breaks are trickier. we don't currently handle consecutive line
|
// line breaks are trickier. we don't currently handle consecutive line
|
||||||
|
Loading…
x
Reference in New Issue
Block a user