Correctly interpret some alternate whitespace characters in token names
This commit is contained in:
parent
1ac6780e9c
commit
5d5e552cbd
|
@ -89,7 +89,7 @@ function drawChunks(chunks, ids) {
|
|||
$('#tokenized_chunks_display').empty();
|
||||
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
let chunk = chunks[i].replace(/▁/g, ' '); // This is a leading space in sentencepiece. More info: Lower one eighth block (U+2581)
|
||||
let chunk = chunks[i].replace(/[▁Ġ]/g, ' '); // This is a leading space in sentencepiece. More info: Lower one eighth block (U+2581)
|
||||
|
||||
// If <0xHEX>, decode it
|
||||
if (/^<0x[0-9A-F]+>$/i.test(chunk)) {
|
||||
|
|
|
@ -160,7 +160,7 @@ function renderTopLogprobs() {
|
|||
let matched = false;
|
||||
for (const [token, probability, log] of candidates) {
|
||||
const container = $('<button class="flex-container flexFlowColumn logprobs_top_candidate"></button>');
|
||||
const tokenNormalized = String(token).replace(/^▁/g, ' ');
|
||||
const tokenNormalized = String(token).replace(/^[▁Ġ]/g, ' ');
|
||||
|
||||
if (token === selectedToken || tokenNormalized === selectedToken) {
|
||||
matched = true;
|
||||
|
@ -230,7 +230,7 @@ function onAlternativeClicked(tokenLogprobs, alternative) {
|
|||
const replaceIndex = messageLogprobs.findIndex(x => x === tokenLogprobs);
|
||||
|
||||
const tokens = messageLogprobs.slice(0, replaceIndex + 1).map(({ token }) => token);
|
||||
tokens[replaceIndex] = String(alternative).replace(/^▁/g, ' ');
|
||||
tokens[replaceIndex] = String(alternative).replace(/^[▁Ġ]/g, ' ').replace(/Ċ/g, '\n');
|
||||
|
||||
const prefix = continueFrom || '';
|
||||
const prompt = prefix + tokens.join('');
|
||||
|
@ -343,7 +343,7 @@ function createSwipe(messageId, prompt) {
|
|||
* @returns {string}
|
||||
*/
|
||||
function toVisibleWhitespace(input) {
|
||||
return input.replace(/ /g, '·').replace(/▁/g, '·').replace(/\n/g, '↵');
|
||||
return input.replace(/ /g, '·').replace(/[▁Ġ]/g, '·').replace(/[Ċ\n]/g, '↵');
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -362,7 +362,7 @@ function withVirtualWhitespace(text, span) {
|
|||
if (text.match(/\s$/)) {
|
||||
result.push($(document.createTextNode('\u200b')));
|
||||
}
|
||||
if (text.match(/^▁/)) {
|
||||
if (text.match(/^[▁Ġ]/)) {
|
||||
result.unshift(document.createTextNode('\u200b'));
|
||||
}
|
||||
// line breaks are trickier. we don't currently handle consecutive line
|
||||
|
|
Loading…
Reference in New Issue