From 5540c165cf2d5b5ed3675d8e4bd70b746ed45d5f Mon Sep 17 00:00:00 2001 From: valadaptive Date: Wed, 6 Dec 2023 22:55:17 -0500 Subject: [PATCH 001/179] Refactor server-sent events parsing Create one server-sent events stream class which implements the entire spec (different line endings, chunking, etc) and use it in all the streaming generators. --- public/scripts/kai-settings.js | 35 +++------- public/scripts/nai-settings.js | 57 ++++------------ public/scripts/openai.js | 63 ++++------------- public/scripts/sse-stream.js | 105 +++++++++++++++++++++++++++++ public/scripts/textgen-settings.js | 54 +++++---------- 5 files changed, 158 insertions(+), 156 deletions(-) create mode 100644 public/scripts/sse-stream.js diff --git a/public/scripts/kai-settings.js b/public/scripts/kai-settings.js index 4659b1bd0..4872f1536 100644 --- a/public/scripts/kai-settings.js +++ b/public/scripts/kai-settings.js @@ -10,6 +10,7 @@ import { import { power_user, } from './power-user.js'; +import EventSourceStream from './sse-stream.js'; import { getSortableDelay } from './utils.js'; export const kai_settings = { @@ -160,37 +161,21 @@ export async function generateKoboldWithStreaming(generate_data, signal) { method: 'POST', signal: signal, }); + const eventStream = new EventSourceStream(); + response.body.pipeThrough(eventStream); + const reader = eventStream.readable.getReader(); return async function* streamData() { - const decoder = new TextDecoder(); - const reader = response.body.getReader(); - let getMessage = ''; - let messageBuffer = ''; + let text = ''; while (true) { const { done, value } = await reader.read(); - let response = decoder.decode(value); - let eventList = []; + if (done) return; - // ReadableStream's buffer is not guaranteed to contain full SSE messages as they arrive in chunks - // We need to buffer chunks until we have one or more full messages (separated by double newlines) - messageBuffer += response; - eventList = messageBuffer.split('\n\n'); - // Last element 
will be an empty string or a leftover partial message - messageBuffer = eventList.pop(); - - for (let event of eventList) { - for (let subEvent of event.split('\n')) { - if (subEvent.startsWith('data')) { - let data = JSON.parse(subEvent.substring(5)); - getMessage += (data?.token || ''); - yield { text: getMessage, swipes: [] }; - } - } - } - - if (done) { - return; + const data = JSON.parse(value.data); + if (data?.token) { + text += data.token; } + yield { text, swipes: [] }; } }; } diff --git a/public/scripts/nai-settings.js b/public/scripts/nai-settings.js index 0456d6216..024ec5850 100644 --- a/public/scripts/nai-settings.js +++ b/public/scripts/nai-settings.js @@ -10,6 +10,7 @@ import { import { getCfgPrompt } from './cfg-scale.js'; import { MAX_CONTEXT_DEFAULT, MAX_RESPONSE_DEFAULT } from './power-user.js'; import { getTextTokens, tokenizers } from './tokenizers.js'; +import EventSourceStream from './sse-stream.js'; import { getSortableDelay, getStringHash, @@ -663,24 +664,6 @@ export function adjustNovelInstructionPrompt(prompt) { return stripedPrompt; } -function tryParseStreamingError(decoded) { - try { - const data = JSON.parse(decoded); - - if (!data) { - return; - } - - if (data.message && data.statusCode >= 400) { - toastr.error(data.message, 'Error'); - throw new Error(data); - } - } - catch { - // No JSON. Do nothing. 
- } -} - export async function generateNovelWithStreaming(generate_data, signal) { generate_data.streaming = nai_settings.streaming_novel; @@ -690,39 +673,27 @@ export async function generateNovelWithStreaming(generate_data, signal) { method: 'POST', signal: signal, }); + const eventStream = new EventSourceStream(); + response.body.pipeThrough(eventStream); + const reader = eventStream.readable.getReader(); return async function* streamData() { - const decoder = new TextDecoder(); - const reader = response.body.getReader(); - let getMessage = ''; - let messageBuffer = ''; + let text = ''; while (true) { const { done, value } = await reader.read(); - let decoded = decoder.decode(value); - let eventList = []; + if (done) return; - tryParseStreamingError(decoded); - - // ReadableStream's buffer is not guaranteed to contain full SSE messages as they arrive in chunks - // We need to buffer chunks until we have one or more full messages (separated by double newlines) - messageBuffer += decoded; - eventList = messageBuffer.split('\n\n'); - // Last element will be an empty string or a leftover partial message - messageBuffer = eventList.pop(); - - for (let event of eventList) { - for (let subEvent of event.split('\n')) { - if (subEvent.startsWith('data')) { - let data = JSON.parse(subEvent.substring(5)); - getMessage += (data?.token || ''); - yield { text: getMessage, swipes: [] }; - } - } + const data = JSON.parse(value.data); + if (data.message && data.statusCode >= 400) { + toastr.error(data.message, 'Error'); + throw new Error(data); } - if (done) { - return; + if (data.token) { + text += data.token; } + + yield { text, swipes: [] }; } }; } diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 30f4c6e29..1758a46fb 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -44,6 +44,7 @@ import { import { getCustomStoppingStrings, persona_description_positions, power_user } from './power-user.js'; import { SECRET_KEYS, secret_state, 
writeSecret } from './secrets.js'; +import EventSourceStream from './sse-stream.js'; import { delay, download, @@ -1565,57 +1566,22 @@ async function sendOpenAIRequest(type, messages, signal) { }); if (stream) { + const eventStream = new EventSourceStream(); + response.body.pipeThrough(eventStream); + const reader = eventStream.readable.getReader(); return async function* streamData() { - const decoder = new TextDecoder(); - const reader = response.body.getReader(); - let getMessage = ''; - let messageBuffer = ''; + let text = ''; while (true) { const { done, value } = await reader.read(); - let decoded = decoder.decode(value); + if (done) return; + if (value.data === '[DONE]') return; - // Claude's streaming SSE messages are separated by \r - if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) { - decoded = decoded.replace(/\r/g, ''); - } + tryParseStreamingError(response, value.data); - tryParseStreamingError(response, decoded); + // the first and last messages are undefined, protect against that + text += getStreamingReply(JSON.parse(value.data)); - let eventList = []; - - // ReadableStream's buffer is not guaranteed to contain full SSE messages as they arrive in chunks - // We need to buffer chunks until we have one or more full messages (separated by double newlines) - if (!oai_settings.legacy_streaming) { - messageBuffer += decoded; - eventList = messageBuffer.split('\n\n'); - // Last element will be an empty string or a leftover partial message - messageBuffer = eventList.pop(); - } else { - eventList = decoded.split('\n'); - } - - for (let event of eventList) { - if (event.startsWith('event: completion')) { - event = event.split('\n')[1]; - } - - if (typeof event !== 'string' || !event.length) - continue; - - if (!event.startsWith('data')) - continue; - if (event == 'data: [DONE]') { - return; - } - let data = JSON.parse(event.substring(6)); - // the first and last messages are undefined, protect against that - getMessage = 
getStreamingReply(getMessage, data); - yield { text: getMessage, swipes: [] }; - } - - if (done) { - return; - } + yield { text, swipes: [] }; } }; } @@ -1633,13 +1599,12 @@ async function sendOpenAIRequest(type, messages, signal) { } } -function getStreamingReply(getMessage, data) { +function getStreamingReply(data) { if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) { - getMessage += data?.completion || ''; + return data?.completion || ''; } else { - getMessage += data.choices[0]?.delta?.content || data.choices[0]?.message?.content || data.choices[0]?.text || ''; + return data.choices[0]?.delta?.content || data.choices[0]?.message?.content || data.choices[0]?.text || ''; } - return getMessage; } function handleWindowError(err) { diff --git a/public/scripts/sse-stream.js b/public/scripts/sse-stream.js new file mode 100644 index 000000000..e50bf55f3 --- /dev/null +++ b/public/scripts/sse-stream.js @@ -0,0 +1,105 @@ +/** + * A stream which handles Server-Sent Events from a binary ReadableStream like you get from the fetch API. + */ +class EventSourceStream { + constructor() { + const decoder = new TextDecoderStream('utf-8', { ignoreBOM: true }); + + let streamBuffer = ''; + + let dataBuffer = ''; + let eventType = 'message'; + let lastEventId = ''; + + // https://html.spec.whatwg.org/multipage/server-sent-events.html#parsing-an-event-stream Parses a line from the + // event stream. This is hard to read, so here's how it works: The first group matches either a field (field + // name, optional (colon, value)) or a comment (colon, text). That group is optional, and is followed by a group + // which matches a newline. This means that: The only *capturing* groups are the field, field value, comment, + // and newline. This lets us determine what the line is by which capture groups are filled in. 
The field and + // value groups being present means it's a field, the comment group being present means it's a comment, and + // neither means it's a blank line. This is best viewed in RegExr if you value your sanity. + const parserRegex = /(?:(?:([^\r\n:]+)(?:: ?([^\r\n]*)?)?)|(:[^\r\n]*))?(\r\n|\r|\n)/y; + + function processChunk(controller, isLastChunk) { + while (parserRegex.lastIndex < streamBuffer.length) { + const lastLastIndex = parserRegex.lastIndex; + const matchResult = parserRegex.exec(streamBuffer); + // We need to wait for more data to come in + if (!matchResult) { + if (lastLastIndex !== 0) { + // Slice off what we've successfully parsed so far. lastIndex is set to 0 if there's no match, + // so it'll be set to start off here. + streamBuffer = streamBuffer.slice(lastLastIndex); + } + return; + } + + const field = matchResult[1]; + const value = matchResult[2]; + const comment = matchResult[3]; + const newline = matchResult[4]; + // Corner case: if the last character in the buffer is '\r', we need to wait for more data. These chunks + // could be split up any which way, and it's entirely possible that the next chunk we receive will start + // with '\n', and this trailing '\r' is actually part of a '\r\n' sequence. + if (newline === '\r' && parserRegex.lastIndex === streamBuffer.length && !isLastChunk) { + // Trim off what we've parsed so far, and wait for more data + streamBuffer = streamBuffer.slice(lastLastIndex); + parserRegex.lastIndex = 0; + return; + } + + // https://html.spec.whatwg.org/multipage/server-sent-events.html#processField + if (typeof field === 'string') { + switch (field) { + case 'event': + eventType = value; + break; + case 'data': + // If the data field is empty, there won't be a match for the value. Just add a newline. 
+ if (typeof value === 'string') dataBuffer += value; + dataBuffer += '\n'; + break; + case 'id': + if (!value.includes('\0')) lastEventId = value; + break; + // We do nothing for the `delay` type, and other types are explicitly ignored + } + } else if (typeof comment === 'string') { + continue; + } else { + // https://html.spec.whatwg.org/multipage/server-sent-events.html#dispatchMessage + // Must be a newline. Dispatch the event. + // Skip the event if the data buffer is the empty string. + if (dataBuffer === '') continue; + // Trim the *last* trailing newline + if (dataBuffer[dataBuffer.length - 1] === '\n') { + dataBuffer = dataBuffer.slice(0, -1); + } + const event = new MessageEvent(eventType, { data: dataBuffer, lastEventId }); + controller.enqueue(event); + dataBuffer = ''; + eventType = 'message'; + } + } + } + + const sseStream = new TransformStream({ + transform(chunk, controller) { + streamBuffer += chunk; + processChunk(controller, false); + }, + + flush(controller) { + // If it's the last chunk, trailing carriage returns are allowed + processChunk(controller, true); + }, + }); + + decoder.readable.pipeThrough(sseStream); + + this.readable = sseStream.readable; + this.writable = decoder.writable; + } +} + +export default EventSourceStream; diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js index 53ecc65a3..756ca02d2 100644 --- a/public/scripts/textgen-settings.js +++ b/public/scripts/textgen-settings.js @@ -14,6 +14,7 @@ import { power_user, registerDebugFunction, } from './power-user.js'; +import EventSourceStream from './sse-stream.js'; import { SENTENCEPIECE_TOKENIZERS, getTextTokens, tokenizers } from './tokenizers.js'; import { getSortableDelay, onlyUnique } from './utils.js'; @@ -475,55 +476,30 @@ async function generateTextGenWithStreaming(generate_data, signal) { method: 'POST', signal: signal, }); + const eventStream = new EventSourceStream(); + response.body.pipeThrough(eventStream); + const reader = 
eventStream.readable.getReader(); return async function* streamData() { - const decoder = new TextDecoder(); - const reader = response.body.getReader(); - let getMessage = ''; - let messageBuffer = ''; + let text = ''; const swipes = []; while (true) { const { done, value } = await reader.read(); - // We don't want carriage returns in our messages - let response = decoder.decode(value).replace(/\r/g, ''); + if (done) return; + if (value.data === '[DONE]') return; - tryParseStreamingError(response); + tryParseStreamingError(response, value.data); - let eventList = []; + let data = JSON.parse(value.data); - messageBuffer += response; - eventList = messageBuffer.split('\n\n'); - // Last element will be an empty string or a leftover partial message - messageBuffer = eventList.pop(); - - for (let event of eventList) { - if (event.startsWith('event: completion')) { - event = event.split('\n')[1]; - } - - if (typeof event !== 'string' || !event.length) - continue; - - if (!event.startsWith('data')) - continue; - if (event == 'data: [DONE]') { - return; - } - let data = JSON.parse(event.substring(6)); - - if (data?.choices[0]?.index > 0) { - const swipeIndex = data.choices[0].index - 1; - swipes[swipeIndex] = (swipes[swipeIndex] || '') + data.choices[0].text; - } else { - getMessage += data?.choices[0]?.text || ''; - } - - yield { text: getMessage, swipes: swipes }; + if (data?.choices[0]?.index > 0) { + const swipeIndex = data.choices[0].index - 1; + swipes[swipeIndex] = (swipes[swipeIndex] || '') + data.choices[0].text; + } else { + text += data?.choices[0]?.text || ''; } - if (done) { - return; - } + yield { text, swipes }; } }; } From cdcd913805c6dcf38c7f96252cdb06feed084bed Mon Sep 17 00:00:00 2001 From: valadaptive Date: Thu, 7 Dec 2023 11:02:39 -0500 Subject: [PATCH 002/179] Don't stream events if the API returned a 4xx code --- public/scripts/kai-settings.js | 22 ++++++++++++++++++++++ public/scripts/nai-settings.js | 26 ++++++++++++++++++++++---- 
public/scripts/openai.js | 5 +++++ public/scripts/textgen-settings.js | 6 ++++++ 4 files changed, 55 insertions(+), 4 deletions(-) diff --git a/public/scripts/kai-settings.js b/public/scripts/kai-settings.js index 4872f1536..0b6c9972e 100644 --- a/public/scripts/kai-settings.js +++ b/public/scripts/kai-settings.js @@ -154,6 +154,24 @@ export function getKoboldGenerationData(finalPrompt, settings, maxLength, maxCon return generate_data; } +function tryParseStreamingError(response, decoded) { + try { + const data = JSON.parse(decoded); + + if (!data) { + return; + } + + if (data.error) { + toastr.error(data.error.message || response.statusText, 'API returned an error'); + throw new Error(data); + } + } + catch { + // No JSON. Do nothing. + } +} + export async function generateKoboldWithStreaming(generate_data, signal) { const response = await fetch('/generate', { headers: getRequestHeaders(), @@ -161,6 +179,10 @@ export async function generateKoboldWithStreaming(generate_data, signal) { method: 'POST', signal: signal, }); + if (!response.ok) { + tryParseStreamingError(response, await response.body.text()); + throw new Error(`Got response status ${response.status}`); + } const eventStream = new EventSourceStream(); response.body.pipeThrough(eventStream); const reader = eventStream.readable.getReader(); diff --git a/public/scripts/nai-settings.js b/public/scripts/nai-settings.js index 024ec5850..fe5996cde 100644 --- a/public/scripts/nai-settings.js +++ b/public/scripts/nai-settings.js @@ -664,6 +664,24 @@ export function adjustNovelInstructionPrompt(prompt) { return stripedPrompt; } +function tryParseStreamingError(response, decoded) { + try { + const data = JSON.parse(decoded); + + if (!data) { + return; + } + + if (data.error) { + toastr.error(data.error.message || response.statusText, 'API returned an error'); + throw new Error(data); + } + } + catch { + // No JSON. Do nothing. 
+ } +} + export async function generateNovelWithStreaming(generate_data, signal) { generate_data.streaming = nai_settings.streaming_novel; @@ -673,6 +691,10 @@ export async function generateNovelWithStreaming(generate_data, signal) { method: 'POST', signal: signal, }); + if (!response.ok) { + tryParseStreamingError(response, await response.body.text()); + throw new Error(`Got response status ${response.status}`); + } const eventStream = new EventSourceStream(); response.body.pipeThrough(eventStream); const reader = eventStream.readable.getReader(); @@ -684,10 +706,6 @@ export async function generateNovelWithStreaming(generate_data, signal) { if (done) return; const data = JSON.parse(value.data); - if (data.message && data.statusCode >= 400) { - toastr.error(data.message, 'Error'); - throw new Error(data); - } if (data.token) { text += data.token; diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 1758a46fb..e354ff130 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -1565,6 +1565,11 @@ async function sendOpenAIRequest(type, messages, signal) { signal: signal, }); + if (!response.ok) { + tryParseStreamingError(response, await response.body.text()); + throw new Error(`Got response status ${response.status}`); + } + if (stream) { const eventStream = new EventSourceStream(); response.body.pipeThrough(eventStream); diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js index 756ca02d2..e6ed6deaa 100644 --- a/public/scripts/textgen-settings.js +++ b/public/scripts/textgen-settings.js @@ -476,6 +476,12 @@ async function generateTextGenWithStreaming(generate_data, signal) { method: 'POST', signal: signal, }); + + if (!response.ok) { + tryParseStreamingError(response, await response.body.text()); + throw new Error(`Got response status ${response.status}`); + } + const eventStream = new EventSourceStream(); response.body.pipeThrough(eventStream); const reader = eventStream.readable.getReader(); From 
5569a63595f817eb12a6fb7fae67bb99e0320611 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Thu, 7 Dec 2023 12:41:27 -0500 Subject: [PATCH 003/179] Remove legacy_streaming setting This was a workaround for older versions of Slaude that implemented SSE improperly. This was fixed in Slaude 7 months ago, so the workaround can be removed. --- default/settings.json | 1 - public/index.html | 13 ------------- public/scripts/openai.js | 11 ----------- 3 files changed, 25 deletions(-) diff --git a/default/settings.json b/default/settings.json index 8fbaf9fff..1f3a3260d 100644 --- a/default/settings.json +++ b/default/settings.json @@ -596,7 +596,6 @@ "openrouter_model": "OR_Website", "jailbreak_system": true, "reverse_proxy": "", - "legacy_streaming": false, "chat_completion_source": "openai", "max_context_unlocked": false, "api_url_scale": "", diff --git a/public/index.html b/public/index.html index da5b6e0cb..be581c89b 100644 --- a/public/index.html +++ b/public/index.html @@ -759,19 +759,6 @@ -
-
- -
-
- Enable this if the streaming doesn't work with your proxy. -
-
diff --git a/public/scripts/openai.js b/public/scripts/openai.js index e354ff130..5de617283 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -216,7 +216,6 @@ const default_settings = { openrouter_sort_models: 'alphabetically', jailbreak_system: false, reverse_proxy: '', - legacy_streaming: false, chat_completion_source: chat_completion_sources.OPENAI, max_context_unlocked: false, api_url_scale: '', @@ -270,7 +269,6 @@ const oai_settings = { openrouter_sort_models: 'alphabetically', jailbreak_system: false, reverse_proxy: '', - legacy_streaming: false, chat_completion_source: chat_completion_sources.OPENAI, max_context_unlocked: false, api_url_scale: '', @@ -2277,7 +2275,6 @@ function loadOpenAISettings(data, settings) { oai_settings.openai_max_tokens = settings.openai_max_tokens ?? default_settings.openai_max_tokens; oai_settings.bias_preset_selected = settings.bias_preset_selected ?? default_settings.bias_preset_selected; oai_settings.bias_presets = settings.bias_presets ?? default_settings.bias_presets; - oai_settings.legacy_streaming = settings.legacy_streaming ?? default_settings.legacy_streaming; oai_settings.max_context_unlocked = settings.max_context_unlocked ?? default_settings.max_context_unlocked; oai_settings.send_if_empty = settings.send_if_empty ?? default_settings.send_if_empty; oai_settings.wi_format = settings.wi_format ?? 
default_settings.wi_format; @@ -2340,7 +2337,6 @@ function loadOpenAISettings(data, settings) { $('#wrap_in_quotes').prop('checked', oai_settings.wrap_in_quotes); $('#names_in_completion').prop('checked', oai_settings.names_in_completion); $('#jailbreak_system').prop('checked', oai_settings.jailbreak_system); - $('#legacy_streaming').prop('checked', oai_settings.legacy_streaming); $('#openai_show_external_models').prop('checked', oai_settings.show_external_models); $('#openai_external_category').toggle(oai_settings.show_external_models); $('#use_ai21_tokenizer').prop('checked', oai_settings.use_ai21_tokenizer); @@ -2545,7 +2541,6 @@ async function saveOpenAIPreset(name, settings, triggerUi = true) { bias_preset_selected: settings.bias_preset_selected, reverse_proxy: settings.reverse_proxy, proxy_password: settings.proxy_password, - legacy_streaming: settings.legacy_streaming, max_context_unlocked: settings.max_context_unlocked, wi_format: settings.wi_format, scenario_format: settings.scenario_format, @@ -2906,7 +2901,6 @@ function onSettingsPresetChange() { continue_nudge_prompt: ['#continue_nudge_prompt_textarea', 'continue_nudge_prompt', false], bias_preset_selected: ['#openai_logit_bias_preset', 'bias_preset_selected', false], reverse_proxy: ['#openai_reverse_proxy', 'reverse_proxy', false], - legacy_streaming: ['#legacy_streaming', 'legacy_streaming', true], wi_format: ['#wi_format_textarea', 'wi_format', false], scenario_format: ['#scenario_format_textarea', 'scenario_format', false], personality_format: ['#personality_format_textarea', 'personality_format', false], @@ -3662,11 +3656,6 @@ $(document).ready(async function () { saveSettingsDebounced(); }); - $('#legacy_streaming').on('input', function () { - oai_settings.legacy_streaming = !!$(this).prop('checked'); - saveSettingsDebounced(); - }); - $('#openai_bypass_status_check').on('input', function () { oai_settings.bypass_status_check = !!$(this).prop('checked'); getStatusOpen(); From 
055d6c4337807e9c9e0925b04257fe32bd3130a3 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Thu, 7 Dec 2023 18:06:17 -0500 Subject: [PATCH 004/179] Properly forward status codes from streams --- server.js | 63 +++++++++++++--------------------------- src/endpoints/novelai.js | 15 ++-------- src/util.js | 23 +++++++++++++++ 3 files changed, 45 insertions(+), 56 deletions(-) diff --git a/server.js b/server.js index 5c25c2a0c..589bf481c 100644 --- a/server.js +++ b/server.js @@ -7,7 +7,6 @@ const http = require('http'); const https = require('https'); const path = require('path'); const util = require('util'); -const { Readable } = require('stream'); // cli/fs related library imports const open = require('open'); @@ -45,7 +44,20 @@ const basicAuthMiddleware = require('./src/middleware/basicAuthMiddleware'); const { jsonParser, urlencodedParser } = require('./src/express-common.js'); const contentManager = require('./src/endpoints/content-manager'); const { readSecret, migrateSecrets, SECRET_KEYS } = require('./src/endpoints/secrets'); -const { delay, getVersion, getConfigValue, color, uuidv4, tryParse, clientRelativePath, removeFileExtension, generateTimestamp, removeOldBackups, getImages } = require('./src/util'); +const { + delay, + getVersion, + getConfigValue, + color, + uuidv4, + tryParse, + clientRelativePath, + removeFileExtension, + generateTimestamp, + removeOldBackups, + getImages, + forwardFetchResponse, +} = require('./src/util'); const { ensureThumbnailCache } = require('./src/endpoints/thumbnails'); const { getTokenizerModel, getTiktokenTokenizer, loadTokenizers, TEXT_COMPLETION_MODELS, getSentencepiceTokenizer, sentencepieceTokenizers } = require('./src/endpoints/tokenizers'); const { convertClaudePrompt } = require('./src/chat-completion'); @@ -307,9 +319,7 @@ if (getConfigValue('enableCorsProxy', false) || cliArguments.corsProxy) { }); // Copy over relevant response params to the proxy response - res.statusCode = response.status; - res.statusMessage = 
response.statusText; - response.body.pipe(res); + forwardFetchResponse(response, res); } catch (error) { res.status(500).send('Error occurred while trying to proxy to: ' + url + ' ' + error); @@ -457,18 +467,9 @@ app.post('/generate', jsonParser, async function (request, response_generate) { const response = await fetch(url, { method: 'POST', timeout: 0, ...args }); if (request.body.streaming) { - request.socket.on('close', function () { - if (response.body instanceof Readable) response.body.destroy(); // Close the remote stream - response_generate.end(); // End the Express response - }); - - response.body.on('end', function () { - console.log('Streaming request finished'); - response_generate.end(); - }); - // Pipe remote SSE stream to Express response - return response.body.pipe(response_generate); + forwardFetchResponse(response, response_generate); + return; } else { if (!response.ok) { const errorText = await response.text(); @@ -666,17 +667,7 @@ app.post('/api/textgenerationwebui/generate', jsonParser, async function (reques if (request.body.stream) { const completionsStream = await fetch(url, args); // Pipe remote SSE stream to Express response - completionsStream.body.pipe(response_generate); - - request.socket.on('close', function () { - if (completionsStream.body instanceof Readable) completionsStream.body.destroy(); // Close the remote stream - response_generate.end(); // End the Express response - }); - - completionsStream.body.on('end', function () { - console.log('Streaming request finished'); - response_generate.end(); - }); + forwardFetchResponse(completionsStream, response_generate); } else { const completionsReply = await fetch(url, args); @@ -1427,17 +1418,7 @@ async function sendClaudeRequest(request, response) { if (request.body.stream) { // Pipe remote SSE stream to Express response - generateResponse.body.pipe(response); - - request.socket.on('close', function () { - if (generateResponse.body instanceof Readable) 
generateResponse.body.destroy(); // Close the remote stream - response.end(); // End the Express response - }); - - generateResponse.body.on('end', function () { - console.log('Streaming request finished'); - response.end(); - }); + forwardFetchResponse(generateResponse, response); } else { if (!generateResponse.ok) { console.log(`Claude API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); @@ -1640,11 +1621,7 @@ app.post('/generate_openai', jsonParser, function (request, response_generate_op if (fetchResponse.ok) { if (request.body.stream) { console.log('Streaming request in progress'); - fetchResponse.body.pipe(response_generate_openai); - fetchResponse.body.on('end', () => { - console.log('Streaming request finished'); - response_generate_openai.end(); - }); + forwardFetchResponse(fetchResponse, response_generate_openai); } else { let json = await fetchResponse.json(); response_generate_openai.send(json); diff --git a/src/endpoints/novelai.js b/src/endpoints/novelai.js index 2071c4c5b..89b460042 100644 --- a/src/endpoints/novelai.js +++ b/src/endpoints/novelai.js @@ -1,9 +1,8 @@ const fetch = require('node-fetch').default; const express = require('express'); const util = require('util'); -const { Readable } = require('stream'); const { readSecret, SECRET_KEYS } = require('./secrets'); -const { readAllChunks, extractFileFromZipBuffer } = require('../util'); +const { readAllChunks, extractFileFromZipBuffer, forwardFetchResponse } = require('../util'); const { jsonParser } = require('../express-common'); const API_NOVELAI = 'https://api.novelai.net'; @@ -188,17 +187,7 @@ router.post('/generate', jsonParser, async function (req, res) { if (req.body.streaming) { // Pipe remote SSE stream to Express response - response.body.pipe(res); - - req.socket.on('close', function () { - if (response.body instanceof Readable) response.body.destroy(); // Close the remote stream - res.end(); // End the Express response - 
}); - - response.body.on('end', function () { - console.log('Streaming request finished'); - res.end(); - }); + forwardFetchResponse(response, res); } else { if (!response.ok) { const text = await response.text(); diff --git a/src/util.js b/src/util.js index bc290c9ae..c6f344c71 100644 --- a/src/util.js +++ b/src/util.js @@ -6,6 +6,7 @@ const yauzl = require('yauzl'); const mime = require('mime-types'); const yaml = require('yaml'); const { default: simpleGit } = require('simple-git'); +const { Readable } = require('stream'); const { DIRECTORIES } = require('./constants'); @@ -346,6 +347,27 @@ function getImages(path) { .sort(Intl.Collator().compare); } +/** + * Pipe a fetch() response to an Express.js Response, including status code. + * @param {Response} from The Fetch API response to pipe from. + * @param {Express.Response} to The Express response to pipe to. + */ +function forwardFetchResponse(from, to) { + to.statusCode = from.status; + to.statusMessage = from.statusText; + from.body.pipe(to); + + to.socket.on('close', function () { + if (from.body instanceof Readable) from.body.destroy(); // Close the remote stream + to.end(); // End the Express response + }); + + from.body.on('end', function () { + console.log('Streaming request finished'); + to.end(); + }); +} + module.exports = { getConfig, getConfigValue, @@ -365,4 +387,5 @@ module.exports = { generateTimestamp, removeOldBackups, getImages, + forwardFetchResponse, }; From b0e7b73a32a0431ffc35be15ce41beb5660463f9 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Fri, 8 Dec 2023 02:01:08 +0200 Subject: [PATCH 005/179] Fix streaming processor error handler hooks --- public/script.js | 23 +++++++++++++++++++---- public/scripts/kai-settings.js | 4 ++-- public/scripts/nai-settings.js | 6 +++--- public/scripts/openai.js | 4 ++-- public/scripts/textgen-settings.js | 11 ++++++----- server.js | 19 ++++++++++--------- 6 files changed, 42 insertions(+), 25 deletions(-) diff 
--git a/public/script.js b/public/script.js index 79fb6a917..26ee43a97 100644 --- a/public/script.js +++ b/public/script.js @@ -2730,6 +2730,10 @@ class StreamingProcessor { this.onErrorStreaming(); } + hook(generatorFn) { + this.generator = generatorFn; + } + *nullStreamingGeneration() { throw new Error('Generation function for streaming is not hooked up'); } @@ -3722,10 +3726,14 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, } console.debug(`pushed prompt bits to itemizedPrompts array. Length is now: ${itemizedPrompts.length}`); + /** @type {Promise} */ + let streamingHookPromise = Promise.resolve(); if (main_api == 'openai') { if (isStreamingEnabled() && type !== 'quiet') { - streamingProcessor.generator = await sendOpenAIRequest(type, generate_data.prompt, streamingProcessor.abortController.signal); + streamingHookPromise = sendOpenAIRequest(type, generate_data.prompt, streamingProcessor.abortController.signal) + .then(fn => streamingProcessor.hook(fn)) + .catch(onError); } else { sendOpenAIRequest(type, generate_data.prompt, abortController.signal).then(onSuccess).catch(onError); @@ -3735,13 +3743,19 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, generateHorde(finalPrompt, generate_data, abortController.signal, true).then(onSuccess).catch(onError); } else if (main_api == 'textgenerationwebui' && isStreamingEnabled() && type !== 'quiet') { - streamingProcessor.generator = await generateTextGenWithStreaming(generate_data, streamingProcessor.abortController.signal); + streamingHookPromise = generateTextGenWithStreaming(generate_data, streamingProcessor.abortController.signal) + .then(fn => streamingProcessor.hook(fn)) + .catch(onError); } else if (main_api == 'novel' && isStreamingEnabled() && type !== 'quiet') { - streamingProcessor.generator = await generateNovelWithStreaming(generate_data, streamingProcessor.abortController.signal); + streamingHookPromise = 
generateNovelWithStreaming(generate_data, streamingProcessor.abortController.signal) + .then(fn => streamingProcessor.hook(fn)) + .catch(onError); } else if (main_api == 'kobold' && isStreamingEnabled() && type !== 'quiet') { - streamingProcessor.generator = await generateKoboldWithStreaming(generate_data, streamingProcessor.abortController.signal); + streamingHookPromise = generateKoboldWithStreaming(generate_data, streamingProcessor.abortController.signal) + .then(fn => streamingProcessor.hook(fn)) + .catch(onError); } else { try { @@ -3767,6 +3781,7 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, if (isStreamingEnabled() && type !== 'quiet') { hideSwipeButtons(); + await streamingHookPromise; let getMessage = await streamingProcessor.generate(); let messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false); diff --git a/public/scripts/kai-settings.js b/public/scripts/kai-settings.js index 0b6c9972e..9183a8b0d 100644 --- a/public/scripts/kai-settings.js +++ b/public/scripts/kai-settings.js @@ -163,7 +163,7 @@ function tryParseStreamingError(response, decoded) { } if (data.error) { - toastr.error(data.error.message || response.statusText, 'API returned an error'); + toastr.error(data.error.message || response.statusText, 'KoboldAI API'); throw new Error(data); } } @@ -180,7 +180,7 @@ export async function generateKoboldWithStreaming(generate_data, signal) { signal: signal, }); if (!response.ok) { - tryParseStreamingError(response, await response.body.text()); + tryParseStreamingError(response, await response.text()); throw new Error(`Got response status ${response.status}`); } const eventStream = new EventSourceStream(); diff --git a/public/scripts/nai-settings.js b/public/scripts/nai-settings.js index fe5996cde..e0feaaaf4 100644 --- a/public/scripts/nai-settings.js +++ b/public/scripts/nai-settings.js @@ -672,8 +672,8 @@ function tryParseStreamingError(response, decoded) { return; } - if (data.error) { - 
toastr.error(data.error.message || response.statusText, 'API returned an error'); + if (data.message || data.error) { + toastr.error(data.message || data.error?.message || response.statusText, 'NovelAI API'); throw new Error(data); } } @@ -692,7 +692,7 @@ export async function generateNovelWithStreaming(generate_data, signal) { signal: signal, }); if (!response.ok) { - tryParseStreamingError(response, await response.body.text()); + tryParseStreamingError(response, await response.text()); throw new Error(`Got response status ${response.status}`); } const eventStream = new EventSourceStream(); diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 5de617283..12cfcd92a 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -1123,7 +1123,7 @@ function tryParseStreamingError(response, decoded) { checkQuotaError(data); if (data.error) { - toastr.error(data.error.message || response.statusText, 'API returned an error'); + toastr.error(data.error.message || response.statusText, 'Chat Completion API'); throw new Error(data); } } @@ -1564,7 +1564,7 @@ async function sendOpenAIRequest(type, messages, signal) { }); if (!response.ok) { - tryParseStreamingError(response, await response.body.text()); + tryParseStreamingError(response, await response.text()); throw new Error(`Got response status ${response.status}`); } diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js index e6ed6deaa..9c938f701 100644 --- a/public/scripts/textgen-settings.js +++ b/public/scripts/textgen-settings.js @@ -478,7 +478,7 @@ async function generateTextGenWithStreaming(generate_data, signal) { }); if (!response.ok) { - tryParseStreamingError(response, await response.body.text()); + tryParseStreamingError(response, await response.text()); throw new Error(`Got response status ${response.status}`); } @@ -512,14 +512,15 @@ async function generateTextGenWithStreaming(generate_data, signal) { /** * Parses errors in streaming responses and 
displays them in toastr. - * @param {string} response - Response from the server. + * @param {Response} response - Response from the server. + * @param {string} decoded - Decoded response body. * @returns {void} Nothing. */ -function tryParseStreamingError(response) { +function tryParseStreamingError(response, decoded) { let data = {}; try { - data = JSON.parse(response); + data = JSON.parse(decoded); } catch { // No JSON. Do nothing. } @@ -527,7 +528,7 @@ function tryParseStreamingError(response) { const message = data?.error?.message || data?.message; if (message) { - toastr.error(message, 'API Error'); + toastr.error(message, 'Text Completion API'); throw new Error(message); } } diff --git a/server.js b/server.js index 589bf481c..bf6edb6bd 100644 --- a/server.js +++ b/server.js @@ -1618,16 +1618,17 @@ app.post('/generate_openai', jsonParser, function (request, response_generate_op try { const fetchResponse = await fetch(endpointUrl, config); + if (request.body.stream) { + console.log('Streaming request in progress'); + forwardFetchResponse(fetchResponse, response_generate_openai); + return; + } + if (fetchResponse.ok) { - if (request.body.stream) { - console.log('Streaming request in progress'); - forwardFetchResponse(fetchResponse, response_generate_openai); - } else { - let json = await fetchResponse.json(); - response_generate_openai.send(json); - console.log(json); - console.log(json?.choices[0]?.message); - } + let json = await fetchResponse.json(); + response_generate_openai.send(json); + console.log(json); + console.log(json?.choices[0]?.message); } else if (fetchResponse.status === 429 && retries > 0) { console.log(`Out of quota, retrying in ${Math.round(timeout / 1000)}s`); setTimeout(() => { From 699c369443185ff2de89128ec022f0cf9e09af6a Mon Sep 17 00:00:00 2001 From: valadaptive Date: Thu, 7 Dec 2023 23:15:42 -0500 Subject: [PATCH 006/179] Remove ignoreBOM Apparently the ignoreBOM option actually means "include the BOM". 
I've added a test for this in my own repository, and will also be submitting a pull request to MDN to clarify this in their documentation. --- public/scripts/sse-stream.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/scripts/sse-stream.js b/public/scripts/sse-stream.js index e50bf55f3..e477d297f 100644 --- a/public/scripts/sse-stream.js +++ b/public/scripts/sse-stream.js @@ -3,7 +3,7 @@ */ class EventSourceStream { constructor() { - const decoder = new TextDecoderStream('utf-8', { ignoreBOM: true }); + const decoder = new TextDecoderStream('utf-8'); let streamBuffer = ''; From d735b12399d427428c24e88a90bf664a9d1b72f7 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Fri, 8 Dec 2023 15:04:40 -0500 Subject: [PATCH 007/179] Refactor event stream parsing I was really overcomplicating this before; this is simpler and faster. Passes my existing test suite. --- public/scripts/sse-stream.js | 116 +++++++++++++---------------------- 1 file changed, 44 insertions(+), 72 deletions(-) diff --git a/public/scripts/sse-stream.js b/public/scripts/sse-stream.js index e477d297f..a904105ce 100644 --- a/public/scripts/sse-stream.js +++ b/public/scripts/sse-stream.js @@ -6,92 +6,64 @@ class EventSourceStream { const decoder = new TextDecoderStream('utf-8'); let streamBuffer = ''; - - let dataBuffer = ''; - let eventType = 'message'; let lastEventId = ''; - // https://html.spec.whatwg.org/multipage/server-sent-events.html#parsing-an-event-stream Parses a line from the - // event stream. This is hard to read, so here's how it works: The first group matches either a field (field - // name, optional (colon, value)) or a comment (colon, text). That group is optional, and is followed by a group - // which matches a newline. This means that: The only *capturing* groups are the field, field value, comment, - // and newline. This lets us determine what the line is by which capture groups are filled in. 
The field and - // value groups being present means it's a field, the comment group being present means it's a comment, and - // neither means it's a blank line. This is best viewed in RegExr if you value your sanity. - const parserRegex = /(?:(?:([^\r\n:]+)(?:: ?([^\r\n]*)?)?)|(:[^\r\n]*))?(\r\n|\r|\n)/y; + function processChunk(controller) { + // Events are separated by two newlines + const events = streamBuffer.split(/\r\n\r\n|\r\r|\n\n/g); + if (events.length === 0) return; - function processChunk(controller, isLastChunk) { - while (parserRegex.lastIndex < streamBuffer.length) { - const lastLastIndex = parserRegex.lastIndex; - const matchResult = parserRegex.exec(streamBuffer); - // We need to wait for more data to come in - if (!matchResult) { - if (lastLastIndex !== 0) { - // Slice off what we've successfully parsed so far. lastIndex is set to 0 if there's no match, - // so it'll be set to start off here. - streamBuffer = streamBuffer.slice(lastLastIndex); + // The leftover text to remain in the buffer is whatever doesn't have two newlines after it. If the buffer ended + // with two newlines, this will be an empty string. + streamBuffer = events.pop(); + + for (const eventChunk of events) { + let eventType = 'message'; + // Split up by single newlines. 
+ const lines = eventChunk.split(/\n|\r|\r\n/g); + let eventData = ''; + for (const line of lines) { + const lineMatch = /([^:]+)(?:: ?(.*))?/.exec(line); + if (lineMatch) { + const field = lineMatch[1]; + const value = lineMatch[2] || ''; + + switch (field) { + case 'event': + eventType = value; + break; + case 'data': + eventData += value; + eventData += '\n'; + break; + case 'id': + // The ID field cannot contain null, per the spec + if (!value.includes('\0')) lastEventId = value; + break; + // We do nothing for the `delay` type, and other types are explicitly ignored + } } - return; } - const field = matchResult[1]; - const value = matchResult[2]; - const comment = matchResult[3]; - const newline = matchResult[4]; - // Corner case: if the last character in the buffer is '\r', we need to wait for more data. These chunks - // could be split up any which way, and it's entirely possible that the next chunk we receive will start - // with '\n', and this trailing '\r' is actually part of a '\r\n' sequence. - if (newline === '\r' && parserRegex.lastIndex === streamBuffer.length && !isLastChunk) { - // Trim off what we've parsed so far, and wait for more data - streamBuffer = streamBuffer.slice(lastLastIndex); - parserRegex.lastIndex = 0; - return; + + // https://html.spec.whatwg.org/multipage/server-sent-events.html#dispatchMessage + // Skip the event if the data buffer is the empty string. + if (eventData === '') continue; + + if (eventData[eventData.length - 1] === '\n') { + eventData = eventData.slice(0, -1); } - // https://html.spec.whatwg.org/multipage/server-sent-events.html#processField - if (typeof field === 'string') { - switch (field) { - case 'event': - eventType = value; - break; - case 'data': - // If the data field is empty, there won't be a match for the value. Just add a newline. 
- if (typeof value === 'string') dataBuffer += value; - dataBuffer += '\n'; - break; - case 'id': - if (!value.includes('\0')) lastEventId = value; - break; - // We do nothing for the `delay` type, and other types are explicitly ignored - } - } else if (typeof comment === 'string') { - continue; - } else { - // https://html.spec.whatwg.org/multipage/server-sent-events.html#dispatchMessage - // Must be a newline. Dispatch the event. - // Skip the event if the data buffer is the empty string. - if (dataBuffer === '') continue; - // Trim the *last* trailing newline - if (dataBuffer[dataBuffer.length - 1] === '\n') { - dataBuffer = dataBuffer.slice(0, -1); - } - const event = new MessageEvent(eventType, { data: dataBuffer, lastEventId }); - controller.enqueue(event); - dataBuffer = ''; - eventType = 'message'; - } + // Trim the *last* trailing newline only. + const event = new MessageEvent(eventType, { data: eventData, lastEventId }); + controller.enqueue(event); } } const sseStream = new TransformStream({ transform(chunk, controller) { streamBuffer += chunk; - processChunk(controller, false); - }, - - flush(controller) { - // If it's the last chunk, trailing carriage returns are allowed - processChunk(controller, true); + processChunk(controller); }, }); From 3cfc32c16d0b8c7f76c52f8fd34ea83af0e4cdd9 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Fri, 8 Dec 2023 18:40:17 -0500 Subject: [PATCH 008/179] Refactor error handling Remove the StreamingProcessor.hook method and use a try-catch block to await the generator promise and set the generator, handling errors with onError if it fails. 
--- public/script.js | 51 ++++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/public/script.js b/public/script.js index 26ee43a97..fb1d59ce7 100644 --- a/public/script.js +++ b/public/script.js @@ -2730,10 +2730,6 @@ class StreamingProcessor { this.onErrorStreaming(); } - hook(generatorFn) { - this.generator = generatorFn; - } - *nullStreamingGeneration() { throw new Error('Generation function for streaming is not hooked up'); } @@ -3727,13 +3723,11 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, console.debug(`pushed prompt bits to itemizedPrompts array. Length is now: ${itemizedPrompts.length}`); /** @type {Promise} */ - let streamingHookPromise = Promise.resolve(); + let streamingGeneratorPromise = Promise.resolve(); if (main_api == 'openai') { if (isStreamingEnabled() && type !== 'quiet') { - streamingHookPromise = sendOpenAIRequest(type, generate_data.prompt, streamingProcessor.abortController.signal) - .then(fn => streamingProcessor.hook(fn)) - .catch(onError); + streamingGeneratorPromise = sendOpenAIRequest(type, generate_data.prompt, streamingProcessor.abortController.signal); } else { sendOpenAIRequest(type, generate_data.prompt, abortController.signal).then(onSuccess).catch(onError); @@ -3743,19 +3737,13 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, generateHorde(finalPrompt, generate_data, abortController.signal, true).then(onSuccess).catch(onError); } else if (main_api == 'textgenerationwebui' && isStreamingEnabled() && type !== 'quiet') { - streamingHookPromise = generateTextGenWithStreaming(generate_data, streamingProcessor.abortController.signal) - .then(fn => streamingProcessor.hook(fn)) - .catch(onError); + streamingGeneratorPromise = generateTextGenWithStreaming(generate_data, streamingProcessor.abortController.signal); } else if (main_api == 'novel' && isStreamingEnabled() && type !== 'quiet') { - 
streamingHookPromise = generateNovelWithStreaming(generate_data, streamingProcessor.abortController.signal) - .then(fn => streamingProcessor.hook(fn)) - .catch(onError); + streamingGeneratorPromise = generateNovelWithStreaming(generate_data, streamingProcessor.abortController.signal); } else if (main_api == 'kobold' && isStreamingEnabled() && type !== 'quiet') { - streamingHookPromise = generateKoboldWithStreaming(generate_data, streamingProcessor.abortController.signal) - .then(fn => streamingProcessor.hook(fn)) - .catch(onError); + streamingGeneratorPromise = generateKoboldWithStreaming(generate_data, streamingProcessor.abortController.signal); } else { try { @@ -3780,20 +3768,27 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, } if (isStreamingEnabled() && type !== 'quiet') { - hideSwipeButtons(); - await streamingHookPromise; - let getMessage = await streamingProcessor.generate(); - let messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false); + try { + const streamingGenerator = await streamingGeneratorPromise; + streamingProcessor.generator = streamingGenerator; + hideSwipeButtons(); + let getMessage = await streamingProcessor.generate(); + let messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false); - if (isContinue) { - getMessage = continue_mag + getMessage; + if (isContinue) { + getMessage = continue_mag + getMessage; + } + + if (streamingProcessor && !streamingProcessor.isStopped && streamingProcessor.isFinished) { + await streamingProcessor.onFinishStreaming(streamingProcessor.messageId, getMessage); + streamingProcessor = null; + triggerAutoContinue(messageChunk, isImpersonate); + } + resolve(); + } catch (err) { + onError(err); } - if (streamingProcessor && !streamingProcessor.isStopped && streamingProcessor.isFinished) { - await streamingProcessor.onFinishStreaming(streamingProcessor.messageId, getMessage); - streamingProcessor = null; - triggerAutoContinue(messageChunk, 
isImpersonate); - } } async function onSuccess(data) { From a3ec8d709db0a6492ce6070cf3fc611a64ecaff6 Mon Sep 17 00:00:00 2001 From: LenAnderson Date: Sat, 9 Dec 2023 17:56:36 +0000 Subject: [PATCH 009/179] add support for tab and shift-tab in QR editor --- .../scripts/extensions/quick-reply/index.js | 53 ++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/public/scripts/extensions/quick-reply/index.js b/public/scripts/extensions/quick-reply/index.js index 36260bd83..7962cd41d 100644 --- a/public/scripts/extensions/quick-reply/index.js +++ b/public/scripts/extensions/quick-reply/index.js @@ -639,7 +639,7 @@ function generateQuickReplyElements() { - +
`; @@ -889,6 +889,57 @@ jQuery(async () => { saveSettingsDebounced(); }); + // taken and adjusted from chats.js (.editor_maximize) + $(document).on('click', '.editor_maximize_with_tab', function () { + const broId = $(this).attr('data-for'); + const bro = $(`#${broId}`); + + if (!bro.length) { + console.error('Could not find editor with id', broId); + return; + } + + const wrapper = document.createElement('div'); + wrapper.classList.add('height100p', 'wide100p', 'flex-container'); + wrapper.classList.add('flexFlowColumn', 'justifyCenter', 'alignitemscenter'); + const textarea = document.createElement('textarea'); + textarea.value = String(bro.val()); + textarea.classList.add('height100p', 'wide100p'); + textarea.addEventListener('keydown', (evt) => { + if (evt.key == 'Tab' && !evt.shiftKey && !evt.ctrlKey && !evt.altKey) { + evt.preventDefault(); + const start = textarea.selectionStart; + const end = textarea.selectionEnd; + if (end - start > 0 && textarea.value.substring(start, end).includes('\n')) { + const lineStart = textarea.value.lastIndexOf('\n', start); + const count = textarea.value.substring(lineStart, end).split('\n').length - 1; + textarea.value = `${textarea.value.substring(0, lineStart)}${textarea.value.substring(lineStart, end).replace(/\n/g, '\n\t')}${textarea.value.substring(end)}`; + textarea.selectionStart = start + 1; + textarea.selectionEnd = end + count; + } else { + textarea.value = `${textarea.value.substring(0, start)}\t${textarea.value.substring(end)}`; + textarea.selectionStart = start + 1; + textarea.selectionEnd = end + 1; + } + } else if (evt.key == 'Tab' && evt.shiftKey && !evt.ctrlKey && !evt.altKey) { + evt.preventDefault(); + const start = textarea.selectionStart; + const end = textarea.selectionEnd; + const lineStart = textarea.value.lastIndexOf('\n', start); + const count = textarea.value.substring(lineStart, end).split('\n\t').length - 1; + textarea.value = `${textarea.value.substring(0, 
lineStart)}${textarea.value.substring(lineStart, end).replace(/\n\t/g, '\n')}${textarea.value.substring(end)}`; + textarea.selectionStart = start - 1; + textarea.selectionEnd = end - count; + } + }); + textarea.addEventListener('inpupt', () => { + bro.val(textarea.value).trigger('input'); + }); + wrapper.appendChild(textarea); + + callPopup(wrapper, 'text', '', { wide: true, large: true }); + }); + await loadSettings('init'); addQuickReplyBar(); From 0ea0399ed19995b1ed499100bd319ed85ca49f86 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 18:39:19 -0500 Subject: [PATCH 010/179] Separate getStatus into Kobold/textgen versions This adds a bit of duplicate code for the time being, but ultimately makes the code less confusing because we only need to include the bits that are relevant to the specific API in each function. We can also remove API parameters that are useless depending on the endpoint. --- public/script.js | 75 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 17 deletions(-) diff --git a/public/script.js b/public/script.js index 64cb41752..3713d74e8 100644 --- a/public/script.js +++ b/public/script.js @@ -232,7 +232,6 @@ export { isStreamingEnabled, getThumbnailUrl, getStoppingStrings, - getStatus, reloadMarkdownProcessor, getCurrentChatId, chat, @@ -857,7 +856,7 @@ export async function clearItemizedPrompts() { } } -async function getStatus() { +async function getStatusKobold() { if (main_api == 'koboldhorde') { try { const hordeStatus = await checkHordeStatus(); @@ -870,7 +869,7 @@ async function getStatus() { return resultCheckStatus(); } - const url = main_api == 'textgenerationwebui' ? '/api/textgenerationwebui/status' : '/getstatus'; + const url = '/getstatus'; let endpoint = getAPIServerUrl(); @@ -886,18 +885,64 @@ async function getStatus() { body: JSON.stringify({ main_api, api_server: endpoint, - api_type: textgen_settings.type, - legacy_api: main_api == 'textgenerationwebui' ? 
- textgen_settings.legacy_api && - textgen_settings.type !== MANCER : - false, }), signal: abortStatusCheck.signal, }); const data = await response.json(); - if (main_api == 'textgenerationwebui' && textgen_settings.type === MANCER) { + + online_status = data?.result; + + if (!online_status) { + online_status = 'no_connection'; + } + + // Determine instruct mode preset + autoSelectInstructPreset(online_status); + + // determine if we can use stop sequence and streaming + setKoboldFlags(data.version, data.koboldVersion); + + // We didn't get a 200 status code, but the endpoint has an explanation. Which means it DID connect, but I digress. + if (online_status === 'no_connection' && data.response) { + toastr.error(data.response, 'API Error', { timeOut: 5000, preventDuplicates: true }); + } + } catch (err) { + console.error('Error getting status', err); + online_status = 'no_connection'; + } + + return resultCheckStatus(); +} + +async function getStatusTextgen() { + const url = '/api/textgenerationwebui/status'; + + let endpoint = getAPIServerUrl(); + + if (!endpoint) { + console.warn('No endpoint for status check'); + return; + } + + try { + const response = await fetch(url, { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify({ + api_server: endpoint, + api_type: textgen_settings.type, + legacy_api: + textgen_settings.legacy_api && + textgen_settings.type !== MANCER, + }), + signal: abortStatusCheck.signal, + }); + + const data = await response.json(); + + if (textgen_settings.type === MANCER) { online_status = textgen_settings.mancer_model; loadMancerModels(data?.data); } else { @@ -911,11 +956,6 @@ async function getStatus() { // Determine instruct mode preset autoSelectInstructPreset(online_status); - // determine if we can use stop sequence and streaming - if (main_api === 'kobold' || main_api === 'koboldhorde') { - setKoboldFlags(data.version, data.koboldVersion); - } - // We didn't get a 200 status code, but the endpoint has an 
explanation. Which means it DID connect, but I digress. if (online_status === 'no_connection' && data.response) { toastr.error(data.response, 'API Error', { timeOut: 5000, preventDuplicates: true }); @@ -943,6 +983,7 @@ export function resultCheckStatus() { stopStatusLoading(); } +// TODO(valadaptive): remove the usage of this function in the tokenizers code, then remove the function entirely export function getAPIServerUrl() { if (main_api == 'textgenerationwebui') { if (textgen_settings.type === MANCER) { @@ -5314,7 +5355,7 @@ function changeMainAPI() { } if (main_api == 'koboldhorde') { - getStatus(); + getStatusKobold(); getHordeModels(); } @@ -8268,7 +8309,7 @@ jQuery(async function () { main_api = 'kobold'; saveSettingsDebounced(); - getStatus(); + getStatusKobold(); } }); @@ -8304,7 +8345,7 @@ jQuery(async function () { startStatusLoading(); main_api = 'textgenerationwebui'; saveSettingsDebounced(); - getStatus(); + getStatusTextgen(); }); var button = $('#options_button'); From babb127aee992e4fecb03ae7fae12e80ba74ef47 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 18:41:51 -0500 Subject: [PATCH 011/179] Move NovelAI status functions over to the rest Have all the get(...)Status and event handler registrations in the same areas, rather than having the NovelAI ones far away. I want to eventually move all the API-specific stuff into separate modules, but this will make things cleaner for the time being. 
--- public/script.js | 68 ++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/public/script.js b/public/script.js index 3713d74e8..81bed0af0 100644 --- a/public/script.js +++ b/public/script.js @@ -968,6 +968,22 @@ async function getStatusTextgen() { return resultCheckStatus(); } +async function getStatusNovel() { + try { + const result = await loadNovelSubscriptionData(); + + if (!result) { + throw new Error('Could not load subscription data'); + } + + online_status = getNovelTier(); + } catch { + online_status = 'no_connection'; + } + + resultCheckStatus(); +} + export function startStatusLoading() { $('.api_loading').show(); $('.api_button').addClass('disabled'); @@ -6072,22 +6088,6 @@ export async function displayPastChats() { }); } -async function getStatusNovel() { - try { - const result = await loadNovelSubscriptionData(); - - if (!result) { - throw new Error('Could not load subscription data'); - } - - online_status = getNovelTier(); - } catch { - online_status = 'no_connection'; - } - - resultCheckStatus(); -} - function selectRightMenuWithAnimation(selectedMenuId) { const displayModes = { 'rm_group_chats_block': 'flex', @@ -8348,6 +8348,24 @@ jQuery(async function () { getStatusTextgen(); }); + $('#api_button_novel').on('click', async function (e) { + e.stopPropagation(); + const api_key_novel = String($('#api_key_novel').val()).trim(); + + if (api_key_novel.length) { + await writeSecret(SECRET_KEYS.NOVEL, api_key_novel); + } + + if (!secret_state[SECRET_KEYS.NOVEL]) { + console.log('No secret key saved for NovelAI'); + return; + } + + startStatusLoading(); + // Check near immediately rather than waiting for up to 90s + await getStatusNovel(); + }); + var button = $('#options_button'); var menu = $('#options'); @@ -9034,24 +9052,6 @@ jQuery(async function () { }); //Select chat - $('#api_button_novel').on('click', async function (e) { - e.stopPropagation(); - const api_key_novel = 
String($('#api_key_novel').val()).trim(); - - if (api_key_novel.length) { - await writeSecret(SECRET_KEYS.NOVEL, api_key_novel); - } - - if (!secret_state[SECRET_KEYS.NOVEL]) { - console.log('No secret key saved for NovelAI'); - return; - } - - startStatusLoading(); - // Check near immediately rather than waiting for up to 90s - await getStatusNovel(); - }); - //**************************CHARACTER IMPORT EXPORT*************************// $('#character_import_button').click(function () { $('#character_import_file').click(); From a23be7d78576308a09633a9297231ee992075e83 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 18:52:14 -0500 Subject: [PATCH 012/179] Clean up CSS for API "connect" buttons Instead of identifying each connect button by ID, we can just use the .api_button class. The .menu_button class *would* override it due to CSS cascade rules (specifically, declarations later in the stylesheet apply over ones that appear earlier), but the `.menu_button.api_button` selector has a higher *specificity* and hence works. --- public/style.css | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/public/style.css b/public/style.css index c79357667..38de605cb 100644 --- a/public/style.css +++ b/public/style.css @@ -1444,9 +1444,7 @@ select option:not(:checked) { display: block; } -#api_button:hover, -#api_button_novel:hover, -#api_button_textgenerationwebui:hover { +.menu_button.api_button:hover { background-color: var(--active); } From 8bad059a62d9bd491f1e78fb4881ec8667003502 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 19:29:24 -0500 Subject: [PATCH 013/179] Rename /tokenize_via_api endpoint No redirect for this since I don't expect any extensions to be calling this directly. 
--- public/scripts/tokenizers.js | 4 ++-- server.js | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index ecab34705..e9a1b905f 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -173,7 +173,7 @@ function callTokenizer(type, str, padding) { case tokenizers.YI: return countTokensRemote('/api/tokenizers/yi/encode', str, padding); case tokenizers.API: - return countTokensRemote('/tokenize_via_api', str, padding); + return countTokensRemote('/api/tokenizers/remote/encode', str, padding); default: console.warn('Unknown tokenizer type', type); return callTokenizer(tokenizers.NONE, str, padding); @@ -525,7 +525,7 @@ export function getTextTokens(tokenizerType, str) { return getTextTokensRemote('/api/tokenizers/openai/encode', str, model); } case tokenizers.API: - return getTextTokensRemote('/tokenize_via_api', str); + return getTextTokensRemote('/api/tokenizers/remote/encode', str); default: console.warn('Calling getTextTokens with unsupported tokenizer type', tokenizerType); return []; diff --git a/server.js b/server.js index 613510d5f..6b4d7199d 100644 --- a/server.js +++ b/server.js @@ -1774,7 +1774,7 @@ async function sendAI21Request(request, response) { } -app.post('/tokenize_via_api', jsonParser, async function (request, response) { +app.post('/api/tokenizers/remote/encode', jsonParser, async function (request, response) { if (!request.body) { return response.sendStatus(400); } From 04e92efe298353df7b139a2a61e403f8e49aba23 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 19:35:49 -0500 Subject: [PATCH 014/179] Move API tokenization endpoint into /tokenizers Requires extracting some more functions out of server.js. 
--- server.js | 152 +----------------------------------- src/additional-headers.js | 72 +++++++++++++++++ src/endpoints/tokenizers.js | 89 +++++++++++++++++++++ 3 files changed, 162 insertions(+), 151 deletions(-) create mode 100644 src/additional-headers.js diff --git a/server.js b/server.js index 6b4d7199d..b44533d3e 100644 --- a/server.js +++ b/server.js @@ -49,6 +49,7 @@ const { delay, getVersion, getConfigValue, color, uuidv4, tryParse, clientRelati const { ensureThumbnailCache } = require('./src/endpoints/thumbnails'); const { getTokenizerModel, getTiktokenTokenizer, loadTokenizers, TEXT_COMPLETION_MODELS, getSentencepiceTokenizer, sentencepieceTokenizers } = require('./src/endpoints/tokenizers'); const { convertClaudePrompt } = require('./src/chat-completion'); +const { getOverrideHeaders, setAdditionalHeaders } = require('./src/additional-headers'); // Work around a node v20.0.0, v20.1.0, and v20.2.0 bug. The issue was fixed in v20.3.0. // https://github.com/nodejs/node/issues/47822#issuecomment-1564708870 @@ -119,70 +120,6 @@ const listen = getConfigValue('listen', false); const API_OPENAI = 'https://api.openai.com/v1'; const API_CLAUDE = 'https://api.anthropic.com/v1'; -function getMancerHeaders() { - const apiKey = readSecret(SECRET_KEYS.MANCER); - - return apiKey ? ({ - 'X-API-KEY': apiKey, - 'Authorization': `Bearer ${apiKey}`, - }) : {}; -} - -function getAphroditeHeaders() { - const apiKey = readSecret(SECRET_KEYS.APHRODITE); - - return apiKey ? ({ - 'X-API-KEY': apiKey, - 'Authorization': `Bearer ${apiKey}`, - }) : {}; -} - -function getTabbyHeaders() { - const apiKey = readSecret(SECRET_KEYS.TABBY); - - return apiKey ? 
({ - 'x-api-key': apiKey, - 'Authorization': `Bearer ${apiKey}`, - }) : {}; -} - -function getOverrideHeaders(urlHost) { - const requestOverrides = getConfigValue('requestOverrides', []); - const overrideHeaders = requestOverrides?.find((e) => e.hosts?.includes(urlHost))?.headers; - if (overrideHeaders && urlHost) { - return overrideHeaders; - } else { - return {}; - } -} - -/** - * Sets additional headers for the request. - * @param {object} request Original request body - * @param {object} args New request arguments - * @param {string|null} server API server for new request - */ -function setAdditionalHeaders(request, args, server) { - let headers; - - switch (request.body.api_type) { - case TEXTGEN_TYPES.MANCER: - headers = getMancerHeaders(); - break; - case TEXTGEN_TYPES.APHRODITE: - headers = getAphroditeHeaders(); - break; - case TEXTGEN_TYPES.TABBY: - headers = getTabbyHeaders(); - break; - default: - headers = server ? getOverrideHeaders((new URL(server))?.host) : {}; - break; - } - - Object.assign(args.headers, headers); -} - const SETTINGS_FILE = './public/settings.json'; const { DIRECTORIES, UPLOADS_PATH, PALM_SAFETY, TEXTGEN_TYPES, CHAT_COMPLETION_SOURCES, AVATAR_WIDTH, AVATAR_HEIGHT } = require('./src/constants'); @@ -1774,93 +1711,6 @@ async function sendAI21Request(request, response) { } -app.post('/api/tokenizers/remote/encode', jsonParser, async function (request, response) { - if (!request.body) { - return response.sendStatus(400); - } - const text = String(request.body.text) || ''; - const api = String(request.body.main_api); - const baseUrl = String(request.body.url); - const legacyApi = Boolean(request.body.legacy_api); - - try { - if (api == 'textgenerationwebui') { - const args = { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - }; - - setAdditionalHeaders(request, args, null); - - // Convert to string + remove trailing slash + /v1 suffix - let url = String(baseUrl).replace(/\/$/, '').replace(/\/v1$/, ''); - - if 
(legacyApi) { - url += '/v1/token-count'; - args.body = JSON.stringify({ 'prompt': text }); - } else { - switch (request.body.api_type) { - case TEXTGEN_TYPES.TABBY: - url += '/v1/token/encode'; - args.body = JSON.stringify({ 'text': text }); - break; - case TEXTGEN_TYPES.KOBOLDCPP: - url += '/api/extra/tokencount'; - args.body = JSON.stringify({ 'prompt': text }); - break; - default: - url += '/v1/internal/encode'; - args.body = JSON.stringify({ 'text': text }); - break; - } - } - - const result = await fetch(url, args); - - if (!result.ok) { - console.log(`API returned error: ${result.status} ${result.statusText}`); - return response.send({ error: true }); - } - - const data = await result.json(); - const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value); - const ids = legacyApi ? [] : (data?.tokens ?? []); - - return response.send({ count, ids }); - } - - else if (api == 'kobold') { - const args = { - method: 'POST', - body: JSON.stringify({ 'prompt': text }), - headers: { 'Content-Type': 'application/json' }, - }; - - let url = String(baseUrl).replace(/\/$/, ''); - url += '/extra/tokencount'; - - const result = await fetch(url, args); - - if (!result.ok) { - console.log(`API returned error: ${result.status} ${result.statusText}`); - return response.send({ error: true }); - } - - const data = await result.json(); - const count = data['value']; - return response.send({ count: count, ids: [] }); - } - - else { - console.log('Unknown API', api); - return response.send({ error: true }); - } - } catch (error) { - console.log(error); - return response.send({ error: true }); - } -}); - /** * Redirect a deprecated API endpoint URL to its replacement. Because fetch, form submissions, and $.ajax follow * redirects, this is transparent to client-side code. 
diff --git a/src/additional-headers.js b/src/additional-headers.js new file mode 100644 index 000000000..61ea1790d --- /dev/null +++ b/src/additional-headers.js @@ -0,0 +1,72 @@ +const { TEXTGEN_TYPES } = require('./constants'); +const { SECRET_KEYS, readSecret } = require('./endpoints/secrets'); +const { getConfigValue } = require('./util'); + +function getMancerHeaders() { + const apiKey = readSecret(SECRET_KEYS.MANCER); + + return apiKey ? ({ + 'X-API-KEY': apiKey, + 'Authorization': `Bearer ${apiKey}`, + }) : {}; +} + +function getAphroditeHeaders() { + const apiKey = readSecret(SECRET_KEYS.APHRODITE); + + return apiKey ? ({ + 'X-API-KEY': apiKey, + 'Authorization': `Bearer ${apiKey}`, + }) : {}; +} + +function getTabbyHeaders() { + const apiKey = readSecret(SECRET_KEYS.TABBY); + + return apiKey ? ({ + 'x-api-key': apiKey, + 'Authorization': `Bearer ${apiKey}`, + }) : {}; +} + +function getOverrideHeaders(urlHost) { + const requestOverrides = getConfigValue('requestOverrides', []); + const overrideHeaders = requestOverrides?.find((e) => e.hosts?.includes(urlHost))?.headers; + if (overrideHeaders && urlHost) { + return overrideHeaders; + } else { + return {}; + } +} + +/** + * Sets additional headers for the request. + * @param {object} request Original request body + * @param {object} args New request arguments + * @param {string|null} server API server for new request + */ +function setAdditionalHeaders(request, args, server) { + let headers; + + switch (request.body.api_type) { + case TEXTGEN_TYPES.MANCER: + headers = getMancerHeaders(); + break; + case TEXTGEN_TYPES.APHRODITE: + headers = getAphroditeHeaders(); + break; + case TEXTGEN_TYPES.TABBY: + headers = getTabbyHeaders(); + break; + default: + headers = server ? 
getOverrideHeaders((new URL(server))?.host) : {}; + break; + } + + Object.assign(args.headers, headers); +} + +module.exports = { + getOverrideHeaders, + setAdditionalHeaders, +}; diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js index 57abc6b8f..8a4db7728 100644 --- a/src/endpoints/tokenizers.js +++ b/src/endpoints/tokenizers.js @@ -6,7 +6,9 @@ const tiktoken = require('@dqbd/tiktoken'); const { Tokenizer } = require('@agnai/web-tokenizers'); const { convertClaudePrompt } = require('../chat-completion'); const { readSecret, SECRET_KEYS } = require('./secrets'); +const { TEXTGEN_TYPES } = require('../constants'); const { jsonParser } = require('../express-common'); +const { setAdditionalHeaders } = require('../additional-headers'); /** * @type {{[key: string]: import("@dqbd/tiktoken").Tiktoken}} Tokenizers cache @@ -534,6 +536,93 @@ router.post('/openai/count', jsonParser, async function (req, res) { } }); +router.post('/remote/encode', jsonParser, async function (request, response) { + if (!request.body) { + return response.sendStatus(400); + } + const text = String(request.body.text) || ''; + const api = String(request.body.main_api); + const baseUrl = String(request.body.url); + const legacyApi = Boolean(request.body.legacy_api); + + try { + if (api == 'textgenerationwebui') { + const args = { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + }; + + setAdditionalHeaders(request, args, null); + + // Convert to string + remove trailing slash + /v1 suffix + let url = String(baseUrl).replace(/\/$/, '').replace(/\/v1$/, ''); + + if (legacyApi) { + url += '/v1/token-count'; + args.body = JSON.stringify({ 'prompt': text }); + } else { + switch (request.body.api_type) { + case TEXTGEN_TYPES.TABBY: + url += '/v1/token/encode'; + args.body = JSON.stringify({ 'text': text }); + break; + case TEXTGEN_TYPES.KOBOLDCPP: + url += '/api/extra/tokencount'; + args.body = JSON.stringify({ 'prompt': text }); + break; + default: + url += 
'/v1/internal/encode'; + args.body = JSON.stringify({ 'text': text }); + break; + } + } + + const result = await fetch(url, args); + + if (!result.ok) { + console.log(`API returned error: ${result.status} ${result.statusText}`); + return response.send({ error: true }); + } + + const data = await result.json(); + const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value); + const ids = legacyApi ? [] : (data?.tokens ?? []); + + return response.send({ count, ids }); + } + + else if (api == 'kobold') { + const args = { + method: 'POST', + body: JSON.stringify({ 'prompt': text }), + headers: { 'Content-Type': 'application/json' }, + }; + + let url = String(baseUrl).replace(/\/$/, ''); + url += '/extra/tokencount'; + + const result = await fetch(url, args); + + if (!result.ok) { + console.log(`API returned error: ${result.status} ${result.statusText}`); + return response.send({ error: true }); + } + + const data = await result.json(); + const count = data['value']; + return response.send({ count: count, ids: [] }); + } + + else { + console.log('Unknown API', api); + return response.send({ error: true }); + } + } catch (error) { + console.log(error); + return response.send({ error: true }); + } +}); + module.exports = { TEXT_COMPLETION_MODELS, getTokenizerModel, From ddd73a204a00bd7f2e91cb1dc3594b673e1f0b59 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 19:43:33 -0500 Subject: [PATCH 015/179] Remove "remote" language from tokenizer functions We'll be making a distinction between tokenizing *on* the server itself, and tokenizing via the server having the AI service do it. It makes more sense to use the term "remote" for the latter. 
--- public/scripts/tokenizers.js | 62 ++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index e9a1b905f..b72b672a8 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -161,19 +161,19 @@ function callTokenizer(type, str, padding) { case tokenizers.NONE: return guesstimate(str) + padding; case tokenizers.GPT2: - return countTokensRemote('/api/tokenizers/gpt2/encode', str, padding); + return countTokensFromServer('/api/tokenizers/gpt2/encode', str, padding); case tokenizers.LLAMA: - return countTokensRemote('/api/tokenizers/llama/encode', str, padding); + return countTokensFromServer('/api/tokenizers/llama/encode', str, padding); case tokenizers.NERD: - return countTokensRemote('/api/tokenizers/nerdstash/encode', str, padding); + return countTokensFromServer('/api/tokenizers/nerdstash/encode', str, padding); case tokenizers.NERD2: - return countTokensRemote('/api/tokenizers/nerdstash_v2/encode', str, padding); + return countTokensFromServer('/api/tokenizers/nerdstash_v2/encode', str, padding); case tokenizers.MISTRAL: - return countTokensRemote('/api/tokenizers/mistral/encode', str, padding); + return countTokensFromServer('/api/tokenizers/mistral/encode', str, padding); case tokenizers.YI: - return countTokensRemote('/api/tokenizers/yi/encode', str, padding); + return countTokensFromServer('/api/tokenizers/yi/encode', str, padding); case tokenizers.API: - return countTokensRemote('/api/tokenizers/remote/encode', str, padding); + return countTokensFromServer('/api/tokenizers/remote/encode', str, padding); default: console.warn('Unknown tokenizer type', type); return callTokenizer(tokenizers.NONE, str, padding); @@ -391,7 +391,7 @@ function getTokenCacheObject() { return tokenCache[String(chatId)]; } -function getRemoteTokenizationParams(str) { +function getServerTokenizationParams(str) { return { text: str, main_api, @@ -404,20 
+404,20 @@ function getRemoteTokenizationParams(str) { } /** - * Counts token using the remote server API. + * Counts token using the server API. * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. * @param {number} padding Number of padding tokens. * @returns {number} Token count with padding. */ -function countTokensRemote(endpoint, str, padding) { +function countTokensFromServer(endpoint, str, padding) { let tokenCount = 0; jQuery.ajax({ async: false, type: 'POST', url: endpoint, - data: JSON.stringify(getRemoteTokenizationParams(str)), + data: JSON.stringify(getServerTokenizationParams(str)), dataType: 'json', contentType: 'application/json', success: function (data) { @@ -450,7 +450,7 @@ function countTokensRemote(endpoint, str, padding) { * @param {string} model Tokenizer model. * @returns {number[]} Array of token ids. */ -function getTextTokensRemote(endpoint, str, model = '') { +function getTextTokensFromServer(endpoint, str, model = '') { if (model) { endpoint += `?model=${model}`; } @@ -460,7 +460,7 @@ function getTextTokensRemote(endpoint, str, model = '') { async: false, type: 'POST', url: endpoint, - data: JSON.stringify(getRemoteTokenizationParams(str)), + data: JSON.stringify(getServerTokenizationParams(str)), dataType: 'json', contentType: 'application/json', success: function (data) { @@ -480,7 +480,7 @@ function getTextTokensRemote(endpoint, str, model = '') { * @param {string} endpoint API endpoint. * @param {number[]} ids Array of token ids */ -function decodeTextTokensRemote(endpoint, ids, model = '') { +function decodeTextTokensFromServer(endpoint, ids, model = '') { if (model) { endpoint += `?model=${model}`; } @@ -501,7 +501,7 @@ function decodeTextTokensRemote(endpoint, ids, model = '') { } /** - * Encodes a string to tokens using the remote server API. + * Encodes a string to tokens using the server API. * @param {number} tokenizerType Tokenizer type. * @param {string} str String to tokenize. 
* @returns {number[]} Array of token ids. @@ -509,23 +509,23 @@ function decodeTextTokensRemote(endpoint, ids, model = '') { export function getTextTokens(tokenizerType, str) { switch (tokenizerType) { case tokenizers.GPT2: - return getTextTokensRemote('/api/tokenizers/gpt2/encode', str); + return getTextTokensFromServer('/api/tokenizers/gpt2/encode', str); case tokenizers.LLAMA: - return getTextTokensRemote('/api/tokenizers/llama/encode', str); + return getTextTokensFromServer('/api/tokenizers/llama/encode', str); case tokenizers.NERD: - return getTextTokensRemote('/api/tokenizers/nerdstash/encode', str); + return getTextTokensFromServer('/api/tokenizers/nerdstash/encode', str); case tokenizers.NERD2: - return getTextTokensRemote('/api/tokenizers/nerdstash_v2/encode', str); + return getTextTokensFromServer('/api/tokenizers/nerdstash_v2/encode', str); case tokenizers.MISTRAL: - return getTextTokensRemote('/api/tokenizers/mistral/encode', str); + return getTextTokensFromServer('/api/tokenizers/mistral/encode', str); case tokenizers.YI: - return getTextTokensRemote('/api/tokenizers/yi/encode', str); + return getTextTokensFromServer('/api/tokenizers/yi/encode', str); case tokenizers.OPENAI: { const model = getTokenizerModel(); - return getTextTokensRemote('/api/tokenizers/openai/encode', str, model); + return getTextTokensFromServer('/api/tokenizers/openai/encode', str, model); } case tokenizers.API: - return getTextTokensRemote('/api/tokenizers/remote/encode', str); + return getTextTokensFromServer('/api/tokenizers/remote/encode', str); default: console.warn('Calling getTextTokens with unsupported tokenizer type', tokenizerType); return []; @@ -533,27 +533,27 @@ export function getTextTokens(tokenizerType, str) { } /** - * Decodes token ids to text using the remote server API. + * Decodes token ids to text using the server API. * @param {number} tokenizerType Tokenizer type. 
* @param {number[]} ids Array of token ids */ export function decodeTextTokens(tokenizerType, ids) { switch (tokenizerType) { case tokenizers.GPT2: - return decodeTextTokensRemote('/api/tokenizers/gpt2/decode', ids); + return decodeTextTokensFromServer('/api/tokenizers/gpt2/decode', ids); case tokenizers.LLAMA: - return decodeTextTokensRemote('/api/tokenizers/llama/decode', ids); + return decodeTextTokensFromServer('/api/tokenizers/llama/decode', ids); case tokenizers.NERD: - return decodeTextTokensRemote('/api/tokenizers/nerdstash/decode', ids); + return decodeTextTokensFromServer('/api/tokenizers/nerdstash/decode', ids); case tokenizers.NERD2: - return decodeTextTokensRemote('/api/tokenizers/nerdstash_v2/decode', ids); + return decodeTextTokensFromServer('/api/tokenizers/nerdstash_v2/decode', ids); case tokenizers.MISTRAL: - return decodeTextTokensRemote('/api/tokenizers/mistral/decode', ids); + return decodeTextTokensFromServer('/api/tokenizers/mistral/decode', ids); case tokenizers.YI: - return decodeTextTokensRemote('/api/tokenizers/yi/decode', ids); + return decodeTextTokensFromServer('/api/tokenizers/yi/decode', ids); case tokenizers.OPENAI: { const model = getTokenizerModel(); - return decodeTextTokensRemote('/api/tokenizers/openai/decode', ids, model); + return decodeTextTokensFromServer('/api/tokenizers/openai/decode', ids, model); } default: console.warn('Calling decodeTextTokens with unsupported tokenizer type', tokenizerType); From 18177c147d6530f095657c181e668bea18c51be7 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 20:08:48 -0500 Subject: [PATCH 016/179] Separate remote and server tokenization code paths This lets us remove extraneous API params from paths where they aren't needed. 
--- public/scripts/tokenizers.js | 93 ++++++++++++++++++++++++++++++------ 1 file changed, 78 insertions(+), 15 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index b72b672a8..c67e531a5 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -173,7 +173,7 @@ function callTokenizer(type, str, padding) { case tokenizers.YI: return countTokensFromServer('/api/tokenizers/yi/encode', str, padding); case tokenizers.API: - return countTokensFromServer('/api/tokenizers/remote/encode', str, padding); + return countTokensFromRemoteAPI('/api/tokenizers/remote/encode', str, padding); default: console.warn('Unknown tokenizer type', type); return callTokenizer(tokenizers.NONE, str, padding); @@ -392,6 +392,12 @@ function getTokenCacheObject() { } function getServerTokenizationParams(str) { + return { + text: str, + }; +} + +function getRemoteAPITokenizationParams(str) { return { text: str, main_api, @@ -404,7 +410,7 @@ function getServerTokenizationParams(str) { } /** - * Counts token using the server API. + * Count tokens using the server API. * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. * @param {number} padding Number of padding tokens. @@ -424,18 +430,7 @@ function countTokensFromServer(endpoint, str, padding) { if (typeof data.count === 'number') { tokenCount = data.count; } else { - tokenCount = guesstimate(str); - console.error('Error counting tokens'); - - if (!sessionStorage.getItem(TOKENIZER_WARNING_KEY)) { - toastr.warning( - 'Your selected API doesn\'t support the tokenization endpoint. Using estimated counts.', - 'Error counting tokens', - { timeOut: 10000, preventDuplicates: true }, - ); - - sessionStorage.setItem(TOKENIZER_WARNING_KEY, String(true)); - } + tokenCount = apiFailureTokenCount(str); } }, }); @@ -443,6 +438,51 @@ function countTokensFromServer(endpoint, str, padding) { return tokenCount + padding; } +/** + * Count tokens using the AI provider's API. 
+ * @param {string} endpoint API endpoint. + * @param {string} str String to tokenize. + * @param {number} padding Number of padding tokens. + * @returns {number} Token count with padding. + */ +function countTokensFromRemoteAPI(endpoint, str, padding) { + let tokenCount = 0; + + jQuery.ajax({ + async: false, + type: 'POST', + url: endpoint, + data: JSON.stringify(getRemoteAPITokenizationParams(str)), + dataType: 'json', + contentType: 'application/json', + success: function (data) { + if (typeof data.count === 'number') { + tokenCount = data.count; + } else { + tokenCount = apiFailureTokenCount(str); + } + }, + }); + + return tokenCount + padding; +} + +function apiFailureTokenCount(str) { + console.error('Error counting tokens'); + + if (!sessionStorage.getItem(TOKENIZER_WARNING_KEY)) { + toastr.warning( + 'Your selected API doesn\'t support the tokenization endpoint. Using estimated counts.', + 'Error counting tokens', + { timeOut: 10000, preventDuplicates: true }, + ); + + sessionStorage.setItem(TOKENIZER_WARNING_KEY, String(true)); + } + + return guesstimate(str); +} + /** * Calls the underlying tokenizer model to encode a string to tokens. * @param {string} endpoint API endpoint. @@ -475,6 +515,29 @@ function getTextTokensFromServer(endpoint, str, model = '') { return ids; } +/** + * Calls the AI provider's tokenize API to encode a string to tokens. + * @param {string} endpoint API endpoint. + * @param {string} str String to tokenize. + * @param {string} model Tokenizer model. + * @returns {number[]} Array of token ids. + */ +function getTextTokensFromRemoteAPI(endpoint, str, model = '') { + let ids = []; + jQuery.ajax({ + async: false, + type: 'POST', + url: endpoint, + data: JSON.stringify(getRemoteAPITokenizationParams(str)), + dataType: 'json', + contentType: 'application/json', + success: function (data) { + ids = data.ids; + }, + }); + return ids; +} + /** * Calls the underlying tokenizer model to decode token ids to text. 
* @param {string} endpoint API endpoint. @@ -525,7 +588,7 @@ export function getTextTokens(tokenizerType, str) { return getTextTokensFromServer('/api/tokenizers/openai/encode', str, model); } case tokenizers.API: - return getTextTokensFromServer('/api/tokenizers/remote/encode', str); + return getTextTokensFromRemoteAPI('/api/tokenizers/remote/encode', str); default: console.warn('Calling getTextTokens with unsupported tokenizer type', tokenizerType); return []; From 7486ab3886fd3f9bb27756fe5901170e7cb580f8 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 20:20:53 -0500 Subject: [PATCH 017/179] Separate textgen and Kobold tokenization APIs They function differently and have different logic and API parameters, so it makes sense to count them as two different APIs. Kobold's API doesn't return tokens, so it can only be used to count them. There's still a lot of duplicate code which I will clean up in the following commits. --- public/scripts/tokenizers.js | 69 ++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index c67e531a5..cf0fd3481 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -18,9 +18,10 @@ export const tokenizers = { LLAMA: 3, NERD: 4, NERD2: 5, - API: 6, + API_KOBOLD: 6, MISTRAL: 7, YI: 8, + API_TEXTGENERATIONWEBUI: 9, BEST_MATCH: 99, }; @@ -135,11 +136,11 @@ export function getTokenizerBestMatch(forApi) { if (!hasTokenizerError && isConnected) { if (forApi === 'kobold' && kai_flags.can_use_tokenization) { - return tokenizers.API; + return tokenizers.API_KOBOLD; } if (forApi === 'textgenerationwebui' && isTokenizerSupported) { - return tokenizers.API; + return tokenizers.API_TEXTGENERATIONWEBUI; } } @@ -172,8 +173,10 @@ function callTokenizer(type, str, padding) { return countTokensFromServer('/api/tokenizers/mistral/encode', str, padding); case tokenizers.YI: return 
countTokensFromServer('/api/tokenizers/yi/encode', str, padding); - case tokenizers.API: - return countTokensFromRemoteAPI('/api/tokenizers/remote/encode', str, padding); + case tokenizers.API_KOBOLD: + return countTokensFromKoboldAPI('/api/tokenizers/remote/encode', str, padding); + case tokenizers.API_TEXTGENERATIONWEBUI: + return countTokensFromTextgenAPI('/api/tokenizers/remote/encode', str, padding); default: console.warn('Unknown tokenizer type', type); return callTokenizer(tokenizers.NONE, str, padding); @@ -397,13 +400,21 @@ function getServerTokenizationParams(str) { }; } -function getRemoteAPITokenizationParams(str) { +function getKoboldAPITokenizationParams(str) { return { text: str, - main_api, + main_api: 'kobold', + url: getAPIServerUrl(), + }; +} + +function getTextgenAPITokenizationParams(str) { + return { + text: str, + main_api: 'textgenerationwebui', api_type: textgen_settings.type, url: getAPIServerUrl(), - legacy_api: main_api === 'textgenerationwebui' && + legacy_api: textgen_settings.legacy_api && textgen_settings.type !== MANCER, }; @@ -445,14 +456,43 @@ function countTokensFromServer(endpoint, str, padding) { * @param {number} padding Number of padding tokens. * @returns {number} Token count with padding. */ -function countTokensFromRemoteAPI(endpoint, str, padding) { +function countTokensFromKoboldAPI(endpoint, str, padding) { let tokenCount = 0; jQuery.ajax({ async: false, type: 'POST', url: endpoint, - data: JSON.stringify(getRemoteAPITokenizationParams(str)), + data: JSON.stringify(getKoboldAPITokenizationParams(str)), + dataType: 'json', + contentType: 'application/json', + success: function (data) { + if (typeof data.count === 'number') { + tokenCount = data.count; + } else { + tokenCount = apiFailureTokenCount(str); + } + }, + }); + + return tokenCount + padding; +} + +/** + * Count tokens using the AI provider's API. + * @param {string} endpoint API endpoint. + * @param {string} str String to tokenize. 
+ * @param {number} padding Number of padding tokens. + * @returns {number} Token count with padding. + */ +function countTokensFromTextgenAPI(endpoint, str, padding) { + let tokenCount = 0; + + jQuery.ajax({ + async: false, + type: 'POST', + url: endpoint, + data: JSON.stringify(getTextgenAPITokenizationParams(str)), dataType: 'json', contentType: 'application/json', success: function (data) { @@ -519,16 +559,15 @@ function getTextTokensFromServer(endpoint, str, model = '') { * Calls the AI provider's tokenize API to encode a string to tokens. * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. - * @param {string} model Tokenizer model. * @returns {number[]} Array of token ids. */ -function getTextTokensFromRemoteAPI(endpoint, str, model = '') { +function getTextTokensFromTextgenAPI(endpoint, str) { let ids = []; jQuery.ajax({ async: false, type: 'POST', url: endpoint, - data: JSON.stringify(getRemoteAPITokenizationParams(str)), + data: JSON.stringify(getTextgenAPITokenizationParams(str)), dataType: 'json', contentType: 'application/json', success: function (data) { @@ -587,8 +626,8 @@ export function getTextTokens(tokenizerType, str) { const model = getTokenizerModel(); return getTextTokensFromServer('/api/tokenizers/openai/encode', str, model); } - case tokenizers.API: - return getTextTokensFromRemoteAPI('/api/tokenizers/remote/encode', str); + case tokenizers.API_TEXTGENERATIONWEBUI: + return getTextTokensFromTextgenAPI('/api/tokenizers/remote/encode', str); default: console.warn('Calling getTextTokens with unsupported tokenizer type', tokenizerType); return []; From 30502ac94958f71ac040c8e0c84a2b31e5e57f94 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 20:26:24 -0500 Subject: [PATCH 018/179] Split up Kobold and textgenerationwebui endpoints The endpoint was one big if/else statement that did two entirely different things depending on the value of main_api. 
It makes more sense for those to be two separate endpoints. --- public/scripts/tokenizers.js | 14 ++-- src/endpoints/tokenizers.js | 131 ++++++++++++++++++----------------- 2 files changed, 73 insertions(+), 72 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index cf0fd3481..e0d37ddca 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -1,4 +1,4 @@ -import { characters, getAPIServerUrl, main_api, nai_settings, online_status, this_chid } from '../script.js'; +import { characters, main_api, api_server, api_server_textgenerationwebui, nai_settings, online_status, this_chid } from '../script.js'; import { power_user, registerDebugFunction } from './power-user.js'; import { chat_completion_sources, model_list, oai_settings } from './openai.js'; import { groups, selected_group } from './group-chats.js'; @@ -174,9 +174,9 @@ function callTokenizer(type, str, padding) { case tokenizers.YI: return countTokensFromServer('/api/tokenizers/yi/encode', str, padding); case tokenizers.API_KOBOLD: - return countTokensFromKoboldAPI('/api/tokenizers/remote/encode', str, padding); + return countTokensFromKoboldAPI('/api/tokenizers/remote/kobold/count', str, padding); case tokenizers.API_TEXTGENERATIONWEBUI: - return countTokensFromTextgenAPI('/api/tokenizers/remote/encode', str, padding); + return countTokensFromTextgenAPI('/api/tokenizers/remote/textgenerationwebui/encode', str, padding); default: console.warn('Unknown tokenizer type', type); return callTokenizer(tokenizers.NONE, str, padding); @@ -403,17 +403,15 @@ function getServerTokenizationParams(str) { function getKoboldAPITokenizationParams(str) { return { text: str, - main_api: 'kobold', - url: getAPIServerUrl(), + url: api_server, }; } function getTextgenAPITokenizationParams(str) { return { text: str, - main_api: 'textgenerationwebui', api_type: textgen_settings.type, - url: getAPIServerUrl(), + url: api_server_textgenerationwebui, legacy_api: 
textgen_settings.legacy_api && textgen_settings.type !== MANCER, @@ -627,7 +625,7 @@ export function getTextTokens(tokenizerType, str) { return getTextTokensFromServer('/api/tokenizers/openai/encode', str, model); } case tokenizers.API_TEXTGENERATIONWEBUI: - return getTextTokensFromTextgenAPI('/api/tokenizers/remote/encode', str); + return getTextTokensFromTextgenAPI('/api/tokenizers/textgenerationwebui/encode', str); default: console.warn('Calling getTextTokens with unsupported tokenizer type', tokenizerType); return []; diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js index 8a4db7728..27ef4faf3 100644 --- a/src/endpoints/tokenizers.js +++ b/src/endpoints/tokenizers.js @@ -536,87 +536,90 @@ router.post('/openai/count', jsonParser, async function (req, res) { } }); -router.post('/remote/encode', jsonParser, async function (request, response) { +router.post('/remote/kobold/count', jsonParser, async function (request, response) { + if (!request.body) { + return response.sendStatus(400); + } + const text = String(request.body.text) || ''; + const baseUrl = String(request.body.url); + + try { + const args = { + method: 'POST', + body: JSON.stringify({ 'prompt': text }), + headers: { 'Content-Type': 'application/json' }, + }; + + let url = String(baseUrl).replace(/\/$/, ''); + url += '/extra/tokencount'; + + const result = await fetch(url, args); + + if (!result.ok) { + console.log(`API returned error: ${result.status} ${result.statusText}`); + return response.send({ error: true }); + } + + const data = await result.json(); + const count = data['value']; + return response.send({ count, ids: [] }); + } catch (error) { + console.log(error); + return response.send({ error: true }); + } +}); + +router.post('/remote/textgenerationwebui/encode', jsonParser, async function (request, response) { if (!request.body) { return response.sendStatus(400); } const text = String(request.body.text) || ''; - const api = String(request.body.main_api); const baseUrl = 
String(request.body.url); const legacyApi = Boolean(request.body.legacy_api); try { - if (api == 'textgenerationwebui') { - const args = { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - }; + const args = { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + }; - setAdditionalHeaders(request, args, null); + setAdditionalHeaders(request, args, null); - // Convert to string + remove trailing slash + /v1 suffix - let url = String(baseUrl).replace(/\/$/, '').replace(/\/v1$/, ''); + // Convert to string + remove trailing slash + /v1 suffix + let url = String(baseUrl).replace(/\/$/, '').replace(/\/v1$/, ''); - if (legacyApi) { - url += '/v1/token-count'; - args.body = JSON.stringify({ 'prompt': text }); - } else { - switch (request.body.api_type) { - case TEXTGEN_TYPES.TABBY: - url += '/v1/token/encode'; - args.body = JSON.stringify({ 'text': text }); - break; - case TEXTGEN_TYPES.KOBOLDCPP: - url += '/api/extra/tokencount'; - args.body = JSON.stringify({ 'prompt': text }); - break; - default: - url += '/v1/internal/encode'; - args.body = JSON.stringify({ 'text': text }); - break; - } + if (legacyApi) { + url += '/v1/token-count'; + args.body = JSON.stringify({ 'prompt': text }); + } else { + switch (request.body.api_type) { + case TEXTGEN_TYPES.TABBY: + url += '/v1/token/encode'; + args.body = JSON.stringify({ 'text': text }); + break; + case TEXTGEN_TYPES.KOBOLDCPP: + url += '/api/extra/tokencount'; + args.body = JSON.stringify({ 'prompt': text }); + break; + default: + url += '/v1/internal/encode'; + args.body = JSON.stringify({ 'text': text }); + break; } - - const result = await fetch(url, args); - - if (!result.ok) { - console.log(`API returned error: ${result.status} ${result.statusText}`); - return response.send({ error: true }); - } - - const data = await result.json(); - const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value); - const ids = legacyApi ? [] : (data?.tokens ?? 
[]); - - return response.send({ count, ids }); } - else if (api == 'kobold') { - const args = { - method: 'POST', - body: JSON.stringify({ 'prompt': text }), - headers: { 'Content-Type': 'application/json' }, - }; + const result = await fetch(url, args); - let url = String(baseUrl).replace(/\/$/, ''); - url += '/extra/tokencount'; - - const result = await fetch(url, args); - - if (!result.ok) { - console.log(`API returned error: ${result.status} ${result.statusText}`); - return response.send({ error: true }); - } - - const data = await result.json(); - const count = data['value']; - return response.send({ count: count, ids: [] }); - } - - else { - console.log('Unknown API', api); + if (!result.ok) { + console.log(`API returned error: ${result.status} ${result.statusText}`); return response.send({ error: true }); } + + const data = await result.json(); + const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value); + const ids = legacyApi ? [] : (data?.tokens ?? []); + + return response.send({ count, ids }); } catch (error) { console.log(error); return response.send({ error: true }); From 09465fbb972233bb290989b0eb1b9e400000a3ba Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 20:35:11 -0500 Subject: [PATCH 019/179] Inline most get(...)TokenizerParams calls For everything except textgenerationwebui, these params are now simple enough that it doesn't make sense for them to be in a separate function. 
--- public/scripts/tokenizers.js | 44 ++++++++++++++---------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index e0d37ddca..d21c1abb0 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -394,30 +394,6 @@ function getTokenCacheObject() { return tokenCache[String(chatId)]; } -function getServerTokenizationParams(str) { - return { - text: str, - }; -} - -function getKoboldAPITokenizationParams(str) { - return { - text: str, - url: api_server, - }; -} - -function getTextgenAPITokenizationParams(str) { - return { - text: str, - api_type: textgen_settings.type, - url: api_server_textgenerationwebui, - legacy_api: - textgen_settings.legacy_api && - textgen_settings.type !== MANCER, - }; -} - /** * Count tokens using the server API. * @param {string} endpoint API endpoint. @@ -432,7 +408,7 @@ function countTokensFromServer(endpoint, str, padding) { async: false, type: 'POST', url: endpoint, - data: JSON.stringify(getServerTokenizationParams(str)), + data: JSON.stringify({ text: str }), dataType: 'json', contentType: 'application/json', success: function (data) { @@ -461,7 +437,10 @@ function countTokensFromKoboldAPI(endpoint, str, padding) { async: false, type: 'POST', url: endpoint, - data: JSON.stringify(getKoboldAPITokenizationParams(str)), + data: JSON.stringify({ + text: str, + url: api_server, + }), dataType: 'json', contentType: 'application/json', success: function (data) { @@ -476,6 +455,17 @@ function countTokensFromKoboldAPI(endpoint, str, padding) { return tokenCount + padding; } +function getTextgenAPITokenizationParams(str) { + return { + text: str, + api_type: textgen_settings.type, + url: api_server_textgenerationwebui, + legacy_api: + textgen_settings.legacy_api && + textgen_settings.type !== MANCER, + }; +} + /** * Count tokens using the AI provider's API. * @param {string} endpoint API endpoint. 
@@ -538,7 +528,7 @@ function getTextTokensFromServer(endpoint, str, model = '') { async: false, type: 'POST', url: endpoint, - data: JSON.stringify(getServerTokenizationParams(str)), + data: JSON.stringify({ text: str }), dataType: 'json', contentType: 'application/json', success: function (data) { From 2f2cd197cc5648db9fe28b416252a9009a4a9090 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 20:48:41 -0500 Subject: [PATCH 020/179] Clean up tokenizer API code Store the URLs for each tokenizer's action in one place at the top of the file, instead of in a bunch of switch-cases. The URLs for the textgen and Kobold APIs don't change and hence don't need to be function arguments. --- public/scripts/tokenizers.js | 174 +++++++++++++++++++---------------- 1 file changed, 95 insertions(+), 79 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index d21c1abb0..1c8420616 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -34,6 +34,51 @@ export const SENTENCEPIECE_TOKENIZERS = [ //tokenizers.NERD2, ]; +const TOKENIZER_URLS = { + [tokenizers.GPT2]: { + encode: '/api/tokenizers/gpt2/encode', + decode: '/api/tokenizers/gpt2/decode', + count: '/api/tokenizers/gpt2/encode', + }, + [tokenizers.OPENAI]: { + encode: '/api/tokenizers/openai/encode', + decode: '/api/tokenizers/openai/decode', + count: '/api/tokenizers/openai/encode', + }, + [tokenizers.LLAMA]: { + encode: '/api/tokenizers/llama/encode', + decode: '/api/tokenizers/llama/decode', + count: '/api/tokenizers/llama/encode', + }, + [tokenizers.NERD]: { + encode: '/api/tokenizers/nerdstash/encode', + decode: '/api/tokenizers/nerdstash/decode', + count: '/api/tokenizers/nerdstash/encode', + }, + [tokenizers.NERD2]: { + encode: '/api/tokenizers/nerdstash_v2/encode', + decode: '/api/tokenizers/nerdstash_v2/decode', + count: '/api/tokenizers/nerdstash_v2/encode', + }, + [tokenizers.API_KOBOLD]: { + count: '/api/tokenizers/remote/kobold/count', + }, + 
[tokenizers.MISTRAL]: { + encode: '/api/tokenizers/mistral/encode', + decode: '/api/tokenizers/mistral/decode', + count: '/api/tokenizers/mistral/encode', + }, + [tokenizers.YI]: { + encode: '/api/tokenizers/yi/encode', + decode: '/api/tokenizers/yi/decode', + count: '/api/tokenizers/yi/encode', + }, + [tokenizers.API_TEXTGENERATIONWEBUI]: { + encode: '/api/tokenizers/remote/textgenerationwebui/encode', + count: '/api/tokenizers/remote/textgenerationwebui/encode', + }, +}; + const objectStore = new localforage.createInstance({ name: 'SillyTavern_ChatCompletions' }); let tokenCache = {}; @@ -158,28 +203,21 @@ export function getTokenizerBestMatch(forApi) { * @returns {number} Token count. */ function callTokenizer(type, str, padding) { + if (type === tokenizers.NONE) return guesstimate(str) + padding; + switch (type) { - case tokenizers.NONE: - return guesstimate(str) + padding; - case tokenizers.GPT2: - return countTokensFromServer('/api/tokenizers/gpt2/encode', str, padding); - case tokenizers.LLAMA: - return countTokensFromServer('/api/tokenizers/llama/encode', str, padding); - case tokenizers.NERD: - return countTokensFromServer('/api/tokenizers/nerdstash/encode', str, padding); - case tokenizers.NERD2: - return countTokensFromServer('/api/tokenizers/nerdstash_v2/encode', str, padding); - case tokenizers.MISTRAL: - return countTokensFromServer('/api/tokenizers/mistral/encode', str, padding); - case tokenizers.YI: - return countTokensFromServer('/api/tokenizers/yi/encode', str, padding); case tokenizers.API_KOBOLD: - return countTokensFromKoboldAPI('/api/tokenizers/remote/kobold/count', str, padding); + return countTokensFromKoboldAPI(str, padding); case tokenizers.API_TEXTGENERATIONWEBUI: - return countTokensFromTextgenAPI('/api/tokenizers/remote/textgenerationwebui/encode', str, padding); - default: - console.warn('Unknown tokenizer type', type); - return callTokenizer(tokenizers.NONE, str, padding); + return countTokensFromTextgenAPI(str, padding); + default: 
{ + const endpointUrl = TOKENIZER_URLS[type]?.count; + if (!endpointUrl) { + console.warn('Unknown tokenizer type', type); + return callTokenizer(tokenizers.NONE, str, padding); + } + return countTokensFromServer(endpointUrl, str, padding); + } } } @@ -425,18 +463,17 @@ function countTokensFromServer(endpoint, str, padding) { /** * Count tokens using the AI provider's API. - * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. * @param {number} padding Number of padding tokens. * @returns {number} Token count with padding. */ -function countTokensFromKoboldAPI(endpoint, str, padding) { +function countTokensFromKoboldAPI(str, padding) { let tokenCount = 0; jQuery.ajax({ async: false, type: 'POST', - url: endpoint, + url: TOKENIZER_URLS[tokenizers.API_KOBOLD].count, data: JSON.stringify({ text: str, url: api_server, @@ -468,18 +505,17 @@ function getTextgenAPITokenizationParams(str) { /** * Count tokens using the AI provider's API. - * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. * @param {number} padding Number of padding tokens. * @returns {number} Token count with padding. */ -function countTokensFromTextgenAPI(endpoint, str, padding) { +function countTokensFromTextgenAPI(str, padding) { let tokenCount = 0; jQuery.ajax({ async: false, type: 'POST', - url: endpoint, + url: TOKENIZER_URLS[tokenizers.API_TEXTGENERATIONWEBUI].count, data: JSON.stringify(getTextgenAPITokenizationParams(str)), dataType: 'json', contentType: 'application/json', @@ -515,14 +551,9 @@ function apiFailureTokenCount(str) { * Calls the underlying tokenizer model to encode a string to tokens. * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. - * @param {string} model Tokenizer model. * @returns {number[]} Array of token ids. 
*/ -function getTextTokensFromServer(endpoint, str, model = '') { - if (model) { - endpoint += `?model=${model}`; - } - +function getTextTokensFromServer(endpoint, str) { let ids = []; jQuery.ajax({ async: false, @@ -545,16 +576,15 @@ function getTextTokensFromServer(endpoint, str, model = '') { /** * Calls the AI provider's tokenize API to encode a string to tokens. - * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. * @returns {number[]} Array of token ids. */ -function getTextTokensFromTextgenAPI(endpoint, str) { +function getTextTokensFromTextgenAPI(str) { let ids = []; jQuery.ajax({ async: false, type: 'POST', - url: endpoint, + url: TOKENIZER_URLS[tokenizers.API_TEXTGENERATIONWEBUI].encode, data: JSON.stringify(getTextgenAPITokenizationParams(str)), dataType: 'json', contentType: 'application/json', @@ -570,11 +600,7 @@ function getTextTokensFromTextgenAPI(endpoint, str) { * @param {string} endpoint API endpoint. * @param {number[]} ids Array of token ids */ -function decodeTextTokensFromServer(endpoint, ids, model = '') { - if (model) { - endpoint += `?model=${model}`; - } - +function decodeTextTokensFromServer(endpoint, ids) { let text = ''; jQuery.ajax({ async: false, @@ -598,27 +624,24 @@ function decodeTextTokensFromServer(endpoint, ids, model = '') { */ export function getTextTokens(tokenizerType, str) { switch (tokenizerType) { - case tokenizers.GPT2: - return getTextTokensFromServer('/api/tokenizers/gpt2/encode', str); - case tokenizers.LLAMA: - return getTextTokensFromServer('/api/tokenizers/llama/encode', str); - case tokenizers.NERD: - return getTextTokensFromServer('/api/tokenizers/nerdstash/encode', str); - case tokenizers.NERD2: - return getTextTokensFromServer('/api/tokenizers/nerdstash_v2/encode', str); - case tokenizers.MISTRAL: - return getTextTokensFromServer('/api/tokenizers/mistral/encode', str); - case tokenizers.YI: - return getTextTokensFromServer('/api/tokenizers/yi/encode', str); - case 
tokenizers.OPENAI: { - const model = getTokenizerModel(); - return getTextTokensFromServer('/api/tokenizers/openai/encode', str, model); - } case tokenizers.API_TEXTGENERATIONWEBUI: - return getTextTokensFromTextgenAPI('/api/tokenizers/textgenerationwebui/encode', str); - default: - console.warn('Calling getTextTokens with unsupported tokenizer type', tokenizerType); - return []; + return getTextTokensFromTextgenAPI(str); + default: { + const tokenizerEndpoints = TOKENIZER_URLS[tokenizerType]; + if (!tokenizerEndpoints) { + console.warn('Unknown tokenizer type', tokenizerType); + return []; + } + let endpointUrl = tokenizerEndpoints.encode; + if (!endpointUrl) { + console.warn('This tokenizer type does not support encoding', tokenizerType); + return []; + } + if (tokenizerType === tokenizers.OPENAI) { + endpointUrl += `?model=${getTokenizerModel()}`; + } + return getTextTokensFromServer(endpointUrl, str); + } } } @@ -628,27 +651,20 @@ export function getTextTokens(tokenizerType, str) { * @param {number[]} ids Array of token ids */ export function decodeTextTokens(tokenizerType, ids) { - switch (tokenizerType) { - case tokenizers.GPT2: - return decodeTextTokensFromServer('/api/tokenizers/gpt2/decode', ids); - case tokenizers.LLAMA: - return decodeTextTokensFromServer('/api/tokenizers/llama/decode', ids); - case tokenizers.NERD: - return decodeTextTokensFromServer('/api/tokenizers/nerdstash/decode', ids); - case tokenizers.NERD2: - return decodeTextTokensFromServer('/api/tokenizers/nerdstash_v2/decode', ids); - case tokenizers.MISTRAL: - return decodeTextTokensFromServer('/api/tokenizers/mistral/decode', ids); - case tokenizers.YI: - return decodeTextTokensFromServer('/api/tokenizers/yi/decode', ids); - case tokenizers.OPENAI: { - const model = getTokenizerModel(); - return decodeTextTokensFromServer('/api/tokenizers/openai/decode', ids, model); - } - default: - console.warn('Calling decodeTextTokens with unsupported tokenizer type', tokenizerType); - return ''; + 
const tokenizerEndpoints = TOKENIZER_URLS[tokenizerType]; + if (!tokenizerEndpoints) { + console.warn('Unknown tokenizer type', tokenizerType); + return []; } + let endpointUrl = tokenizerEndpoints.decode; + if (!endpointUrl) { + console.warn('This tokenizer type does not support decoding', tokenizerType); + return []; + } + if (tokenizerType === tokenizers.OPENAI) { + endpointUrl += `?model=${getTokenizerModel()}`; + } + return decodeTextTokensFromServer(endpointUrl, ids); } export async function initTokenizers() { From 014416546ce0ef01c9fd8ba43ac9ab7cedff09bf Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 20:53:16 -0500 Subject: [PATCH 021/179] Add padding once in getTokenCount This means we don't have to pass the "padding" parameter into every function so they can add the padding themselves--we can do it in just one place instead. --- public/scripts/tokenizers.js | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 1c8420616..6c406531f 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -199,24 +199,23 @@ export function getTokenizerBestMatch(forApi) { * Calls the underlying tokenizer model to the token count for a string. * @param {number} type Tokenizer type. * @param {string} str String to tokenize. - * @param {number} padding Number of padding tokens. * @returns {number} Token count. 
*/ -function callTokenizer(type, str, padding) { - if (type === tokenizers.NONE) return guesstimate(str) + padding; +function callTokenizer(type, str) { + if (type === tokenizers.NONE) return guesstimate(str); switch (type) { case tokenizers.API_KOBOLD: - return countTokensFromKoboldAPI(str, padding); + return countTokensFromKoboldAPI(str); case tokenizers.API_TEXTGENERATIONWEBUI: - return countTokensFromTextgenAPI(str, padding); + return countTokensFromTextgenAPI(str); default: { const endpointUrl = TOKENIZER_URLS[type]?.count; if (!endpointUrl) { console.warn('Unknown tokenizer type', type); - return callTokenizer(tokenizers.NONE, str, padding); + return callTokenizer(tokenizers.NONE, str); } - return countTokensFromServer(endpointUrl, str, padding); + return countTokensFromServer(endpointUrl, str); } } } @@ -260,7 +259,7 @@ export function getTokenCount(str, padding = undefined) { return cacheObject[cacheKey]; } - const result = callTokenizer(tokenizerType, str, padding); + const result = callTokenizer(tokenizerType, str) + padding; if (isNaN(result)) { console.warn('Token count calculation returned NaN'); @@ -436,10 +435,9 @@ function getTokenCacheObject() { * Count tokens using the server API. * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. - * @param {number} padding Number of padding tokens. - * @returns {number} Token count with padding. + * @returns {number} Token count. */ -function countTokensFromServer(endpoint, str, padding) { +function countTokensFromServer(endpoint, str) { let tokenCount = 0; jQuery.ajax({ @@ -458,16 +456,15 @@ function countTokensFromServer(endpoint, str, padding) { }, }); - return tokenCount + padding; + return tokenCount; } /** * Count tokens using the AI provider's API. * @param {string} str String to tokenize. - * @param {number} padding Number of padding tokens. - * @returns {number} Token count with padding. + * @returns {number} Token count. 
*/ -function countTokensFromKoboldAPI(str, padding) { +function countTokensFromKoboldAPI(str) { let tokenCount = 0; jQuery.ajax({ @@ -489,7 +486,7 @@ function countTokensFromKoboldAPI(str, padding) { }, }); - return tokenCount + padding; + return tokenCount; } function getTextgenAPITokenizationParams(str) { @@ -506,10 +503,9 @@ function getTextgenAPITokenizationParams(str) { /** * Count tokens using the AI provider's API. * @param {string} str String to tokenize. - * @param {number} padding Number of padding tokens. - * @returns {number} Token count with padding. + * @returns {number} Token count. */ -function countTokensFromTextgenAPI(str, padding) { +function countTokensFromTextgenAPI(str) { let tokenCount = 0; jQuery.ajax({ @@ -528,7 +524,7 @@ function countTokensFromTextgenAPI(str, padding) { }, }); - return tokenCount + padding; + return tokenCount; } function apiFailureTokenCount(str) { From 499d158c11136c8649c7bb3eb75dcdd31de2f54c Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 20:55:34 -0500 Subject: [PATCH 022/179] Remove last usage of getAPIServerUrl Now that we're not using this in the tokenizers code, we can remove it. --- public/script.js | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/public/script.js b/public/script.js index 81bed0af0..da754aa5f 100644 --- a/public/script.js +++ b/public/script.js @@ -871,7 +871,7 @@ async function getStatusKobold() { const url = '/getstatus'; - let endpoint = getAPIServerUrl(); + let endpoint = api_server; if (!endpoint) { console.warn('No endpoint for status check'); @@ -919,7 +919,9 @@ async function getStatusKobold() { async function getStatusTextgen() { const url = '/api/textgenerationwebui/status'; - let endpoint = getAPIServerUrl(); + let endpoint = textgen_settings.type === MANCER ? 
+ MANCER_SERVER : + api_server_textgenerationwebui; if (!endpoint) { console.warn('No endpoint for status check'); @@ -999,23 +1001,6 @@ export function resultCheckStatus() { stopStatusLoading(); } -// TODO(valadaptive): remove the usage of this function in the tokenizers code, then remove the function entirely -export function getAPIServerUrl() { - if (main_api == 'textgenerationwebui') { - if (textgen_settings.type === MANCER) { - return MANCER_SERVER; - } - - return api_server_textgenerationwebui; - } - - if (main_api == 'kobold') { - return api_server; - } - - return ''; -} - export async function selectCharacterById(id) { if (characters[id] == undefined) { return; From c48bc8a76ef9c4c6bf1275249863a4b396e073fe Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 21:29:36 -0500 Subject: [PATCH 023/179] Cache compiled Handlebars templates Since we already have a template cache, it makes sense to store the templates in it *after* compiling them, to avoid the overhead of re-compiling them every time we call renderTemplate. I've also changed the cache from an object to a Map--it's more semantically correct, and avoids weird edge cases like a template named "hasOwnProperty" or some other function that exists as an object property. --- public/script.js | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/public/script.js b/public/script.js index 64cb41752..ee236f448 100644 --- a/public/script.js +++ b/public/script.js @@ -526,14 +526,17 @@ function getUrlSync(url, cache = true) { }).responseText; } -const templateCache = {}; +const templateCache = new Map(); export function renderTemplate(templateId, templateData = {}, sanitize = true, localize = true, fullPath = false) { try { const pathToTemplate = fullPath ? templateId : `/scripts/templates/${templateId}.html`; - const templateContent = (pathToTemplate in templateCache) ? 
templateCache[pathToTemplate] : getUrlSync(pathToTemplate); - templateCache[pathToTemplate] = templateContent; - const template = Handlebars.compile(templateContent); + let template = templateCache.get(pathToTemplate); + if (!template) { + const templateContent = getUrlSync(pathToTemplate); + template = Handlebars.compile(templateContent); + templateCache.set(pathToTemplate, template); + } let result = template(templateData); if (sanitize) { From 0fce475a95750e00e539737c3000ad7c0249ad4a Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 22:37:49 -0500 Subject: [PATCH 024/179] Implement random sort with a shuffle Sorting with a random comparator doesn't actually shuffle an array. Depending on the sorting algorithm used, there will be a bias to the shuffle (see https://bost.ocks.org/mike/shuffle/compare.html). If you open that link in Firefox, the bias will be especially bad. Instead of implementing "random" character sort using a random sort comparator, use the shuffle function instead. --- public/scripts/power-user.js | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/public/scripts/power-user.js b/public/scripts/power-user.js index 21806cb99..75f22581d 100644 --- a/public/scripts/power-user.js +++ b/public/scripts/power-user.js @@ -35,7 +35,7 @@ import { registerSlashCommand } from './slash-commands.js'; import { tags } from './tags.js'; import { tokenizers } from './tokenizers.js'; -import { countOccurrences, debounce, delay, isOdd, resetScrollHeight, sortMoments, stringToRange, timestampToMoment } from './utils.js'; +import { countOccurrences, debounce, delay, isOdd, resetScrollHeight, shuffle, sortMoments, stringToRange, timestampToMoment } from './utils.js'; export { loadPowerUserSettings, @@ -1818,10 +1818,6 @@ export function renderStoryString(params) { const sortFunc = (a, b) => power_user.sort_order == 'asc' ? 
compareFunc(a, b) : compareFunc(b, a); const compareFunc = (first, second) => { - if (power_user.sort_order == 'random') { - return Math.random() > 0.5 ? 1 : -1; - } - const a = first[power_user.sort_field]; const b = second[power_user.sort_field]; @@ -1853,6 +1849,11 @@ function sortEntitiesList(entities) { return; } + if (power_user.sort_order === 'random') { + shuffle(entities); + return; + } + entities.sort((a, b) => { if (a.type === 'tag' && b.type !== 'tag') { return -1; From 55976e61a3b150067f90de4aaa05b2b7fc097077 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 23:57:21 -0500 Subject: [PATCH 025/179] Fix tokenizer override I searched for all users of tokenizers.API, but missed that the menu converts the numerical select values directly to enum values. I've used the special tokenizer value 98 to represent "the tokenizer API for whichever backend we're currently using". --- public/index.html | 2 +- public/scripts/tokenizers.js | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/public/index.html b/public/index.html index da5b6e0cb..8549d6194 100644 --- a/public/index.html +++ b/public/index.html @@ -2438,7 +2438,7 @@ - +
diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 6c406531f..5f7fdc01a 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -22,6 +22,7 @@ export const tokenizers = { MISTRAL: 7, YI: 8, API_TEXTGENERATIONWEBUI: 9, + API_CURRENT: 98, BEST_MATCH: 99, }; @@ -195,6 +196,19 @@ export function getTokenizerBestMatch(forApi) { return tokenizers.NONE; } +// Get the current remote tokenizer API based on the current text generation API. +function currentRemoteTokenizerAPI() { + switch (main_api) { + case 'kobold': + case 'koboldhorde': + return tokenizers.API_KOBOLD; + case 'textgenerationwebui': + return tokenizers.API_TEXTGENERATIONWEBUI; + default: + return tokenizers.NONE; + } +} + /** * Calls the underlying tokenizer model to the token count for a string. * @param {number} type Tokenizer type. @@ -205,6 +219,8 @@ function callTokenizer(type, str) { if (type === tokenizers.NONE) return guesstimate(str); switch (type) { + case tokenizers.API_CURRENT: + return callTokenizer(currentRemoteTokenizerAPI(), str); case tokenizers.API_KOBOLD: return countTokensFromKoboldAPI(str); case tokenizers.API_TEXTGENERATIONWEBUI: @@ -620,6 +636,8 @@ function decodeTextTokensFromServer(endpoint, ids) { */ export function getTextTokens(tokenizerType, str) { switch (tokenizerType) { + case tokenizers.API_CURRENT: + return callTokenizer(currentRemoteTokenizerAPI(), str); case tokenizers.API_TEXTGENERATIONWEBUI: return getTextTokensFromTextgenAPI(str); default: { @@ -647,6 +665,10 @@ export function getTextTokens(tokenizerType, str) { * @param {number[]} ids Array of token ids */ export function decodeTextTokens(tokenizerType, ids) { + // Currently, neither remote API can decode, but this may change in the future. 
Put this guard here to be safe + if (tokenizerType === tokenizers.API_CURRENT) { + return decodeTextTokens(tokenizers.NONE); + } const tokenizerEndpoints = TOKENIZER_URLS[tokenizerType]; if (!tokenizerEndpoints) { console.warn('Unknown tokenizer type', tokenizerType); From 0201a0260c00d0a498617838aefddff5609db3cb Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sun, 10 Dec 2023 04:43:12 -0500 Subject: [PATCH 026/179] Default event type to 'message' if it's empty Added to the upstream test suite. Matches the spec. --- public/scripts/sse-stream.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/scripts/sse-stream.js b/public/scripts/sse-stream.js index a904105ce..c9f7158d7 100644 --- a/public/scripts/sse-stream.js +++ b/public/scripts/sse-stream.js @@ -18,7 +18,7 @@ class EventSourceStream { streamBuffer = events.pop(); for (const eventChunk of events) { - let eventType = 'message'; + let eventType = ''; // Split up by single newlines. const lines = eventChunk.split(/\n|\r|\r\n/g); let eventData = ''; @@ -55,7 +55,7 @@ class EventSourceStream { } // Trim the *last* trailing newline only. 
- const event = new MessageEvent(eventType, { data: eventData, lastEventId }); + const event = new MessageEvent(eventType || 'message', { data: eventData, lastEventId }); controller.enqueue(event); } } From 5f1683f43a750f3eea1c21fa93eb82b48b2557d9 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 10 Dec 2023 15:07:39 +0200 Subject: [PATCH 027/179] More input padding and stricter sanitation --- public/script.js | 2 +- public/scripts/RossAscends-mods.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/public/script.js b/public/script.js index 64cb41752..aac06febd 100644 --- a/public/script.js +++ b/public/script.js @@ -1494,7 +1494,7 @@ function messageFormatting(mes, ch_name, isSystem, isUser) { mes = mes.replace(new RegExp(`(^|\n)${ch_name}:`, 'g'), '$1'); } - mes = DOMPurify.sanitize(mes); + mes = DOMPurify.sanitize(mes, { FORBID_TAGS: ['style'] }); return mes; } diff --git a/public/scripts/RossAscends-mods.js b/public/scripts/RossAscends-mods.js index a565502eb..eb70341ba 100644 --- a/public/scripts/RossAscends-mods.js +++ b/public/scripts/RossAscends-mods.js @@ -902,7 +902,7 @@ export function initRossMods() { const chatBlock = $('#chat'); const originalScrollBottom = chatBlock[0].scrollHeight - (chatBlock.scrollTop() + chatBlock.outerHeight()); this.style.height = window.getComputedStyle(this).getPropertyValue('min-height'); - this.style.height = this.scrollHeight + 0.1 + 'px'; + this.style.height = this.scrollHeight + 0.3 + 'px'; if (!isFirefox) { const newScrollTop = Math.round(chatBlock[0].scrollHeight - (chatBlock.outerHeight() + originalScrollBottom)); From 6e5eea5dba99d1046c473fb341a9dfc659458992 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 10 Dec 2023 15:56:38 +0200 Subject: [PATCH 028/179] Unbreak previously selected API tokenizer in dropdown --- public/index.html | 2 +- public/scripts/tokenizers.js | 4 ++-- 2 files changed, 3 insertions(+), 
3 deletions(-) diff --git a/public/index.html b/public/index.html index 8549d6194..da5b6e0cb 100644 --- a/public/index.html +++ b/public/index.html @@ -2438,7 +2438,7 @@ - +
diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 5f7fdc01a..87d72c1a5 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -18,11 +18,11 @@ export const tokenizers = { LLAMA: 3, NERD: 4, NERD2: 5, - API_KOBOLD: 6, + API_CURRENT: 6, MISTRAL: 7, YI: 8, API_TEXTGENERATIONWEBUI: 9, - API_CURRENT: 98, + API_KOBOLD: 10, BEST_MATCH: 99, }; From 6957d9e7cf73e07ed5c59722abb0c61b9484057e Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 10 Dec 2023 16:03:25 +0200 Subject: [PATCH 029/179] Fix display names of Best match tokenizers --- public/scripts/tokenizers.js | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 87d72c1a5..3d8357e13 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -139,7 +139,18 @@ export function getFriendlyTokenizerName(forApi) { if (forApi !== 'openai' && tokenizerId === tokenizers.BEST_MATCH) { tokenizerId = getTokenizerBestMatch(forApi); - tokenizerName = $(`#tokenizer option[value="${tokenizerId}"]`).text(); + + switch (tokenizerId) { + case tokenizers.API_KOBOLD: + tokenizerName = 'API (KoboldAI Classic)'; + break; + case tokenizers.API_TEXTGENERATIONWEBUI: + tokenizerName = 'API (Text Completion)'; + break; + default: + tokenizerName = $(`#tokenizer option[value="${tokenizerId}"]`).text(); + break; + } } tokenizerName = forApi == 'openai' From f54bf99006e49625cbedd60c1386d472b2b92632 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 10 Dec 2023 16:09:00 +0200 Subject: [PATCH 030/179] Fix token ids not displaying in "API_CURRENT" mode for TextGen --- public/scripts/tokenizers.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 3d8357e13..7db5d0887 100644 --- a/public/scripts/tokenizers.js +++ 
b/public/scripts/tokenizers.js @@ -648,7 +648,7 @@ function decodeTextTokensFromServer(endpoint, ids) { export function getTextTokens(tokenizerType, str) { switch (tokenizerType) { case tokenizers.API_CURRENT: - return callTokenizer(currentRemoteTokenizerAPI(), str); + return getTextTokens(currentRemoteTokenizerAPI(), str); case tokenizers.API_TEXTGENERATIONWEBUI: return getTextTokensFromTextgenAPI(str); default: { @@ -678,7 +678,7 @@ export function getTextTokens(tokenizerType, str) { export function decodeTextTokens(tokenizerType, ids) { // Currently, neither remote API can decode, but this may change in the future. Put this guard here to be safe if (tokenizerType === tokenizers.API_CURRENT) { - return decodeTextTokens(tokenizers.NONE); + return decodeTextTokens(tokenizers.NONE, ids); } const tokenizerEndpoints = TOKENIZER_URLS[tokenizerType]; if (!tokenizerEndpoints) { From 9acef0fae615214991453cc4028c2601227c8854 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 10 Dec 2023 16:21:06 +0200 Subject: [PATCH 031/179] Horde doesn't support API tokenizers --- public/scripts/tokenizers.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 7db5d0887..bef54b791 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -211,7 +211,6 @@ export function getTokenizerBestMatch(forApi) { function currentRemoteTokenizerAPI() { switch (main_api) { case 'kobold': - case 'koboldhorde': return tokenizers.API_KOBOLD; case 'textgenerationwebui': return tokenizers.API_TEXTGENERATIONWEBUI; @@ -240,7 +239,7 @@ function callTokenizer(type, str) { const endpointUrl = TOKENIZER_URLS[type]?.count; if (!endpointUrl) { console.warn('Unknown tokenizer type', type); - return callTokenizer(tokenizers.NONE, str); + return apiFailureTokenCount(str); } return countTokensFromServer(endpointUrl, str); } @@ -654,11 +653,13 @@ export function 
getTextTokens(tokenizerType, str) { default: { const tokenizerEndpoints = TOKENIZER_URLS[tokenizerType]; if (!tokenizerEndpoints) { + apiFailureTokenCount(str); console.warn('Unknown tokenizer type', tokenizerType); return []; } let endpointUrl = tokenizerEndpoints.encode; if (!endpointUrl) { + apiFailureTokenCount(str); console.warn('This tokenizer type does not support encoding', tokenizerType); return []; } From af89cfa870654817b4fc40a0dbe2cd9ff341eeff Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 10 Dec 2023 16:48:25 +0200 Subject: [PATCH 032/179] Code clean-up --- public/scripts/chats.js | 35 +++++++++++- .../scripts/extensions/quick-reply/index.js | 53 +------------------ 2 files changed, 34 insertions(+), 54 deletions(-) diff --git a/public/scripts/chats.js b/public/scripts/chats.js index 2fbe3760a..cd8a8677e 100644 --- a/public/scripts/chats.js +++ b/public/scripts/chats.js @@ -380,6 +380,7 @@ jQuery(function () { $(document).on('click', '.editor_maximize', function () { const broId = $(this).attr('data-for'); const bro = $(`#${broId}`); + const withTab = $(this).attr('data-tab'); if (!bro.length) { console.error('Could not find editor with id', broId); @@ -392,11 +393,41 @@ jQuery(function () { const textarea = document.createElement('textarea'); textarea.value = String(bro.val()); textarea.classList.add('height100p', 'wide100p'); - textarea.oninput = function () { + textarea.addEventListener('input', function () { bro.val(textarea.value).trigger('input'); - }; + }); wrapper.appendChild(textarea); + if (withTab) { + textarea.addEventListener('keydown', (evt) => { + if (evt.key == 'Tab' && !evt.shiftKey && !evt.ctrlKey && !evt.altKey) { + evt.preventDefault(); + const start = textarea.selectionStart; + const end = textarea.selectionEnd; + if (end - start > 0 && textarea.value.substring(start, end).includes('\n')) { + const lineStart = textarea.value.lastIndexOf('\n', start); + const count = 
textarea.value.substring(lineStart, end).split('\n').length - 1; + textarea.value = `${textarea.value.substring(0, lineStart)}${textarea.value.substring(lineStart, end).replace(/\n/g, '\n\t')}${textarea.value.substring(end)}`; + textarea.selectionStart = start + 1; + textarea.selectionEnd = end + count; + } else { + textarea.value = `${textarea.value.substring(0, start)}\t${textarea.value.substring(end)}`; + textarea.selectionStart = start + 1; + textarea.selectionEnd = end + 1; + } + } else if (evt.key == 'Tab' && evt.shiftKey && !evt.ctrlKey && !evt.altKey) { + evt.preventDefault(); + const start = textarea.selectionStart; + const end = textarea.selectionEnd; + const lineStart = textarea.value.lastIndexOf('\n', start); + const count = textarea.value.substring(lineStart, end).split('\n\t').length - 1; + textarea.value = `${textarea.value.substring(0, lineStart)}${textarea.value.substring(lineStart, end).replace(/\n\t/g, '\n')}${textarea.value.substring(end)}`; + textarea.selectionStart = start - 1; + textarea.selectionEnd = end - count; + } + }); + } + callPopup(wrapper, 'text', '', { wide: true, large: true }); }); diff --git a/public/scripts/extensions/quick-reply/index.js b/public/scripts/extensions/quick-reply/index.js index 7962cd41d..6ce80d51a 100644 --- a/public/scripts/extensions/quick-reply/index.js +++ b/public/scripts/extensions/quick-reply/index.js @@ -639,7 +639,7 @@ function generateQuickReplyElements() { - +
`; @@ -889,57 +889,6 @@ jQuery(async () => { saveSettingsDebounced(); }); - // taken and adjusted from chats.js (.editor_maximize) - $(document).on('click', '.editor_maximize_with_tab', function () { - const broId = $(this).attr('data-for'); - const bro = $(`#${broId}`); - - if (!bro.length) { - console.error('Could not find editor with id', broId); - return; - } - - const wrapper = document.createElement('div'); - wrapper.classList.add('height100p', 'wide100p', 'flex-container'); - wrapper.classList.add('flexFlowColumn', 'justifyCenter', 'alignitemscenter'); - const textarea = document.createElement('textarea'); - textarea.value = String(bro.val()); - textarea.classList.add('height100p', 'wide100p'); - textarea.addEventListener('keydown', (evt) => { - if (evt.key == 'Tab' && !evt.shiftKey && !evt.ctrlKey && !evt.altKey) { - evt.preventDefault(); - const start = textarea.selectionStart; - const end = textarea.selectionEnd; - if (end - start > 0 && textarea.value.substring(start, end).includes('\n')) { - const lineStart = textarea.value.lastIndexOf('\n', start); - const count = textarea.value.substring(lineStart, end).split('\n').length - 1; - textarea.value = `${textarea.value.substring(0, lineStart)}${textarea.value.substring(lineStart, end).replace(/\n/g, '\n\t')}${textarea.value.substring(end)}`; - textarea.selectionStart = start + 1; - textarea.selectionEnd = end + count; - } else { - textarea.value = `${textarea.value.substring(0, start)}\t${textarea.value.substring(end)}`; - textarea.selectionStart = start + 1; - textarea.selectionEnd = end + 1; - } - } else if (evt.key == 'Tab' && evt.shiftKey && !evt.ctrlKey && !evt.altKey) { - evt.preventDefault(); - const start = textarea.selectionStart; - const end = textarea.selectionEnd; - const lineStart = textarea.value.lastIndexOf('\n', start); - const count = textarea.value.substring(lineStart, end).split('\n\t').length - 1; - textarea.value = `${textarea.value.substring(0, 
lineStart)}${textarea.value.substring(lineStart, end).replace(/\n\t/g, '\n')}${textarea.value.substring(end)}`; - textarea.selectionStart = start - 1; - textarea.selectionEnd = end - count; - } - }); - textarea.addEventListener('inpupt', () => { - bro.val(textarea.value).trigger('input'); - }); - wrapper.appendChild(textarea); - - callPopup(wrapper, 'text', '', { wide: true, large: true }); - }); - await loadSettings('init'); addQuickReplyBar(); From b107ace719085e287b8cb9a7639f319a5a25c824 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 10 Dec 2023 18:00:04 +0200 Subject: [PATCH 033/179] Avoid sending 401 in forwarded responses. Status code 401 resets the client Basic auth, so we replace it with 400. This can produce an interesting artifact as "400 Unauthorized", but it's not out of spec. "The reason phrases listed here are only recommendations -- they can be replaced by local equivalents or left out altogether without affecting the protocol." https://www.rfc-editor.org/rfc/rfc9110.html#name-overview-of-status-codes --- src/util.js | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/util.js b/src/util.js index c6f344c71..11b864092 100644 --- a/src/util.js +++ b/src/util.js @@ -353,8 +353,24 @@ function getImages(path) { * @param {Express.Response} to The Express response to pipe to. */ function forwardFetchResponse(from, to) { - to.statusCode = from.status; - to.statusMessage = from.statusText; + let statusCode = from.status; + let statusText = from.statusText; + + if (!from.ok) { + console.log(`Streaming request failed with status ${statusCode} ${statusText}`); + } + + // Avoid sending 401 responses as they reset the client Basic auth. + // This can produce an interesting artifact as "400 Unauthorized", but it's not out of spec. 
+ // https://www.rfc-editor.org/rfc/rfc9110.html#name-overview-of-status-codes + // "The reason phrases listed here are only recommendations -- they can be replaced by local + // equivalents or left out altogether without affecting the protocol." + if (statusCode === 401) { + statusCode = 400; + } + + to.statusCode = statusCode; + to.statusMessage = statusText; from.body.pipe(to); to.socket.on('close', function () { From bf88829b03b330a6d02a18e7317d9acbf61deefc Mon Sep 17 00:00:00 2001 From: LenAnderson Date: Sun, 10 Dec 2023 16:32:10 +0000 Subject: [PATCH 034/179] add option to skip codeblock narration --- public/scripts/extensions/tts/index.js | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js index 9ae47750b..eb3a25da1 100644 --- a/public/scripts/extensions/tts/index.js +++ b/public/scripts/extensions/tts/index.js @@ -482,6 +482,12 @@ async function processTtsQueue() { console.debug('New message found, running TTS'); currentTtsJob = ttsJobQueue.shift(); let text = extension_settings.tts.narrate_translated_only ? (currentTtsJob?.extra?.display_text || currentTtsJob.mes) : currentTtsJob.mes; + + if (extension_settings.tts.skip_codeblocks) { + text = text.replace(/^\s{4}.*$/gm, '').trim(); + text = text.replace(/```.*?```/gs, '').trim(); + } + text = extension_settings.tts.narrate_dialogues_only ? text.replace(/\*[^*]*?(\*|$)/g, '').trim() // remove asterisks content : text.replaceAll('*', '').trim(); // remove just the asterisks @@ -639,6 +645,11 @@ function onNarrateTranslatedOnlyClick() { saveSettingsDebounced(); } +function onSkipCodeblocksClick() { + extension_settings.tts.skip_codeblocks = !!$('#tts_skip_codeblocks').prop('checked'); + saveSettingsDebounced(); +} + //##############// // TTS Provider // //##############// @@ -952,6 +963,10 @@ $(document).ready(function () { Narrate only the translated text +
@@ -972,6 +987,7 @@ $(document).ready(function () { $('#tts_narrate_dialogues').on('click', onNarrateDialoguesClick); $('#tts_narrate_quoted').on('click', onNarrateQuotedClick); $('#tts_narrate_translated_only').on('click', onNarrateTranslatedOnlyClick); + $('#tts_skip_codeblocks').on('click', onSkipCodeblocksClick); $('#tts_auto_generation').on('click', onAutoGenerationClick); $('#tts_narrate_user').on('click', onNarrateUserClick); $('#tts_voices').on('click', onTtsVoicesClick); From f8a903e1fdb7e89dce93de16821a1d89caf9724f Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 10 Dec 2023 18:45:55 +0200 Subject: [PATCH 035/179] #1506 Fix big markdown images --- public/style.css | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/public/style.css b/public/style.css index 38de605cb..b7ef54902 100644 --- a/public/style.css +++ b/public/style.css @@ -316,6 +316,11 @@ table.responsiveTable { padding: 1em; } +.mes_text img:not(.mes_img) { + max-width: 100%; + max-height: var(--doc-height); +} + .mes .mes_timer, .mes .mesIDDisplay, .mes .tokenCounterDisplay { From f5d2e50f5e5417579d105f67892d4c0e35c5e097 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Thu, 7 Dec 2023 16:20:54 -0500 Subject: [PATCH 036/179] Remove isGenerationAborted Just check the AbortSignal. 
--- public/scripts/group-chats.js | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/public/scripts/group-chats.js b/public/scripts/group-chats.js index b906cf2cf..7acf4ae9e 100644 --- a/public/scripts/group-chats.js +++ b/public/scripts/group-chats.js @@ -658,7 +658,6 @@ async function generateGroupWrapper(by_auto_mode, type = null, params = {}) { let activationText = ''; let isUserInput = false; let isGenerationDone = false; - let isGenerationAborted = false; if (userInput?.length && !by_auto_mode) { isUserInput = true; @@ -673,14 +672,8 @@ async function generateGroupWrapper(by_auto_mode, type = null, params = {}) { const resolveOriginal = params.resolve; const rejectOriginal = params.reject; - if (params.signal instanceof AbortSignal) { - if (params.signal.aborted) { + if (params.signal instanceof AbortSignal && params.signal.aborted) { throw new Error('Already aborted signal passed. Group generation stopped'); - } - - params.signal.onabort = () => { - isGenerationAborted = true; - }; } if (typeof params.resolve === 'function') { @@ -760,7 +753,7 @@ async function generateGroupWrapper(by_auto_mode, type = null, params = {}) { // TODO: This is awful. Refactor this while (true) { deactivateSendButtons(); - if (isGenerationAborted) { + if (params.signal instanceof AbortSignal && params.signal.aborted) { throw new Error('Group generation aborted'); } From 03884b29adc7d5da31cd70309bb3f29c24977914 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Thu, 7 Dec 2023 17:32:07 -0500 Subject: [PATCH 037/179] Always call resolve in Generate() This lets us get rid of the janky hack in group-chats to tell when a message is done generating. 
--- public/script.js | 27 ++++--- public/scripts/group-chats.js | 128 +++++++--------------------------- 2 files changed, 44 insertions(+), 111 deletions(-) diff --git a/public/script.js b/public/script.js index 373b5c9f7..0902d0771 100644 --- a/public/script.js +++ b/public/script.js @@ -2916,6 +2916,14 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, abortController = new AbortController(); } + // Set empty promise resolution functions + if (typeof resolve !== 'function') { + resolve = () => { }; + } + if (typeof reject !== 'function') { + reject = () => { }; + } + // OpenAI doesn't need instruct mode. Use OAI main prompt instead. const isInstruct = power_user.instruct.enabled && main_api !== 'openai'; const isImpersonate = type == 'impersonate'; @@ -2927,12 +2935,14 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, if (interruptedByCommand) { //$("#send_textarea").val('').trigger('input'); unblockGeneration(); + resolve(); return; } if (main_api == 'kobold' && kai_settings.streaming_kobold && !kai_flags.can_use_streaming) { toastr.error('Streaming is enabled, but the version of Kobold used does not support token streaming.', undefined, { timeOut: 10000, preventDuplicates: true }); unblockGeneration(); + resolve(); return; } @@ -2942,11 +2952,13 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, textgen_settings.type !== MANCER) { toastr.error('Streaming is not supported for the Legacy API. 
Update Ooba and use --extensions openai to enable streaming.', undefined, { timeOut: 10000, preventDuplicates: true }); unblockGeneration(); + resolve(); return; } if (isHordeGenerationNotAllowed()) { unblockGeneration(); + resolve(); return; } @@ -2955,14 +2967,6 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, hideSwipeButtons(); } - // Set empty promise resolution functions - if (typeof resolve !== 'function') { - resolve = () => { }; - } - if (typeof reject !== 'function') { - reject = () => { }; - } - if (selected_group && !is_group_generating && !dryRun) { generateGroupWrapper(false, type, { resolve, reject, quiet_prompt, force_chid, signal: abortController.signal, quietImage }); return; @@ -2987,6 +2991,7 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, } else { console.log('No enabled members found'); unblockGeneration(); + resolve(); return; } } @@ -3151,6 +3156,7 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, if (aborted) { console.debug('Generation aborted by extension interceptors'); unblockGeneration(); + resolve(); return; } } else { @@ -3206,6 +3212,7 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, } catch { unblockGeneration(); + resolve(); return; } if (horde_settings.auto_adjust_context_length) { @@ -3925,6 +3932,8 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, console.debug('swiping right automatically'); is_send_press = false; swipe_right(); + // TODO: do we want to resolve after an auto-swipe? 
+ resolve(); return; } } @@ -3950,8 +3959,8 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, if (type !== 'quiet') { triggerAutoContinue(messageChunk, isImpersonate); - resolve(); } + resolve(); } function onError(exception) { diff --git a/public/scripts/group-chats.js b/public/scripts/group-chats.js index 7acf4ae9e..21929684f 100644 --- a/public/scripts/group-chats.js +++ b/public/scripts/group-chats.js @@ -8,7 +8,6 @@ import { extractAllWords, saveBase64AsFile, PAGINATION_TEMPLATE, - waitUntilCondition, getBase64Async, } from './utils.js'; import { RA_CountCharTokens, humanizedDateTime, dragElement, favsToHotswap, getMessageTimeStamp } from './RossAscends-mods.js'; @@ -46,7 +45,6 @@ import { updateChatMetadata, isStreamingEnabled, getThumbnailUrl, - streamingProcessor, getRequestHeaders, setMenuType, menu_type, @@ -653,41 +651,20 @@ async function generateGroupWrapper(by_auto_mode, type = null, params = {}) { // id of this specific batch for regeneration purposes group_generation_id = Date.now(); const lastMessage = chat[chat.length - 1]; - let messagesBefore = chat.length; - let lastMessageText = lastMessage?.mes || ''; let activationText = ''; let isUserInput = false; - let isGenerationDone = false; if (userInput?.length && !by_auto_mode) { isUserInput = true; activationText = userInput; - messagesBefore++; } else { if (lastMessage && !lastMessage.is_system) { activationText = lastMessage.mes; } } - const resolveOriginal = params.resolve; - const rejectOriginal = params.reject; - if (params.signal instanceof AbortSignal && params.signal.aborted) { - throw new Error('Already aborted signal passed. 
Group generation stopped'); - } - - if (typeof params.resolve === 'function') { - params.resolve = function () { - isGenerationDone = true; - resolveOriginal.apply(this, arguments); - }; - } - - if (typeof params.reject === 'function') { - params.reject = function () { - isGenerationDone = true; - rejectOriginal.apply(this, arguments); - }; + throw new Error('Already aborted signal passed. Group generation stopped'); } const activationStrategy = Number(group.activation_strategy ?? group_activation_strategy.NATURAL); @@ -735,90 +712,37 @@ async function generateGroupWrapper(by_auto_mode, type = null, params = {}) { // now the real generation begins: cycle through every activated character for (const chId of activatedMembers) { deactivateSendButtons(); - isGenerationDone = false; const generateType = type == 'swipe' || type == 'impersonate' || type == 'quiet' || type == 'continue' ? type : 'group_chat'; setCharacterId(chId); setCharacterName(characters[chId].name); - await Generate(generateType, { automatic_trigger: by_auto_mode, ...(params || {}) }); + // Wait for generation to finish + await new Promise(async (resolve, reject) => { + await Generate(generateType, { + automatic_trigger: by_auto_mode, + ...(params || {}), + resolve: function(...args) { + if (typeof params.resolve === 'function') { + params.resolve(...args); + } + resolve(); + }, + reject: function(...args) { + if (typeof params.reject === 'function') { + params.reject(...args); + } + reject(); + }, + }); - if (type !== 'swipe' && type !== 'impersonate' && !isStreamingEnabled()) { - // update indicator and scroll down - typingIndicator - .find('.typing_indicator_name') - .text(characters[chId].name); - typingIndicator.show(); - } - - // TODO: This is awful. 
Refactor this - while (true) { - deactivateSendButtons(); - if (params.signal instanceof AbortSignal && params.signal.aborted) { - throw new Error('Group generation aborted'); + if (type !== 'swipe' && type !== 'impersonate' && !isStreamingEnabled()) { + // update indicator and scroll down + typingIndicator + .find('.typing_indicator_name') + .text(characters[chId].name); + typingIndicator.show(); } - - // if not swipe - check if message generated already - if (generateType === 'group_chat' && chat.length == messagesBefore) { - await delay(100); - } - // if swipe - see if message changed - else if (type === 'swipe') { - if (isStreamingEnabled()) { - if (streamingProcessor && !streamingProcessor.isFinished) { - await delay(100); - } - else { - break; - } - } - else { - if (lastMessageText === chat[chat.length - 1].mes) { - await delay(100); - } - else { - break; - } - } - } - else if (type === 'impersonate') { - if (isStreamingEnabled()) { - if (streamingProcessor && !streamingProcessor.isFinished) { - await delay(100); - } - else { - break; - } - } - else { - if (!$('#send_textarea').val() || $('#send_textarea').val() == userInput) { - await delay(100); - } - else { - break; - } - } - } - else if (type === 'quiet') { - if (isGenerationDone) { - break; - } else { - await delay(100); - } - } - else if (isStreamingEnabled()) { - if (streamingProcessor && !streamingProcessor.isFinished) { - await delay(100); - } else { - await waitUntilCondition(() => streamingProcessor == null, 1000, 10); - messagesBefore++; - break; - } - } - else { - messagesBefore++; - break; - } - } + }); } } finally { typingIndicator.hide(); From 33f969f097d4cc2cd0dc5b94043d94d59a712440 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Thu, 7 Dec 2023 17:46:15 -0500 Subject: [PATCH 038/179] Have Generate() return a promise Generate(), being async, now returns a promise-within-a-promise. If called with `let p = await Generate(...)`, it'll wait for generation to *start*. 
If you then `await p`, you'll wait for generation to *finish*. This makes it much easier to tell exactly when generation's done. generateGroupWrapper has been similarly modified. --- public/script.js | 514 ++++++++++++++++------------------ public/scripts/group-chats.js | 42 +-- 2 files changed, 256 insertions(+), 300 deletions(-) diff --git a/public/script.js b/public/script.js index 0902d0771..98481d21c 100644 --- a/public/script.js +++ b/public/script.js @@ -2307,26 +2307,8 @@ function getStoppingStrings(isImpersonate, isContinue) { */ export async function generateQuietPrompt(quiet_prompt, quietToLoud, skipWIAN, quietImage = null) { console.log('got into genQuietPrompt'); - return await new Promise( - async function promptPromise(resolve, reject) { - if (quietToLoud === true) { - try { - await Generate('quiet', { resolve, reject, quiet_prompt, quietToLoud: true, skipWIAN: skipWIAN, force_name2: true, quietImage: quietImage }); - } - catch { - reject(); - } - } - else { - try { - console.log('going to generate non-QuietToLoud'); - await Generate('quiet', { resolve, reject, quiet_prompt, quietToLoud: false, skipWIAN: skipWIAN, force_name2: true, quietImage: quietImage }); - } - catch { - reject(); - } - } - }); + const generateFinished = await Generate('quiet', { quiet_prompt, quietToLoud, skipWIAN: skipWIAN, force_name2: true, quietImage: quietImage }); + await generateFinished; } async function processCommands(message, type, dryRun) { @@ -2906,7 +2888,8 @@ export async function generateRaw(prompt, api, instructOverride) { return message; } -async function Generate(type, { automatic_trigger, force_name2, resolve, reject, quiet_prompt, quietToLoud, skipWIAN, force_chid, signal, quietImage } = {}, dryRun = false) { +// Returns a promise that resolves when the text is done generating. 
+async function Generate(type, { automatic_trigger, force_name2, quiet_prompt, quietToLoud, skipWIAN, force_chid, signal, quietImage } = {}, dryRun = false) { console.log('Generate entered'); setGenerationProgress(0); generation_started = new Date(); @@ -2916,14 +2899,6 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, abortController = new AbortController(); } - // Set empty promise resolution functions - if (typeof resolve !== 'function') { - resolve = () => { }; - } - if (typeof reject !== 'function') { - reject = () => { }; - } - // OpenAI doesn't need instruct mode. Use OAI main prompt instead. const isInstruct = power_user.instruct.enabled && main_api !== 'openai'; const isImpersonate = type == 'impersonate'; @@ -2935,15 +2910,13 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, if (interruptedByCommand) { //$("#send_textarea").val('').trigger('input'); unblockGeneration(); - resolve(); - return; + return Promise.resolve(); } if (main_api == 'kobold' && kai_settings.streaming_kobold && !kai_flags.can_use_streaming) { toastr.error('Streaming is enabled, but the version of Kobold used does not support token streaming.', undefined, { timeOut: 10000, preventDuplicates: true }); unblockGeneration(); - resolve(); - return; + return Promise.resolve(); } if (main_api === 'textgenerationwebui' && @@ -2952,14 +2925,12 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, textgen_settings.type !== MANCER) { toastr.error('Streaming is not supported for the Legacy API. Update Ooba and use --extensions openai to enable streaming.', undefined, { timeOut: 10000, preventDuplicates: true }); unblockGeneration(); - resolve(); - return; + return Promise.resolve(); } if (isHordeGenerationNotAllowed()) { unblockGeneration(); - resolve(); - return; + return Promise.resolve(); } // Hide swipes if not in a dry run. 
@@ -2968,8 +2939,8 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, } if (selected_group && !is_group_generating && !dryRun) { - generateGroupWrapper(false, type, { resolve, reject, quiet_prompt, force_chid, signal: abortController.signal, quietImage }); - return; + // TODO: await here! + return generateGroupWrapper(false, type, { quiet_prompt, force_chid, signal: abortController.signal, quietImage }); } else if (selected_group && !is_group_generating && dryRun) { const characterIndexMap = new Map(characters.map((char, index) => [char.avatar, index])); const group = groups.find((x) => x.id === selected_group); @@ -2991,8 +2962,7 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, } else { console.log('No enabled members found'); unblockGeneration(); - resolve(); - return; + return Promise.resolve(); } } @@ -3156,8 +3126,7 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, if (aborted) { console.debug('Generation aborted by extension interceptors'); unblockGeneration(); - resolve(); - return; + return Promise.resolve(); } } else { console.debug('Skipping extension interceptors for dry run'); @@ -3212,8 +3181,7 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, } catch { unblockGeneration(); - resolve(); - return; + return Promise.resolve(); } if (horde_settings.auto_adjust_context_length) { this_max_context = (adjustedParams.maxContextLength - adjustedParams.maxLength); @@ -3373,7 +3341,7 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, } const originalType = type; - runGenerate(cyclePrompt); + return runGenerate(cyclePrompt); async function runGenerate(cycleGenerationPrompt = '') { if (!dryRun) { @@ -3721,258 +3689,260 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, } } - if (true === dryRun) return onSuccess({ error: 'dryRun' }); + return new Promise(async 
(resolve, reject) => { + if (true === dryRun) return onSuccess({ error: 'dryRun' }); - if (power_user.console_log_prompts) { - console.log(generate_data.prompt); - } + if (power_user.console_log_prompts) { + console.log(generate_data.prompt); + } - let generate_url = getGenerateUrl(main_api); - console.debug('rungenerate calling API'); + let generate_url = getGenerateUrl(main_api); + console.debug('rungenerate calling API'); - showStopButton(); + showStopButton(); - //set array object for prompt token itemization of this message - let currentArrayEntry = Number(thisPromptBits.length - 1); - let additionalPromptStuff = { - ...thisPromptBits[currentArrayEntry], - rawPrompt: generate_data.prompt || generate_data.input, - mesId: getNextMessageId(type), - allAnchors: allAnchors, - summarizeString: (extension_prompts['1_memory']?.value || ''), - authorsNoteString: (extension_prompts['2_floating_prompt']?.value || ''), - smartContextString: (extension_prompts['chromadb']?.value || ''), - worldInfoString: worldInfoString, - storyString: storyString, - beforeScenarioAnchor: beforeScenarioAnchor, - afterScenarioAnchor: afterScenarioAnchor, - examplesString: examplesString, - mesSendString: mesSendString, - generatedPromptCache: generatedPromptCache, - promptBias: promptBias, - finalPrompt: finalPrompt, - charDescription: description, - charPersonality: personality, - scenarioText: scenario, - this_max_context: this_max_context, - padding: power_user.token_padding, - main_api: main_api, - instruction: isInstruct ? substituteParams(power_user.prefer_character_prompt && system ? 
system : power_user.instruct.system_prompt) : '', - userPersona: (power_user.persona_description || ''), - }; + //set array object for prompt token itemization of this message + let currentArrayEntry = Number(thisPromptBits.length - 1); + let additionalPromptStuff = { + ...thisPromptBits[currentArrayEntry], + rawPrompt: generate_data.prompt || generate_data.input, + mesId: getNextMessageId(type), + allAnchors: allAnchors, + summarizeString: (extension_prompts['1_memory']?.value || ''), + authorsNoteString: (extension_prompts['2_floating_prompt']?.value || ''), + smartContextString: (extension_prompts['chromadb']?.value || ''), + worldInfoString: worldInfoString, + storyString: storyString, + beforeScenarioAnchor: beforeScenarioAnchor, + afterScenarioAnchor: afterScenarioAnchor, + examplesString: examplesString, + mesSendString: mesSendString, + generatedPromptCache: generatedPromptCache, + promptBias: promptBias, + finalPrompt: finalPrompt, + charDescription: description, + charPersonality: personality, + scenarioText: scenario, + this_max_context: this_max_context, + padding: power_user.token_padding, + main_api: main_api, + instruction: isInstruct ? substituteParams(power_user.prefer_character_prompt && system ? system : power_user.instruct.system_prompt) : '', + userPersona: (power_user.persona_description || ''), + }; - thisPromptBits = additionalPromptStuff; + thisPromptBits = additionalPromptStuff; - //console.log(thisPromptBits); - const itemizedIndex = itemizedPrompts.findIndex((item) => item.mesId === thisPromptBits['mesId']); + //console.log(thisPromptBits); + const itemizedIndex = itemizedPrompts.findIndex((item) => item.mesId === thisPromptBits['mesId']); - if (itemizedIndex !== -1) { - itemizedPrompts[itemizedIndex] = thisPromptBits; - } - else { - itemizedPrompts.push(thisPromptBits); - } - - console.debug(`pushed prompt bits to itemizedPrompts array. 
Length is now: ${itemizedPrompts.length}`); - /** @type {Promise} */ - let streamingGeneratorPromise = Promise.resolve(); - - if (main_api == 'openai') { - if (isStreamingEnabled() && type !== 'quiet') { - streamingGeneratorPromise = sendOpenAIRequest(type, generate_data.prompt, streamingProcessor.abortController.signal); + if (itemizedIndex !== -1) { + itemizedPrompts[itemizedIndex] = thisPromptBits; } else { - sendOpenAIRequest(type, generate_data.prompt, abortController.signal).then(onSuccess).catch(onError); + itemizedPrompts.push(thisPromptBits); } - } - else if (main_api == 'koboldhorde') { - generateHorde(finalPrompt, generate_data, abortController.signal, true).then(onSuccess).catch(onError); - } - else if (main_api == 'textgenerationwebui' && isStreamingEnabled() && type !== 'quiet') { - streamingGeneratorPromise = generateTextGenWithStreaming(generate_data, streamingProcessor.abortController.signal); - } - else if (main_api == 'novel' && isStreamingEnabled() && type !== 'quiet') { - streamingGeneratorPromise = generateNovelWithStreaming(generate_data, streamingProcessor.abortController.signal); - } - else if (main_api == 'kobold' && isStreamingEnabled() && type !== 'quiet') { - streamingGeneratorPromise = generateKoboldWithStreaming(generate_data, streamingProcessor.abortController.signal); - } - else { - try { - const response = await fetch(generate_url, { - method: 'POST', - headers: getRequestHeaders(), - cache: 'no-cache', - body: JSON.stringify(generate_data), - signal: abortController.signal, - }); - if (!response.ok) { - const error = await response.json(); - throw error; + console.debug(`pushed prompt bits to itemizedPrompts array. 
Length is now: ${itemizedPrompts.length}`); + /** @type {Promise} */ + let streamingGeneratorPromise = Promise.resolve(); + + if (main_api == 'openai') { + if (isStreamingEnabled() && type !== 'quiet') { + streamingGeneratorPromise = sendOpenAIRequest(type, generate_data.prompt, streamingProcessor.abortController.signal); + } + else { + sendOpenAIRequest(type, generate_data.prompt, abortController.signal).then(onSuccess).catch(onError); + } + } + else if (main_api == 'koboldhorde') { + generateHorde(finalPrompt, generate_data, abortController.signal, true).then(onSuccess).catch(onError); + } + else if (main_api == 'textgenerationwebui' && isStreamingEnabled() && type !== 'quiet') { + streamingGeneratorPromise = generateTextGenWithStreaming(generate_data, streamingProcessor.abortController.signal); + } + else if (main_api == 'novel' && isStreamingEnabled() && type !== 'quiet') { + streamingGeneratorPromise = generateNovelWithStreaming(generate_data, streamingProcessor.abortController.signal); + } + else if (main_api == 'kobold' && isStreamingEnabled() && type !== 'quiet') { + streamingGeneratorPromise = generateKoboldWithStreaming(generate_data, streamingProcessor.abortController.signal); + } + else { + try { + const response = await fetch(generate_url, { + method: 'POST', + headers: getRequestHeaders(), + cache: 'no-cache', + body: JSON.stringify(generate_data), + signal: abortController.signal, + }); + + if (!response.ok) { + const error = await response.json(); + throw error; + } + + const data = await response.json(); + onSuccess(data); + } catch (error) { + onError(error); + } + } + + if (isStreamingEnabled() && type !== 'quiet') { + try { + const streamingGenerator = await streamingGeneratorPromise; + streamingProcessor.generator = streamingGenerator; + hideSwipeButtons(); + let getMessage = await streamingProcessor.generate(); + let messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false); + + if (isContinue) { + getMessage = continue_mag + 
getMessage; + } + + if (streamingProcessor && !streamingProcessor.isStopped && streamingProcessor.isFinished) { + await streamingProcessor.onFinishStreaming(streamingProcessor.messageId, getMessage); + streamingProcessor = null; + triggerAutoContinue(messageChunk, isImpersonate); + } + resolve(); + } catch (err) { + onError(err); } - const data = await response.json(); - onSuccess(data); - } catch (error) { - onError(error); } - } - if (isStreamingEnabled() && type !== 'quiet') { - try { - const streamingGenerator = await streamingGeneratorPromise; - streamingProcessor.generator = streamingGenerator; - hideSwipeButtons(); - let getMessage = await streamingProcessor.generate(); - let messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false); + async function onSuccess(data) { + let messageChunk = ''; - if (isContinue) { - getMessage = continue_mag + getMessage; + if (data.error == 'dryRun') { + generatedPromptCache = ''; + resolve(); + return; } - if (streamingProcessor && !streamingProcessor.isStopped && streamingProcessor.isFinished) { - await streamingProcessor.onFinishStreaming(streamingProcessor.messageId, getMessage); - streamingProcessor = null; + if (!data.error) { + //const getData = await response.json(); + let getMessage = extractMessageFromData(data); + let title = extractTitleFromData(data); + kobold_horde_model = title; + + const swipes = extractMultiSwipes(data, type); + + messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false); + + if (isContinue) { + getMessage = continue_mag + getMessage; + } + + //Formating + const displayIncomplete = type === 'quiet' && !quietToLoud; + getMessage = cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncomplete); + + if (getMessage.length > 0) { + if (isImpersonate) { + $('#send_textarea').val(getMessage).trigger('input'); + generatedPromptCache = ''; + await eventSource.emit(event_types.IMPERSONATE_READY, getMessage); + } + else if (type == 'quiet') { + 
resolve(getMessage); + } + else { + // Without streaming we'll be having a full message on continuation. Treat it as a last chunk. + if (originalType !== 'continue') { + ({ type, getMessage } = await saveReply(type, getMessage, false, title, swipes)); + } + else { + ({ type, getMessage } = await saveReply('appendFinal', getMessage, false, title, swipes)); + } + } + activateSendButtons(); + + if (type !== 'quiet') { + playMessageSound(); + } + + generate_loop_counter = 0; + } else { + ++generate_loop_counter; + + if (generate_loop_counter > MAX_GENERATION_LOOPS) { + throwCircuitBreakerError(); + } + + // regenerate with character speech reenforced + // to make sure we leave on swipe type while also adding the name2 appendage + setTimeout(() => { + Generate(type, { automatic_trigger, force_name2: true, resolve, reject, quiet_prompt, skipWIAN, force_chid }); + }, generate_loop_counter * 1000); + } + + if (power_user.auto_swipe) { + console.debug('checking for autoswipeblacklist on non-streaming message'); + function containsBlacklistedWords(getMessage, blacklist, threshold) { + console.debug('checking blacklisted words'); + const regex = new RegExp(`\\b(${blacklist.join('|')})\\b`, 'gi'); + const matches = getMessage.match(regex) || []; + return matches.length >= threshold; + } + + const generatedTextFiltered = (getMessage) => { + if (power_user.auto_swipe_blacklist_threshold) { + if (containsBlacklistedWords(getMessage, power_user.auto_swipe_blacklist, power_user.auto_swipe_blacklist_threshold)) { + console.debug('Generated text has blacklisted words'); + return true; + } + } + + return false; + }; + if (generatedTextFiltered(getMessage)) { + console.debug('swiping right automatically'); + is_send_press = false; + swipe_right(); + // TODO: do we want to resolve after an auto-swipe? 
+ resolve(); + return; + } + } + } else { + generatedPromptCache = ''; + activateSendButtons(); + //console.log('runGenerate calling showSwipeBtns'); + showSwipeButtons(); + + if (data?.response) { + toastr.error(data.response, 'API Error'); + } + } + console.debug('/api/chats/save called by /Generate'); + + await saveChatConditional(); + is_send_press = false; + hideStopButton(); + activateSendButtons(); + showSwipeButtons(); + setGenerationProgress(0); + streamingProcessor = null; + + if (type !== 'quiet') { triggerAutoContinue(messageChunk, isImpersonate); } resolve(); - } catch (err) { - onError(err); } - } - - async function onSuccess(data) { - let messageChunk = ''; - - if (data.error == 'dryRun') { - generatedPromptCache = ''; - resolve(); - return; - } - - if (!data.error) { - //const getData = await response.json(); - let getMessage = extractMessageFromData(data); - let title = extractTitleFromData(data); - kobold_horde_model = title; - - const swipes = extractMultiSwipes(data, type); - - messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false); - - if (isContinue) { - getMessage = continue_mag + getMessage; + function onError(exception) { + if (typeof exception?.error?.message === 'string') { + toastr.error(exception.error.message, 'Error', { timeOut: 10000, extendedTimeOut: 20000 }); } - //Formating - const displayIncomplete = type === 'quiet' && !quietToLoud; - getMessage = cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncomplete); - - if (getMessage.length > 0) { - if (isImpersonate) { - $('#send_textarea').val(getMessage).trigger('input'); - generatedPromptCache = ''; - await eventSource.emit(event_types.IMPERSONATE_READY, getMessage); - } - else if (type == 'quiet') { - resolve(getMessage); - } - else { - // Without streaming we'll be having a full message on continuation. Treat it as a last chunk. 
- if (originalType !== 'continue') { - ({ type, getMessage } = await saveReply(type, getMessage, false, title, swipes)); - } - else { - ({ type, getMessage } = await saveReply('appendFinal', getMessage, false, title, swipes)); - } - } - activateSendButtons(); - - if (type !== 'quiet') { - playMessageSound(); - } - - generate_loop_counter = 0; - } else { - ++generate_loop_counter; - - if (generate_loop_counter > MAX_GENERATION_LOOPS) { - throwCircuitBreakerError(); - } - - // regenerate with character speech reenforced - // to make sure we leave on swipe type while also adding the name2 appendage - setTimeout(() => { - Generate(type, { automatic_trigger, force_name2: true, resolve, reject, quiet_prompt, skipWIAN, force_chid }); - }, generate_loop_counter * 1000); - } - - if (power_user.auto_swipe) { - console.debug('checking for autoswipeblacklist on non-streaming message'); - function containsBlacklistedWords(getMessage, blacklist, threshold) { - console.debug('checking blacklisted words'); - const regex = new RegExp(`\\b(${blacklist.join('|')})\\b`, 'gi'); - const matches = getMessage.match(regex) || []; - return matches.length >= threshold; - } - - const generatedTextFiltered = (getMessage) => { - if (power_user.auto_swipe_blacklist_threshold) { - if (containsBlacklistedWords(getMessage, power_user.auto_swipe_blacklist, power_user.auto_swipe_blacklist_threshold)) { - console.debug('Generated text has blacklisted words'); - return true; - } - } - - return false; - }; - if (generatedTextFiltered(getMessage)) { - console.debug('swiping right automatically'); - is_send_press = false; - swipe_right(); - // TODO: do we want to resolve after an auto-swipe? 
- resolve(); - return; - } - } - } else { - generatedPromptCache = ''; - activateSendButtons(); - //console.log('runGenerate calling showSwipeBtns'); - showSwipeButtons(); - - if (data?.response) { - toastr.error(data.response, 'API Error'); - } + reject(exception); + unblockGeneration(); + console.log(exception); + streamingProcessor = null; } - console.debug('/api/chats/save called by /Generate'); - - await saveChatConditional(); - is_send_press = false; - hideStopButton(); - activateSendButtons(); - showSwipeButtons(); - setGenerationProgress(0); - streamingProcessor = null; - - if (type !== 'quiet') { - triggerAutoContinue(messageChunk, isImpersonate); - } - resolve(); - } - - function onError(exception) { - if (typeof exception?.error?.message === 'string') { - toastr.error(exception.error.message, 'Error', { timeOut: 10000, extendedTimeOut: 20000 }); - } - - reject(exception); - unblockGeneration(); - console.log(exception); - streamingProcessor = null; - } + }); } //rungenerate ends } else { //generate's primary loop ends, after this is error handling for no-connection or safety-id diff --git a/public/scripts/group-chats.js b/public/scripts/group-chats.js index 21929684f..1bbb20087 100644 --- a/public/scripts/group-chats.js +++ b/public/scripts/group-chats.js @@ -612,11 +612,11 @@ async function generateGroupWrapper(by_auto_mode, type = null, params = {}) { if (online_status === 'no_connection') { is_group_generating = false; setSendButtonState(false); - return; + return Promise.resolve(); } if (is_group_generating) { - return false; + return Promise.resolve(); } // Auto-navigate back to group menu @@ -630,7 +630,7 @@ async function generateGroupWrapper(by_auto_mode, type = null, params = {}) { if (!group || !Array.isArray(group.members) || !group.members.length) { sendSystemMessage(system_message_types.EMPTY, '', { isSmallSys: true }); - return; + return Promise.resolve(); } try { @@ -717,32 +717,16 @@ async function generateGroupWrapper(by_auto_mode, type 
= null, params = {}) { setCharacterName(characters[chId].name); // Wait for generation to finish - await new Promise(async (resolve, reject) => { - await Generate(generateType, { - automatic_trigger: by_auto_mode, - ...(params || {}), - resolve: function(...args) { - if (typeof params.resolve === 'function') { - params.resolve(...args); - } - resolve(); - }, - reject: function(...args) { - if (typeof params.reject === 'function') { - params.reject(...args); - } - reject(); - }, - }); + const generateFinished = await Generate(generateType, { automatic_trigger: by_auto_mode, ...(params || {}) }); + await generateFinished; - if (type !== 'swipe' && type !== 'impersonate' && !isStreamingEnabled()) { - // update indicator and scroll down - typingIndicator - .find('.typing_indicator_name') - .text(characters[chId].name); - typingIndicator.show(); - } - }); + if (type !== 'swipe' && type !== 'impersonate' && !isStreamingEnabled()) { + // update indicator and scroll down + typingIndicator + .find('.typing_indicator_name') + .text(characters[chId].name); + typingIndicator.show(); + } } } finally { typingIndicator.hide(); @@ -755,6 +739,8 @@ async function generateGroupWrapper(by_auto_mode, type = null, params = {}) { activateSendButtons(); showSwipeButtons(); } + + return Promise.resolve(); } function getLastMessageGenerationId() { From 420d186823e0fdfa834040f43789675a48049b81 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 10 Dec 2023 20:02:25 +0200 Subject: [PATCH 039/179] Add reduced motion toggle --- public/index.html | 4 ++++ public/script.js | 39 +++++++++++++++++++++------------- public/scripts/authors-note.js | 13 ++++++------ public/scripts/cfg-scale.js | 13 ++++++------ public/scripts/power-user.js | 29 +++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 27 deletions(-) diff --git a/public/index.html b/public/index.html index be581c89b..0f5a41cd1 100644 --- a/public/index.html +++ b/public/index.html @@ 
-2895,6 +2895,10 @@

Theme Toggles

+
+ +

diff --git a/public/scripts/world-info.js b/public/scripts/world-info.js index a56879b85..3ab3fbfa4 100644 --- a/public/scripts/world-info.js +++ b/public/scripts/world-info.js @@ -1684,20 +1684,13 @@ async function checkWorldInfo(chat, maxContext) { // Add the depth or AN if enabled // Put this code here since otherwise, the chat reference is modified - if (extension_settings.note.allowWIScan) { - for (const key of Object.keys(context.extensionPrompts)) { - if (key.startsWith('DEPTH_PROMPT')) { - const depthPrompt = getExtensionPromptByName(key); - if (depthPrompt) { - textToScan = `${depthPrompt}\n${textToScan}`; - } + for (const key of Object.keys(context.extensionPrompts)) { + if (context.extensionPrompts[key]?.scan) { + const prompt = getExtensionPromptByName(key); + if (prompt) { + textToScan = `${prompt}\n${textToScan}`; } } - - const anPrompt = getExtensionPromptByName(NOTE_MODULE_NAME); - if (anPrompt) { - textToScan = `${anPrompt}\n${textToScan}`; - } } // Transform the resulting string @@ -1948,7 +1941,7 @@ async function checkWorldInfo(chat, maxContext) { if (shouldWIAddPrompt) { const originalAN = context.extensionPrompts[NOTE_MODULE_NAME].value; const ANWithWI = `${ANTopEntries.join('\n')}\n${originalAN}\n${ANBottomEntries.join('\n')}`; - context.setExtensionPrompt(NOTE_MODULE_NAME, ANWithWI, chat_metadata[metadata_keys.position], chat_metadata[metadata_keys.depth]); + context.setExtensionPrompt(NOTE_MODULE_NAME, ANWithWI, chat_metadata[metadata_keys.position], chat_metadata[metadata_keys.depth], extension_settings.note.allowWIScan); } return { worldInfoBefore, worldInfoAfter, WIDepthEntries }; From 69f90a0b30d7d2f367113321a7c0f05f5230354a Mon Sep 17 00:00:00 2001 From: LenAnderson Date: Mon, 11 Dec 2023 22:51:07 +0000 Subject: [PATCH 067/179] add /tokens slash command to call getTokenCount --- public/scripts/slash-commands.js | 1 + 1 file changed, 1 insertion(+) diff --git a/public/scripts/slash-commands.js b/public/scripts/slash-commands.js index 
24a2382a7..1768762db 100644 --- a/public/scripts/slash-commands.js +++ b/public/scripts/slash-commands.js @@ -186,6 +186,7 @@ parser.addCommand('trimend', trimEndCallback, [], '(text parser.addCommand('inject', injectCallback, [], 'id=injectId (position=before/after/chat depth=number [text]) – injects a text into the LLM prompt for the current chat. Requires a unique injection ID. Positions: "before" main prompt, "after" main prompt, in-"chat" (default: after). Depth: injection depth for the prompt (default: 4).', true, true); parser.addCommand('listinjects', listInjectsCallback, [], ' – lists all script injections for the current chat.', true, true); parser.addCommand('flushinjects', flushInjectsCallback, [], ' – removes all script injections for the current chat.', true, true); +parser.addCommand('tokens', (_, text) => getTokenCount(text), [], '(text) – counts the number of tokens in the text.', true, true); registerVariableCommands(); const NARRATOR_NAME_KEY = 'narrator_name'; From 2bdd3672d47f0db4ef6eb720044eda9ea3550f2d Mon Sep 17 00:00:00 2001 From: LenAnderson Date: Mon, 11 Dec 2023 23:06:21 +0000 Subject: [PATCH 068/179] add macro for first included message in context --- public/script.js | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/public/script.js b/public/script.js index fe3513a2a..f63359b1a 100644 --- a/public/script.js +++ b/public/script.js @@ -2017,6 +2017,20 @@ function getLastMessageId() { return ''; } +/** + * Returns the ID of the first message included in the context. + * @returns {string} The ID of the first message in the context. + */ +function getFirstIncludedMessageId() { + const index = document.querySelector('.lastInContext')?.getAttribute('mesid'); + + if (!isNaN(index) && index >= 0) { + return String(index); + } + + return ''; +} + /** * Returns the last message in the chat. * @returns {string} The last message in the chat. 
@@ -2119,6 +2133,7 @@ function substituteParams(content, _name1, _name2, _original, _group, _replaceCh content = content.replace(/{{group}}/gi, _group); content = content.replace(/{{lastMessage}}/gi, getLastMessage()); content = content.replace(/{{lastMessageId}}/gi, getLastMessageId()); + content = content.replace(/{{firstIncludedMessageId}}/gi, getFirstIncludedMessageId()); content = content.replace(/{{lastSwipeId}}/gi, getLastSwipeId()); content = content.replace(/{{currentSwipeId}}/gi, getCurrentSwipeId()); From 299749a4e79ccb310e62a10275b719093e1013b1 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Tue, 12 Dec 2023 01:08:47 +0200 Subject: [PATCH 069/179] Add prerequisites for websearch extension --- public/script.js | 16 ++++---------- public/scripts/chats.js | 19 +++++++++++++++++ public/scripts/extensions.js | 2 +- public/scripts/utils.js | 6 ++++-- src/endpoints/serpapi.js | 41 ++++++++++++++++++++++++++++++++++++ 5 files changed, 69 insertions(+), 15 deletions(-) diff --git a/public/script.js b/public/script.js index fe3513a2a..780e28dbd 100644 --- a/public/script.js +++ b/public/script.js @@ -190,7 +190,7 @@ import { getBackgrounds, initBackgrounds } from './scripts/backgrounds.js'; import { hideLoader, showLoader } from './scripts/loader.js'; import { BulkEditOverlay, CharacterContextMenu } from './scripts/BulkEditOverlay.js'; import { loadMancerModels } from './scripts/mancer-settings.js'; -import { getFileAttachment, hasPendingFileAttachment, populateFileAttachment } from './scripts/chats.js'; +import { appendFileContent, hasPendingFileAttachment, populateFileAttachment } from './scripts/chats.js'; import { replaceVariableMacros } from './scripts/variables.js'; //exporting functions and vars for mods @@ -3098,26 +3098,18 @@ async function Generate(type, { automatic_trigger, force_name2, quiet_prompt, qu coreChat.pop(); } - coreChat = await Promise.all(coreChat.map(async (chatItem) => { + coreChat = await 
Promise.all(coreChat.map(async (chatItem, index) => { let message = chatItem.mes; let regexType = chatItem.is_user ? regex_placement.USER_INPUT : regex_placement.AI_OUTPUT; let options = { isPrompt: true }; let regexedMessage = getRegexedString(message, regexType, options); - - if (chatItem.extra?.file) { - const fileText = chatItem.extra.file.text || (await getFileAttachment(chatItem.extra.file.url)); - - if (fileText) { - const fileWrapped = `\`\`\`\n${fileText}\n\`\`\`\n\n`; - chatItem.extra.fileLength = fileWrapped.length; - regexedMessage = fileWrapped + regexedMessage; - } - } + regexedMessage = await appendFileContent(chatItem, regexedMessage); return { ...chatItem, mes: regexedMessage, + index, }; })); diff --git a/public/scripts/chats.js b/public/scripts/chats.js index cd8a8677e..176d4d6a0 100644 --- a/public/scripts/chats.js +++ b/public/scripts/chats.js @@ -341,6 +341,25 @@ function embedMessageFile(messageId, messageBlock) { } } +/** + * Appends file content to the message text. + * @param {object} message Message object + * @param {string} messageText Message text + * @returns {Promise} Message text with file content appended. 
+ */ +export async function appendFileContent(message, messageText) { + if (message.extra?.file) { + const fileText = message.extra.file.text || (await getFileAttachment(message.extra.file.url)); + + if (fileText) { + const fileWrapped = `\`\`\`\n${fileText}\n\`\`\`\n\n`; + message.extra.fileLength = fileWrapped.length; + messageText = fileWrapped + messageText; + } + } + return messageText; +} + jQuery(function () { $(document).on('click', '.mes_hide', async function () { const messageBlock = $(this).closest('.mes'); diff --git a/public/scripts/extensions.js b/public/scripts/extensions.js index 14a5f286e..3ce240060 100644 --- a/public/scripts/extensions.js +++ b/public/scripts/extensions.js @@ -879,7 +879,7 @@ async function runGenerationInterceptors(chat, contextSize) { exitImmediately = immediately; }; - for (const manifest of Object.values(manifests)) { + for (const manifest of Object.values(manifests).sort((a, b) => a.loading_order - b.loading_order)) { const interceptorKey = manifest.generate_interceptor; if (typeof window[interceptorKey] === 'function') { try { diff --git a/public/scripts/utils.js b/public/scripts/utils.js index 7699263ba..6ea0508d3 100644 --- a/public/scripts/utils.js +++ b/public/scripts/utils.js @@ -1143,11 +1143,13 @@ export async function extractTextFromPDF(blob) { * @param {Blob} blob HTML content blob * @returns {Promise} A promise that resolves to the parsed text. 
*/ -export async function extractTextFromHTML(blob) { +export async function extractTextFromHTML(blob, textSelector = 'body') { const html = await blob.text(); const domParser = new DOMParser(); const document = domParser.parseFromString(DOMPurify.sanitize(html), 'text/html'); - const text = postProcessText(document.body.textContent); + const elements = document.querySelectorAll(textSelector); + const rawText = Array.from(elements).map(e => e.textContent).join('\n'); + const text = postProcessText(rawText); return text; } diff --git a/src/endpoints/serpapi.js b/src/endpoints/serpapi.js index e41cb543e..3ba6a134d 100644 --- a/src/endpoints/serpapi.js +++ b/src/endpoints/serpapi.js @@ -5,6 +5,23 @@ const { jsonParser } = require('../express-common'); const router = express.Router(); +// Cosplay as Firefox +const visitHeaders = { + 'Accept': '*/*', + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:120.0) Gecko/20100101 Firefox/120.0', + 'Accept-Language': 'en-US,en;q=0.5', + 'Accept-Encoding': 'gzip, deflate, br', + 'Connection': 'keep-alive', + 'Cache-Control': 'no-cache', + 'Pragma': 'no-cache', + 'TE': 'trailers', + 'DNT': '1', + 'Sec-Fetch-Dest': 'document', + 'Sec-Fetch-Mode': 'navigate', + 'Sec-Fetch-Site': 'none', + 'Sec-Fetch-User': '?1', +}; + router.post('/search', jsonParser, async (request, response) => { try { const key = readSecret(SECRET_KEYS.SERPAPI); @@ -31,4 +48,28 @@ router.post('/search', jsonParser, async (request, response) => { } }); +router.post('/visit', jsonParser, async (request, response) => { + try { + const url = request.body.url; + + if (!url) { + console.log('No url provided for /visit'); + return response.sendStatus(400); + } + + const result = await fetch(url, { headers: visitHeaders }); + + if (!result.ok) { + console.log(`Visit failed ${result.status} ${result.statusText}`); + return response.sendStatus(500); + } + + const text = await result.text(); + return response.send(text); + } catch (error) { + 
console.log(error); + return response.sendStatus(500); + } +}); + module.exports = { router }; From 07fecacce2aa05523ea4a0b13ba0da4628e206f8 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Tue, 12 Dec 2023 01:24:21 +0200 Subject: [PATCH 070/179] Add to macro help --- public/scripts/templates/macros.html | 1 + 1 file changed, 1 insertion(+) diff --git a/public/scripts/templates/macros.html b/public/scripts/templates/macros.html index 2eeb6537c..a3e0e83a0 100644 --- a/public/scripts/templates/macros.html +++ b/public/scripts/templates/macros.html @@ -17,6 +17,7 @@
  • {{char}} – the Character's name
  • {{lastMessage}} - the text of the latest chat message.
  • {{lastMessageId}} – index # of the latest chat message. Useful for slash command batching.
  • +
  • {{firstIncludedMessageId}} - the ID of the first message included in the context. Requires generation to be ran at least once in the current session.
  • {{currentSwipeId}} – the 1-based ID of the current swipe in the last chat message. Empty string if the last message is user or prompt-hidden.
  • {{lastSwipeId}} – the number of swipes in the last chat message. Empty string if the last message is user or prompt-hidden.
  • {{// (note)}} – you can leave a note here, and the macro will be replaced with blank content. Not visible for the AI.
  • From 2ca9015a5f9f124513eaa881ee9b8d770694f263 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Tue, 12 Dec 2023 03:56:36 +0200 Subject: [PATCH 071/179] Add filters to serpapi/visit --- src/endpoints/serpapi.js | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/endpoints/serpapi.js b/src/endpoints/serpapi.js index 3ba6a134d..62c50693a 100644 --- a/src/endpoints/serpapi.js +++ b/src/endpoints/serpapi.js @@ -7,7 +7,7 @@ const router = express.Router(); // Cosplay as Firefox const visitHeaders = { - 'Accept': '*/*', + 'Accept': 'text/html', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:120.0) Gecko/20100101 Firefox/120.0', 'Accept-Language': 'en-US,en;q=0.5', 'Accept-Encoding': 'gzip, deflate, br', @@ -57,6 +57,33 @@ router.post('/visit', jsonParser, async (request, response) => { return response.sendStatus(400); } + try { + const urlObj = new URL(url); + + // Reject relative URLs + if (urlObj.protocol === null || urlObj.host === null) { + throw new Error('Invalid URL format'); + } + + // Reject non-HTTP URLs + if (urlObj.protocol !== 'http:' && urlObj.protocol !== 'https:') { + throw new Error('Invalid protocol'); + } + + // Reject URLs with a non-standard port + if (urlObj.port !== '') { + throw new Error('Invalid port'); + } + + // Reject IP addresses + if (urlObj.hostname.match(/^\d+\.\d+\.\d+\.\d+$/)) { + throw new Error('Invalid hostname'); + } + } catch (error) { + console.log('Invalid url provided for /visit', url); + return response.sendStatus(400); + } + const result = await fetch(url, { headers: visitHeaders }); if (!result.ok) { @@ -64,6 +91,12 @@ router.post('/visit', jsonParser, async (request, response) => { return response.sendStatus(500); } + const contentType = String(result.headers.get('content-type')); + if (!contentType.includes('text/html')) { + console.log(`Visit failed, content-type is ${contentType}, expected text/html`); + 
return response.sendStatus(500); + } + const text = await result.text(); return response.send(text); } catch (error) { From 9176f46caf1e2a92f55aa33ff95dde69e941a13f Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Tue, 12 Dec 2023 19:14:17 +0200 Subject: [PATCH 072/179] Add /preset command --- public/index.html | 2 +- public/script.js | 45 ++++++++++- public/scripts/openai.js | 45 ----------- public/scripts/preset-manager.js | 134 +++++++++++++++++++++++++++++-- 4 files changed, 169 insertions(+), 57 deletions(-) diff --git a/public/index.html b/public/index.html index 04d9e1d51..7eeccf4e0 100644 --- a/public/index.html +++ b/public/index.html @@ -177,7 +177,7 @@

    Chat Completion Presets

    -
    diff --git a/public/script.js b/public/script.js index 6b3b10e3a..bc29aff53 100644 --- a/public/script.js +++ b/public/script.js @@ -192,6 +192,7 @@ import { BulkEditOverlay, CharacterContextMenu } from './scripts/BulkEditOverlay import { loadMancerModels } from './scripts/mancer-settings.js'; import { appendFileContent, hasPendingFileAttachment, populateFileAttachment } from './scripts/chats.js'; import { replaceVariableMacros } from './scripts/variables.js'; +import { initPresetManager } from './scripts/preset-manager.js'; //exporting functions and vars for mods export { @@ -738,6 +739,7 @@ async function firstLoadInit() { await getCharacters(); await getBackgrounds(); await initTokenizers(); + await initPresetManager(); initBackgrounds(); initAuthorsNote(); initPersonas(); @@ -7446,7 +7448,10 @@ const swipe_right = () => { } }; -function connectAPISlash(_, text) { +/** + * @param {string} text API name + */ +async function connectAPISlash(_, text) { if (!text) return; const apiMap = { @@ -7460,7 +7465,29 @@ function connectAPISlash(_, text) { button: '#api_button_novel', }, 'ooba': { + selected: 'textgenerationwebui', button: '#api_button_textgenerationwebui', + type: textgen_types.OOBA, + }, + 'tabby': { + selected: 'textgenerationwebui', + button: '#api_button_textgenerationwebui', + type: textgen_types.TABBY, + }, + 'mancer': { + selected: 'textgenerationwebui', + button: '#api_button_textgenerationwebui', + type: textgen_types.MANCER, + }, + 'aphrodite': { + selected: 'textgenerationwebui', + button: '#api_button_textgenerationwebui', + type: textgen_types.APHRODITE, + }, + 'kcpp': { + selected: 'textgenerationwebui', + button: '#api_button_textgenerationwebui', + type: textgen_types.KOBOLDCPP, }, 'oai': { selected: 'openai', @@ -7499,7 +7526,7 @@ function connectAPISlash(_, text) { }, }; - const apiConfig = apiMap[text]; + const apiConfig = apiMap[text.toLowerCase()]; if (!apiConfig) { toastr.error(`Error: ${text} is not a valid API`); return; @@ 
-7513,11 +7540,23 @@ function connectAPISlash(_, text) { $('#chat_completion_source').trigger('change'); } + if (apiConfig.type) { + $(`#textgen_type option[value='${apiConfig.type}']`).prop('selected', true); + $('#textgen_type').trigger('change'); + } + if (apiConfig.button) { $(apiConfig.button).trigger('click'); } toastr.info(`API set to ${text}, trying to connect..`); + + try { + await waitUntilCondition(() => online_status !== 'no_connection', 5000, 100); + console.log('Connection successful'); + } catch { + console.log('Could not connect after 5 seconds, skipping.'); + } } export async function processDroppedFiles(files) { @@ -7771,7 +7810,7 @@ jQuery(async function () { } registerSlashCommand('dupe', DupeChar, [], '– duplicates the currently selected character', true, true); - registerSlashCommand('api', connectAPISlash, [], '(kobold, horde, novel, ooba, oai, claude, windowai, openrouter, scale, ai21, palm) – connect to an API', true, true); + registerSlashCommand('api', connectAPISlash, [], '(kobold, horde, novel, ooba, tabby, mancer, aphrodite, kcpp, oai, claude, windowai, openrouter, scale, ai21, palm) – connect to an API', true, true); registerSlashCommand('impersonate', doImpersonate, ['imp'], '– calls an impersonation response', true, true); registerSlashCommand('delchat', doDeleteChat, [], '– deletes the current chat', true, true); registerSlashCommand('closechat', doCloseChat, [], '– closes the current chat', true, true); diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 12cfcd92a..0631c0a7a 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -2478,28 +2478,6 @@ function showWindowExtensionError() { }); } -function trySelectPresetByName(name) { - let preset_found = null; - for (const key in openai_setting_names) { - if (name.trim() == key.trim()) { - preset_found = key; - break; - } - } - - // Don't change if the current preset is the same - if (preset_found && preset_found === 
oai_settings.preset_settings_openai) { - return; - } - - if (preset_found) { - oai_settings.preset_settings_openai = preset_found; - const value = openai_setting_names[preset_found]; - $(`#settings_preset_openai option[value="${value}"]`).attr('selected', true); - $('#settings_preset_openai').val(value).trigger('change'); - } -} - /** * Persist a settings preset with the given name * @@ -3573,29 +3551,6 @@ $(document).ready(async function () { saveSettingsDebounced(); }); - // auto-select a preset based on character/group name - $(document).on('click', '.character_select', function () { - const chid = $(this).attr('chid'); - const name = characters[chid]?.name; - - if (!name) { - return; - } - - trySelectPresetByName(name); - }); - - $(document).on('click', '.group_select', function () { - const grid = $(this).data('id'); - const name = groups.find(x => x.id === grid)?.name; - - if (!name) { - return; - } - - trySelectPresetByName(name); - }); - $('#update_oai_preset').on('click', async function () { const name = oai_settings.preset_settings_openai; await saveOpenAIPreset(name, oai_settings); diff --git a/public/scripts/preset-manager.js b/public/scripts/preset-manager.js index f48ab287b..8b88f97a1 100644 --- a/public/scripts/preset-manager.js +++ b/public/scripts/preset-manager.js @@ -12,6 +12,7 @@ import { nai_settings, novelai_setting_names, novelai_settings, + online_status, saveSettingsDebounced, this_chid, } from '../script.js'; @@ -19,6 +20,7 @@ import { groups, selected_group } from './group-chats.js'; import { instruct_presets } from './instruct-mode.js'; import { kai_settings } from './kai-settings.js'; import { context_presets, getContextSettings, power_user } from './power-user.js'; +import { registerSlashCommand } from './slash-commands.js'; import { textgenerationwebui_preset_names, textgenerationwebui_presets, @@ -28,6 +30,9 @@ import { download, parseJsonFile, waitUntilCondition } from './utils.js'; const presetManagers = {}; +/** + * Automatically 
select a preset for current API based on character or group name. + */ function autoSelectPreset() { const presetManager = getPresetManager(); @@ -57,7 +62,12 @@ function autoSelectPreset() { } } -function getPresetManager(apiId) { +/** + * Gets a preset manager by API id. + * @param {string} apiId API id + * @returns {PresetManager} Preset manager + */ +function getPresetManager(apiId = '') { if (!apiId) { apiId = main_api == 'koboldhorde' ? 'kobold' : main_api; } @@ -69,6 +79,9 @@ function getPresetManager(apiId) { return presetManagers[apiId]; } +/** + * Registers preset managers for all select elements with data-preset-manager-for attribute. + */ function registerPresetManagers() { $('select[data-preset-manager-for]').each((_, e) => { const forData = $(e).data('preset-manager-for'); @@ -85,21 +98,46 @@ class PresetManager { this.apiId = apiId; } + /** + * Gets all preset names. + * @returns {string[]} List of preset names + */ + getAllPresets() { + return $(this.select).find('option').map((_, el) => el.text).toArray(); + } + + /** + * Finds a preset by name. + * @param {string} name Preset name + * @returns {any} Preset value + */ findPreset(name) { return $(this.select).find(`option:contains(${name})`).val(); } + /** + * Gets the selected preset value. + * @returns {any} Selected preset value + */ getSelectedPreset() { return $(this.select).find('option:selected').val(); } + /** + * Gets the selected preset name. + * @returns {string} Selected preset name + */ getSelectedPresetName() { return $(this.select).find('option:selected').text(); } - selectPreset(preset) { - $(this.select).find(`option[value=${preset}]`).prop('selected', true); - $(this.select).val(preset).trigger('change'); + /** + * Selects a preset by option value. 
+ * @param {string} value Preset option value + */ + selectPreset(value) { + $(this.select).find(`option[value=${value}]`).prop('selected', true); + $(this.select).val(value).trigger('change'); } async updatePreset() { @@ -334,11 +372,91 @@ class PresetManager { } } -jQuery(async () => { - await waitUntilCondition(() => eventSource !== undefined); +/** + * Selects a preset by name for current API. + * @param {any} _ Named arguments + * @param {string} name Unnamed arguments + * @returns {Promise} Selected or current preset name + */ +async function presetCommandCallback(_, name) { + const shouldReconnect = online_status !== 'no_connection'; + const presetManager = getPresetManager(); + const allPresets = presetManager.getAllPresets(); + const currentPreset = presetManager.getSelectedPresetName(); + if (!presetManager) { + console.debug(`Preset Manager not found for API: ${main_api}`); + return ''; + } + + if (!name) { + console.log('No name provided for /preset command, using current preset'); + return currentPreset; + } + + if (!Array.isArray(allPresets) || allPresets.length === 0) { + console.log(`No presets found for API: ${main_api}`); + return currentPreset; + } + + // Find exact match + const exactMatch = allPresets.find(p => p.toLowerCase().trim() === name.toLowerCase().trim()); + + if (exactMatch) { + console.log('Found exact preset match', exactMatch); + + if (currentPreset !== exactMatch) { + const presetValue = presetManager.findPreset(exactMatch); + + if (presetValue) { + presetManager.selectPreset(presetValue); + shouldReconnect && await waitForConnection(); + } + } + + return exactMatch; + } else { + // Find fuzzy match + const fuse = new Fuse(allPresets); + const fuzzyMatch = fuse.search(name); + + if (!fuzzyMatch.length) { + console.warn(`WARN: Preset found with name ${name}`); + return currentPreset; + } + + const fuzzyPresetName = fuzzyMatch[0].item; + const fuzzyPresetValue = presetManager.findPreset(fuzzyPresetName); + + if (fuzzyPresetValue) { 
+ console.log('Found fuzzy preset match', fuzzyPresetName); + + if (currentPreset !== fuzzyPresetName) { + presetManager.selectPreset(fuzzyPresetValue); + shouldReconnect && await waitForConnection(); + } + } + + return fuzzyPresetName; + } +} + +/** + * Waits for API connection to be established. + */ +async function waitForConnection() { + try { + await waitUntilCondition(() => online_status !== 'no_connection', 5000, 100); + } catch { + console.log('Timeout waiting for API to connect'); + } +} + +export async function initPresetManager() { eventSource.on(event_types.CHAT_CHANGED, autoSelectPreset); registerPresetManagers(); + registerSlashCommand('preset', presetCommandCallback, [], '(name) – sets a preset by name for the current API', true, true); + $(document).on('click', '[data-preset-manager-update]', async function () { const apiId = $(this).data('preset-manager-update'); const presetManager = getPresetManager(apiId); @@ -440,7 +558,7 @@ jQuery(async () => { saveSettingsDebounced(); }); - $(document).on('click', '[data-preset-manager-restore]', async function() { + $(document).on('click', '[data-preset-manager-restore]', async function () { const apiId = $(this).data('preset-manager-restore'); const presetManager = getPresetManager(apiId); @@ -490,4 +608,4 @@ jQuery(async () => { toastr.success('Preset restored'); } }); -}); +} From 9160de7714deab991cc571947593f10beb83196a Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Tue, 12 Dec 2023 19:24:32 +0200 Subject: [PATCH 073/179] Run macros on impersonation prompt --- public/scripts/openai.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 0631c0a7a..a9bc5e304 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -913,6 +913,7 @@ function preparePromptsForChatCompletion({ Scenario, charPersonality, name2, wor const scenarioText = Scenario && oai_settings.scenario_format ? 
substituteParams(oai_settings.scenario_format) : ''; const charPersonalityText = charPersonality && oai_settings.personality_format ? substituteParams(oai_settings.personality_format) : ''; const groupNudge = substituteParams(oai_settings.group_nudge_prompt); + const impersonationPrompt = oai_settings.impersonation_prompt ? substituteParams(oai_settings.impersonation_prompt) : ''; // Create entries for system prompts const systemPrompts = [ @@ -924,7 +925,7 @@ function preparePromptsForChatCompletion({ Scenario, charPersonality, name2, wor { role: 'system', content: scenarioText, identifier: 'scenario' }, { role: 'system', content: personaDescription, identifier: 'personaDescription' }, // Unordered prompts without marker - { role: 'system', content: oai_settings.impersonation_prompt, identifier: 'impersonate' }, + { role: 'system', content: impersonationPrompt, identifier: 'impersonate' }, { role: 'system', content: quietPrompt, identifier: 'quietPrompt' }, { role: 'system', content: bias, identifier: 'bias' }, { role: 'system', content: groupNudge, identifier: 'groupNudge' }, From 83f2c1a8edd2f79af85e597a8789a0697f25b0bc Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Tue, 12 Dec 2023 22:11:23 +0200 Subject: [PATCH 074/179] #1524 Add FPS limiter to streamed rendering --- public/index.html | 13 +++++++++++++ public/script.js | 8 +++++++- public/scripts/power-user.js | 12 +++++++++++- public/scripts/utils.js | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 2 deletions(-) diff --git a/public/index.html b/public/index.html index 7eeccf4e0..348a47fa3 100644 --- a/public/index.html +++ b/public/index.html @@ -2890,6 +2890,19 @@
    +
    +
    + Streaming FPS +
    +
    +
    + +
    +
    + +
    +
    +

    diff --git a/public/script.js b/public/script.js index bc29aff53..fdd5a76fd 100644 --- a/public/script.js +++ b/public/script.js @@ -143,6 +143,7 @@ import { onlyUnique, getBase64Async, humanFileSize, + Stopwatch, } from './scripts/utils.js'; import { ModuleWorkerWrapper, doDailyExtensionUpdatesCheck, extension_settings, getContext, loadExtensionSettings, processExtensionHelpers, registerExtensionHelper, renderExtensionTemplate, runGenerationInterceptors, saveMetadataDebounced } from './scripts/extensions.js'; @@ -2805,7 +2806,10 @@ class StreamingProcessor { } try { + const sw = new Stopwatch(1000 / power_user.streaming_fps); + const timestamps = []; for await (const { text, swipes } of this.generator()) { + timestamps.push(Date.now()); if (this.isStopped) { this.onStopStreaming(); return; @@ -2813,8 +2817,10 @@ class StreamingProcessor { this.result = text; this.swipes = swipes; - this.onProgressStreaming(this.messageId, message_already_generated + text); + await sw.tick(() => this.onProgressStreaming(this.messageId, message_already_generated + text)); } + const seconds = (timestamps[timestamps.length - 1] - timestamps[0]) / 1000; + console.warn(`Stream stats: ${timestamps.length} tokens, ${seconds.toFixed(2)} seconds, rate: ${Number(timestamps.length / seconds).toFixed(2)} TPS`); } catch (err) { console.error(err); diff --git a/public/scripts/power-user.js b/public/scripts/power-user.js index 9689fb212..2983047d9 100644 --- a/public/scripts/power-user.js +++ b/public/scripts/power-user.js @@ -114,6 +114,7 @@ let power_user = { }, markdown_escape_strings: '', chat_truncation: 100, + streaming_fps: 30, ui_mode: ui_mode.POWER, fast_ui_mode: true, @@ -1460,6 +1461,9 @@ function loadPowerUserSettings(settings, data) { $('#chat_truncation').val(power_user.chat_truncation); $('#chat_truncation_counter').val(power_user.chat_truncation); + $('#streaming_fps').val(power_user.streaming_fps); + $('#streaming_fps_counter').val(power_user.streaming_fps); + 
$('#font_scale').val(power_user.font_scale); $('#font_scale_counter').val(power_user.font_scale); @@ -2701,6 +2705,12 @@ $(document).ready(() => { saveSettingsDebounced(); }); + $('#streaming_fps').on('input', function () { + power_user.streaming_fps = Number($('#streaming_fps').val()); + $('#streaming_fps_counter').val(power_user.streaming_fps); + saveSettingsDebounced(); + }); + $('input[name="font_scale"]').on('input', async function (e) { power_user.font_scale = Number(e.target.value); $('#font_scale_counter').val(power_user.font_scale); @@ -3134,7 +3144,7 @@ $(document).ready(() => { saveSettingsDebounced(); }); - $('#reduced_motion').on('input', function() { + $('#reduced_motion').on('input', function () { power_user.reduced_motion = !!$(this).prop('checked'); localStorage.setItem(storage_keys.reduced_motion, String(power_user.reduced_motion)); switchReducedMotion(); diff --git a/public/scripts/utils.js b/public/scripts/utils.js index 6ea0508d3..dfaba74ce 100644 --- a/public/scripts/utils.js +++ b/public/scripts/utils.js @@ -741,6 +741,38 @@ export function escapeRegex(string) { return string.replace(/[/\-\\^$*+?.()|[\]{}]/g, '\\$&'); } +export class Stopwatch { + /** + * Initializes a Stopwatch class. + * @param {number} interval Update interval in milliseconds. Must be a finite number above zero. + */ + constructor(interval) { + if (isNaN(interval) || !isFinite(interval) || interval <= 0) { + console.warn('Invalid interval for Stopwatch, setting to 1'); + interval = 1; + } + + this.interval = interval; + this.lastAction = Date.now(); + } + + /** + * Executes a function if the interval passed. + * @param {(arg0: any) => any} action Action function + * @returns Promise + */ + async tick(action) { + const passed = (Date.now() - this.lastAction); + + if (passed < this.interval) { + return; + } + + await action(); + this.lastAction = Date.now(); + } +} + /** * Provides an interface for rate limiting function calls. 
*/ From 3d7706e6b3e6acded84e7b0866c55bcec70b5348 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Tue, 12 Dec 2023 23:09:39 +0200 Subject: [PATCH 075/179] #1524 Skip stop strings clean-up during streaming --- public/script.js | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/public/script.js b/public/script.js index fdd5a76fd..f430009a2 100644 --- a/public/script.js +++ b/public/script.js @@ -2629,12 +2629,12 @@ class StreamingProcessor { if (!isImpersonate && !isContinue && Array.isArray(this.swipes) && this.swipes.length > 0) { for (let i = 0; i < this.swipes.length; i++) { - this.swipes[i] = cleanUpMessage(this.removePrefix(this.swipes[i]), false, false, true); + this.swipes[i] = cleanUpMessage(this.removePrefix(this.swipes[i]), false, false, true, !isFinal); } } text = this.removePrefix(text); - let processedText = cleanUpMessage(text, isImpersonate, isContinue, !isFinal); + let processedText = cleanUpMessage(text, isImpersonate, isContinue, !isFinal, !isFinal); // Predict unbalanced asterisks / quotes during streaming const charsToBalance = ['*', '"', '```']; @@ -2907,7 +2907,7 @@ export async function generateRaw(prompt, api, instructOverride) { throw new Error(data.error); } - const message = cleanUpMessage(extractMessageFromData(data), false, false, true); + const message = cleanUpMessage(extractMessageFromData(data), false, false, true, false); if (!message) { throw new Error('No message generated'); @@ -3814,7 +3814,7 @@ async function Generate(type, { automatic_trigger, force_name2, quiet_prompt, qu streamingProcessor.generator = streamingGenerator; hideSwipeButtons(); let getMessage = await streamingProcessor.generate(); - let messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false); + let messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false, false); if (isContinue) { getMessage = continue_mag + getMessage; @@ -3849,7 
+3849,7 @@ async function Generate(type, { automatic_trigger, force_name2, quiet_prompt, qu const swipes = extractMultiSwipes(data, type); - messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false); + messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false, false); if (isContinue) { getMessage = continue_mag + getMessage; @@ -3857,7 +3857,7 @@ async function Generate(type, { automatic_trigger, force_name2, quiet_prompt, qu //Formating const displayIncomplete = type === 'quiet' && !quietToLoud; - getMessage = cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncomplete); + getMessage = cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncomplete, false); if (getMessage.length > 0) { if (isImpersonate) { @@ -4487,7 +4487,7 @@ function extractMultiSwipes(data, type) { } for (let i = 1; i < data.choices.length; i++) { - const text = cleanUpMessage(data.choices[i].text, false, false, false); + const text = cleanUpMessage(data.choices[i].text, false, false, false, false); swipes.push(text); } } @@ -4495,7 +4495,7 @@ function extractMultiSwipes(data, type) { return swipes; } -function cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncompleteSentences = false) { +function cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncompleteSentences = false, skipStopStringCleanup = false) { if (!getMessage) { return ''; } @@ -4510,14 +4510,16 @@ function cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncomplete getMessage = substituteParams(power_user.user_prompt_bias) + getMessage; } - const stoppingStrings = getStoppingStrings(isImpersonate, isContinue); + if (!skipStopStringCleanup) { + const stoppingStrings = getStoppingStrings(isImpersonate, isContinue); - for (const stoppingString of stoppingStrings) { - if (stoppingString.length) { - for (let j = stoppingString.length; j > 0; j--) { - if (getMessage.slice(-j) === stoppingString.slice(0, j)) { - getMessage = getMessage.slice(0, 
-j); - break; + for (const stoppingString of stoppingStrings) { + if (stoppingString.length) { + for (let j = stoppingString.length; j > 0; j--) { + if (getMessage.slice(-j) === stoppingString.slice(0, j)) { + getMessage = getMessage.slice(0, -j); + break; + } } } } From 87cbe361fca5670d59f8931fbf312ef5f733aa3e Mon Sep 17 00:00:00 2001 From: valadaptive Date: Tue, 12 Dec 2023 16:32:54 -0500 Subject: [PATCH 076/179] Cache stopping strings rather than skipping them --- public/script.js | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/public/script.js b/public/script.js index f430009a2..cd95d6e0c 100644 --- a/public/script.js +++ b/public/script.js @@ -2629,12 +2629,12 @@ class StreamingProcessor { if (!isImpersonate && !isContinue && Array.isArray(this.swipes) && this.swipes.length > 0) { for (let i = 0; i < this.swipes.length; i++) { - this.swipes[i] = cleanUpMessage(this.removePrefix(this.swipes[i]), false, false, true, !isFinal); + this.swipes[i] = cleanUpMessage(this.removePrefix(this.swipes[i]), false, false, true, this.stoppingStrings); } } text = this.removePrefix(text); - let processedText = cleanUpMessage(text, isImpersonate, isContinue, !isFinal, !isFinal); + let processedText = cleanUpMessage(text, isImpersonate, isContinue, !isFinal, this.stoppingStrings); // Predict unbalanced asterisks / quotes during streaming const charsToBalance = ['*', '"', '```']; @@ -2805,6 +2805,10 @@ class StreamingProcessor { scrollLock = false; } + const isImpersonate = this.type == 'impersonate'; + const isContinue = this.type == 'continue'; + this.stoppingStrings = getStoppingStrings(isImpersonate, isContinue); + try { const sw = new Stopwatch(1000 / power_user.streaming_fps); const timestamps = []; @@ -2907,7 +2911,7 @@ export async function generateRaw(prompt, api, instructOverride) { throw new Error(data.error); } - const message = cleanUpMessage(extractMessageFromData(data), false, false, true, false); + const 
message = cleanUpMessage(extractMessageFromData(data), false, false, true); if (!message) { throw new Error('No message generated'); @@ -3814,7 +3818,7 @@ async function Generate(type, { automatic_trigger, force_name2, quiet_prompt, qu streamingProcessor.generator = streamingGenerator; hideSwipeButtons(); let getMessage = await streamingProcessor.generate(); - let messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false, false); + let messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false); if (isContinue) { getMessage = continue_mag + getMessage; @@ -3849,7 +3853,7 @@ async function Generate(type, { automatic_trigger, force_name2, quiet_prompt, qu const swipes = extractMultiSwipes(data, type); - messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false, false); + messageChunk = cleanUpMessage(getMessage, isImpersonate, isContinue, false); if (isContinue) { getMessage = continue_mag + getMessage; @@ -3857,7 +3861,7 @@ async function Generate(type, { automatic_trigger, force_name2, quiet_prompt, qu //Formating const displayIncomplete = type === 'quiet' && !quietToLoud; - getMessage = cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncomplete, false); + getMessage = cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncomplete); if (getMessage.length > 0) { if (isImpersonate) { @@ -4487,7 +4491,7 @@ function extractMultiSwipes(data, type) { } for (let i = 1; i < data.choices.length; i++) { - const text = cleanUpMessage(data.choices[i].text, false, false, false, false); + const text = cleanUpMessage(data.choices[i].text, false, false, false); swipes.push(text); } } @@ -4495,7 +4499,7 @@ function extractMultiSwipes(data, type) { return swipes; } -function cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncompleteSentences = false, skipStopStringCleanup = false) { +function cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncompleteSentences = false, stoppingStrings = 
null) { if (!getMessage) { return ''; } @@ -4510,16 +4514,18 @@ function cleanUpMessage(getMessage, isImpersonate, isContinue, displayIncomplete getMessage = substituteParams(power_user.user_prompt_bias) + getMessage; } - if (!skipStopStringCleanup) { - const stoppingStrings = getStoppingStrings(isImpersonate, isContinue); + // Allow for caching of stopping strings. getStoppingStrings is an expensive function, especially with macros + // enabled, so for streaming, we call it once and then pass it into each cleanUpMessage call. + if (!stoppingStrings) { + stoppingStrings = getStoppingStrings(isImpersonate, isContinue); + } - for (const stoppingString of stoppingStrings) { - if (stoppingString.length) { - for (let j = stoppingString.length; j > 0; j--) { - if (getMessage.slice(-j) === stoppingString.slice(0, j)) { - getMessage = getMessage.slice(0, -j); - break; - } + for (const stoppingString of stoppingStrings) { + if (stoppingString.length) { + for (let j = stoppingString.length; j > 0; j--) { + if (getMessage.slice(-j) === stoppingString.slice(0, j)) { + getMessage = getMessage.slice(0, -j); + break; } } } From 7732865e4c7f3b1bdd47a5741732bf802e3698c4 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Tue, 12 Dec 2023 16:36:47 -0500 Subject: [PATCH 077/179] Another explanatory comment --- public/script.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/public/script.js b/public/script.js index cd95d6e0c..98e31e8ad 100644 --- a/public/script.js +++ b/public/script.js @@ -2805,6 +2805,8 @@ class StreamingProcessor { scrollLock = false; } + // Stopping strings are expensive to calculate, especially with macros enabled. To remove stopping strings + // when streaming, we cache the result of getStoppingStrings instead of calling it once per token. 
const isImpersonate = this.type == 'impersonate'; const isContinue = this.type == 'continue'; this.stoppingStrings = getStoppingStrings(isImpersonate, isContinue); From 5b3c96df506a7216229562f454b41939d0397a93 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Mon, 11 Dec 2023 23:13:26 -0500 Subject: [PATCH 078/179] Rename /textgenerationwebui endpoint I'd like to migrate over to using "textgen" to mean text-generation APIs in general, so I've renamed the /textgenerationwebui/* endpoints to /backends/text-completions/*. --- public/script.js | 4 ++-- public/scripts/textgen-settings.js | 2 +- server.js | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/public/script.js b/public/script.js index f430009a2..a7f1acd8f 100644 --- a/public/script.js +++ b/public/script.js @@ -928,7 +928,7 @@ async function getStatusKobold() { } async function getStatusTextgen() { - const url = '/api/textgenerationwebui/status'; + const url = '/api/backends/text-completions/status'; let endpoint = textgen_settings.type === MANCER ? 
MANCER_SERVER : @@ -4428,7 +4428,7 @@ function getGenerateUrl(api) { if (api == 'kobold') { generate_url = '/generate'; } else if (api == 'textgenerationwebui') { - generate_url = '/api/textgenerationwebui/generate'; + generate_url = '/api/backends/text-completions/generate'; } else if (api == 'novel') { generate_url = '/api/novelai/generate'; } diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js index 9c938f701..58a3aba62 100644 --- a/public/scripts/textgen-settings.js +++ b/public/scripts/textgen-settings.js @@ -468,7 +468,7 @@ function setSettingByName(setting, value, trigger) { async function generateTextGenWithStreaming(generate_data, signal) { generate_data.stream = true; - const response = await fetch('/api/textgenerationwebui/generate', { + const response = await fetch('/api/backends/text-completions/generate', { headers: { ...getRequestHeaders(), }, diff --git a/server.js b/server.js index ec9fb03aa..36f6a2ce0 100644 --- a/server.js +++ b/server.js @@ -447,7 +447,7 @@ app.post('/generate', jsonParser, async function (request, response_generate) { }); //************** Text generation web UI -app.post('/api/textgenerationwebui/status', jsonParser, async function (request, response) { +app.post('/api/backends/text-completions/status', jsonParser, async function (request, response) { if (!request.body) return response.sendStatus(400); try { @@ -555,7 +555,7 @@ app.post('/api/textgenerationwebui/status', jsonParser, async function (request, } }); -app.post('/api/textgenerationwebui/generate', jsonParser, async function (request, response_generate) { +app.post('/api/backends/text-completions/generate', jsonParser, async function (request, response_generate) { if (!request.body) return response_generate.sendStatus(400); try { From 35c2f8bf66f0b70ce811f594cd9e08fb7721ba3c Mon Sep 17 00:00:00 2001 From: valadaptive Date: Mon, 11 Dec 2023 23:19:53 -0500 Subject: [PATCH 079/179] Move text completions API endpoints to own module --- 
server.js | 204 +------------------------------- src/endpoints/backends/ooba.js | 207 +++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+), 198 deletions(-) create mode 100644 src/endpoints/backends/ooba.js diff --git a/server.js b/server.js index 36f6a2ce0..0a4973994 100644 --- a/server.js +++ b/server.js @@ -133,7 +133,7 @@ const API_OPENAI = 'https://api.openai.com/v1'; const API_CLAUDE = 'https://api.anthropic.com/v1'; const SETTINGS_FILE = './public/settings.json'; -const { DIRECTORIES, UPLOADS_PATH, PALM_SAFETY, TEXTGEN_TYPES, CHAT_COMPLETION_SOURCES, AVATAR_WIDTH, AVATAR_HEIGHT } = require('./src/constants'); +const { DIRECTORIES, UPLOADS_PATH, PALM_SAFETY, CHAT_COMPLETION_SOURCES, AVATAR_WIDTH, AVATAR_HEIGHT } = require('./src/constants'); // CORS Settings // const CORS = cors({ @@ -446,203 +446,6 @@ app.post('/generate', jsonParser, async function (request, response_generate) { return response_generate.send({ error: true }); }); -//************** Text generation web UI -app.post('/api/backends/text-completions/status', jsonParser, async function (request, response) { - if (!request.body) return response.sendStatus(400); - - try { - if (request.body.api_server.indexOf('localhost') !== -1) { - request.body.api_server = request.body.api_server.replace('localhost', '127.0.0.1'); - } - - console.log('Trying to connect to API:', request.body); - - // Convert to string + remove trailing slash + /v1 suffix - const baseUrl = String(request.body.api_server).replace(/\/$/, '').replace(/\/v1$/, ''); - - const args = { - headers: { 'Content-Type': 'application/json' }, - }; - - setAdditionalHeaders(request, args, baseUrl); - - let url = baseUrl; - let result = ''; - - if (request.body.legacy_api) { - url += '/v1/model'; - } else { - switch (request.body.api_type) { - case TEXTGEN_TYPES.OOBA: - case TEXTGEN_TYPES.APHRODITE: - case TEXTGEN_TYPES.KOBOLDCPP: - url += '/v1/models'; - break; - case TEXTGEN_TYPES.MANCER: - url += '/oai/v1/models'; - break; - 
case TEXTGEN_TYPES.TABBY: - url += '/v1/model/list'; - break; - } - } - - const modelsReply = await fetch(url, args); - - if (!modelsReply.ok) { - console.log('Models endpoint is offline.'); - return response.status(400); - } - - const data = await modelsReply.json(); - - if (request.body.legacy_api) { - console.log('Legacy API response:', data); - return response.send({ result: data?.result }); - } - - if (!Array.isArray(data.data)) { - console.log('Models response is not an array.'); - return response.status(400); - } - - const modelIds = data.data.map(x => x.id); - console.log('Models available:', modelIds); - - // Set result to the first model ID - result = modelIds[0] || 'Valid'; - - if (request.body.api_type === TEXTGEN_TYPES.OOBA) { - try { - const modelInfoUrl = baseUrl + '/v1/internal/model/info'; - const modelInfoReply = await fetch(modelInfoUrl, args); - - if (modelInfoReply.ok) { - const modelInfo = await modelInfoReply.json(); - console.log('Ooba model info:', modelInfo); - - const modelName = modelInfo?.model_name; - result = modelName || result; - } - } catch (error) { - console.error(`Failed to get Ooba model info: ${error}`); - } - } else if (request.body.api_type === TEXTGEN_TYPES.TABBY) { - try { - const modelInfoUrl = baseUrl + '/v1/model'; - const modelInfoReply = await fetch(modelInfoUrl, args); - - if (modelInfoReply.ok) { - const modelInfo = await modelInfoReply.json(); - console.log('Tabby model info:', modelInfo); - - const modelName = modelInfo?.id; - result = modelName || result; - } else { - // TabbyAPI returns an error 400 if a model isn't loaded - - result = 'None'; - } - } catch (error) { - console.error(`Failed to get TabbyAPI model info: ${error}`); - } - } - - return response.send({ result, data: data.data }); - } catch (error) { - console.error(error); - return response.status(500); - } -}); - -app.post('/api/backends/text-completions/generate', jsonParser, async function (request, response_generate) { - if (!request.body) return 
response_generate.sendStatus(400); - - try { - if (request.body.api_server.indexOf('localhost') !== -1) { - request.body.api_server = request.body.api_server.replace('localhost', '127.0.0.1'); - } - - const baseUrl = request.body.api_server; - console.log(request.body); - - const controller = new AbortController(); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - - // Convert to string + remove trailing slash + /v1 suffix - let url = String(baseUrl).replace(/\/$/, '').replace(/\/v1$/, ''); - - if (request.body.legacy_api) { - url += '/v1/generate'; - } else { - switch (request.body.api_type) { - case TEXTGEN_TYPES.APHRODITE: - case TEXTGEN_TYPES.OOBA: - case TEXTGEN_TYPES.TABBY: - case TEXTGEN_TYPES.KOBOLDCPP: - url += '/v1/completions'; - break; - case TEXTGEN_TYPES.MANCER: - url += '/oai/v1/completions'; - break; - } - } - - const args = { - method: 'POST', - body: JSON.stringify(request.body), - headers: { 'Content-Type': 'application/json' }, - signal: controller.signal, - timeout: 0, - }; - - setAdditionalHeaders(request, args, baseUrl); - - if (request.body.stream) { - const completionsStream = await fetch(url, args); - // Pipe remote SSE stream to Express response - forwardFetchResponse(completionsStream, response_generate); - } - else { - const completionsReply = await fetch(url, args); - - if (completionsReply.ok) { - const data = await completionsReply.json(); - console.log('Endpoint response:', data); - - // Wrap legacy response to OAI completions format - if (request.body.legacy_api) { - const text = data?.results[0]?.text; - data['choices'] = [{ text }]; - } - - return response_generate.send(data); - } else { - const text = await completionsReply.text(); - const errorBody = { error: true, status: completionsReply.status, response: text }; - - if (!response_generate.headersSent) { - return response_generate.send(errorBody); - } - - return response_generate.end(); - } - } - } catch 
(error) { - let value = { error: true, status: error?.status, response: error?.statusText }; - console.log('Endpoint error:', error); - - if (!response_generate.headersSent) { - return response_generate.send(value); - } - - return response_generate.end(); - } -}); - // Only called for kobold app.post('/getstatus', jsonParser, async function (request, response) { if (!request.body) return response.sendStatus(400); @@ -1816,6 +1619,11 @@ app.use('/api/extra/caption', require('./src/endpoints/caption').router); // Web search extension app.use('/api/serpapi', require('./src/endpoints/serpapi').router); +// The different text generation APIs + +// Ooba/OpenAI text completions +app.use('/api/backends/ooba', require('./src/endpoints/backends/ooba').router); + const tavernUrl = new URL( (cliArguments.ssl ? 'https://' : 'http://') + (listen ? '0.0.0.0' : '127.0.0.1') + diff --git a/src/endpoints/backends/ooba.js b/src/endpoints/backends/ooba.js new file mode 100644 index 000000000..75d9ea439 --- /dev/null +++ b/src/endpoints/backends/ooba.js @@ -0,0 +1,207 @@ +const express = require('express'); + +const { jsonParser } = require('../../express-common'); +const { TEXTGEN_TYPES } = require('../../constants'); +const { forwardFetchResponse } = require('../../util'); +const { setAdditionalHeaders } = require('../../additional-headers'); + +const router = express.Router(); + +//************** Ooba/OpenAI text completions API +router.post('/status', jsonParser, async function (request, response) { + if (!request.body) return response.sendStatus(400); + + try { + if (request.body.api_server.indexOf('localhost') !== -1) { + request.body.api_server = request.body.api_server.replace('localhost', '127.0.0.1'); + } + + console.log('Trying to connect to API:', request.body); + + // Convert to string + remove trailing slash + /v1 suffix + const baseUrl = String(request.body.api_server).replace(/\/$/, '').replace(/\/v1$/, ''); + + const args = { + headers: { 'Content-Type': 
'application/json' }, + }; + + setAdditionalHeaders(request, args, baseUrl); + + let url = baseUrl; + let result = ''; + + if (request.body.legacy_api) { + url += '/v1/model'; + } else { + switch (request.body.api_type) { + case TEXTGEN_TYPES.OOBA: + case TEXTGEN_TYPES.APHRODITE: + case TEXTGEN_TYPES.KOBOLDCPP: + url += '/v1/models'; + break; + case TEXTGEN_TYPES.MANCER: + url += '/oai/v1/models'; + break; + case TEXTGEN_TYPES.TABBY: + url += '/v1/model/list'; + break; + } + } + + const modelsReply = await fetch(url, args); + + if (!modelsReply.ok) { + console.log('Models endpoint is offline.'); + return response.status(400); + } + + const data = await modelsReply.json(); + + if (request.body.legacy_api) { + console.log('Legacy API response:', data); + return response.send({ result: data?.result }); + } + + if (!Array.isArray(data.data)) { + console.log('Models response is not an array.'); + return response.status(400); + } + + const modelIds = data.data.map(x => x.id); + console.log('Models available:', modelIds); + + // Set result to the first model ID + result = modelIds[0] || 'Valid'; + + if (request.body.api_type === TEXTGEN_TYPES.OOBA) { + try { + const modelInfoUrl = baseUrl + '/v1/internal/model/info'; + const modelInfoReply = await fetch(modelInfoUrl, args); + + if (modelInfoReply.ok) { + const modelInfo = await modelInfoReply.json(); + console.log('Ooba model info:', modelInfo); + + const modelName = modelInfo?.model_name; + result = modelName || result; + } + } catch (error) { + console.error(`Failed to get Ooba model info: ${error}`); + } + } else if (request.body.api_type === TEXTGEN_TYPES.TABBY) { + try { + const modelInfoUrl = baseUrl + '/v1/model'; + const modelInfoReply = await fetch(modelInfoUrl, args); + + if (modelInfoReply.ok) { + const modelInfo = await modelInfoReply.json(); + console.log('Tabby model info:', modelInfo); + + const modelName = modelInfo?.id; + result = modelName || result; + } else { + // TabbyAPI returns an error 400 if a 
model isn't loaded + + result = 'None'; + } + } catch (error) { + console.error(`Failed to get TabbyAPI model info: ${error}`); + } + } + + return response.send({ result, data: data.data }); + } catch (error) { + console.error(error); + return response.status(500); + } +}); + +router.post('/generate', jsonParser, async function (request, response_generate) { + if (!request.body) return response_generate.sendStatus(400); + + try { + if (request.body.api_server.indexOf('localhost') !== -1) { + request.body.api_server = request.body.api_server.replace('localhost', '127.0.0.1'); + } + + const baseUrl = request.body.api_server; + console.log(request.body); + + const controller = new AbortController(); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + + // Convert to string + remove trailing slash + /v1 suffix + let url = String(baseUrl).replace(/\/$/, '').replace(/\/v1$/, ''); + + if (request.body.legacy_api) { + url += '/v1/generate'; + } else { + switch (request.body.api_type) { + case TEXTGEN_TYPES.APHRODITE: + case TEXTGEN_TYPES.OOBA: + case TEXTGEN_TYPES.TABBY: + case TEXTGEN_TYPES.KOBOLDCPP: + url += '/v1/completions'; + break; + case TEXTGEN_TYPES.MANCER: + url += '/oai/v1/completions'; + break; + } + } + + const args = { + method: 'POST', + body: JSON.stringify(request.body), + headers: { 'Content-Type': 'application/json' }, + signal: controller.signal, + timeout: 0, + }; + + setAdditionalHeaders(request, args, baseUrl); + + if (request.body.stream) { + const completionsStream = await fetch(url, args); + // Pipe remote SSE stream to Express response + forwardFetchResponse(completionsStream, response_generate); + } + else { + const completionsReply = await fetch(url, args); + + if (completionsReply.ok) { + const data = await completionsReply.json(); + console.log('Endpoint response:', data); + + // Wrap legacy response to OAI completions format + if (request.body.legacy_api) { + const text = 
data?.results[0]?.text; + data['choices'] = [{ text }]; + } + + return response_generate.send(data); + } else { + const text = await completionsReply.text(); + const errorBody = { error: true, status: completionsReply.status, response: text }; + + if (!response_generate.headersSent) { + return response_generate.send(errorBody); + } + + return response_generate.end(); + } + } + } catch (error) { + let value = { error: true, status: error?.status, response: error?.statusText }; + console.log('Endpoint error:', error); + + if (!response_generate.headersSent) { + return response_generate.send(value); + } + + return response_generate.end(); + } +}); + +module.exports = { router }; From 274605a07c31a91453c7377dfe78eb2b63782e51 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Mon, 11 Dec 2023 23:25:25 -0500 Subject: [PATCH 080/179] Rename Kobold-related endpoints --- public/script.js | 4 ++-- public/scripts/kai-settings.js | 2 +- server.js | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/public/script.js b/public/script.js index a7f1acd8f..19b651858 100644 --- a/public/script.js +++ b/public/script.js @@ -891,7 +891,7 @@ async function getStatusKobold() { } try { - const response = await fetch('/getstatus', { + const response = await fetch('/api/backends/kobold/status', { method: 'POST', headers: getRequestHeaders(), body: JSON.stringify({ @@ -4426,7 +4426,7 @@ function setInContextMessages(lastmsg, type) { function getGenerateUrl(api) { let generate_url = ''; if (api == 'kobold') { - generate_url = '/generate'; + generate_url = '/api/backends/kobold/generate'; } else if (api == 'textgenerationwebui') { generate_url = '/api/backends/text-completions/generate'; } else if (api == 'novel') { diff --git a/public/scripts/kai-settings.js b/public/scripts/kai-settings.js index 7bdfc5f26..21037bb32 100644 --- a/public/scripts/kai-settings.js +++ b/public/scripts/kai-settings.js @@ -173,7 +173,7 @@ function tryParseStreamingError(response, decoded) { } export 
async function generateKoboldWithStreaming(generate_data, signal) { - const response = await fetch('/generate', { + const response = await fetch('/api/backends/kobold/generate', { headers: getRequestHeaders(), body: JSON.stringify(generate_data), method: 'POST', diff --git a/server.js b/server.js index 0a4973994..34cdf7c65 100644 --- a/server.js +++ b/server.js @@ -313,7 +313,7 @@ app.get('/version', async function (_, response) { }); //**************Kobold api -app.post('/generate', jsonParser, async function (request, response_generate) { +app.post('/api/backends/kobold/generate', jsonParser, async function (request, response_generate) { if (!request.body) return response_generate.sendStatus(400); if (request.body.api_server.indexOf('localhost') != -1) { @@ -447,7 +447,7 @@ app.post('/generate', jsonParser, async function (request, response_generate) { }); // Only called for kobold -app.post('/getstatus', jsonParser, async function (request, response) { +app.post('/api/backends/kobold/status', jsonParser, async function (request, response) { if (!request.body) return response.sendStatus(400); let api_server = request.body.api_server; if (api_server.indexOf('localhost') != -1) { From 2c159ff93f95e1276ee1a8eb37db4bb3fc668cce Mon Sep 17 00:00:00 2001 From: valadaptive Date: Mon, 11 Dec 2023 23:27:16 -0500 Subject: [PATCH 081/179] Move Kobold API endpoints to their own module --- server.js | 187 +----------------------------- src/endpoints/backends/kobold.js | 188 +++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+), 184 deletions(-) create mode 100644 src/endpoints/backends/kobold.js diff --git a/server.js b/server.js index 34cdf7c65..7d0dd68fc 100644 --- a/server.js +++ b/server.js @@ -45,7 +45,6 @@ const { jsonParser, urlencodedParser } = require('./src/express-common.js'); const contentManager = require('./src/endpoints/content-manager'); const { readSecret, migrateSecrets, SECRET_KEYS } = require('./src/endpoints/secrets'); const { - delay, 
getVersion, getConfigValue, color, @@ -61,7 +60,6 @@ const { const { ensureThumbnailCache } = require('./src/endpoints/thumbnails'); const { getTokenizerModel, getTiktokenTokenizer, loadTokenizers, TEXT_COMPLETION_MODELS, getSentencepiceTokenizer, sentencepieceTokenizers } = require('./src/endpoints/tokenizers'); const { convertClaudePrompt } = require('./src/chat-completion'); -const { getOverrideHeaders, setAdditionalHeaders } = require('./src/additional-headers'); // Work around a node v20.0.0, v20.1.0, and v20.2.0 bug. The issue was fixed in v20.3.0. // https://github.com/nodejs/node/issues/47822#issuecomment-1564708870 @@ -312,188 +310,6 @@ app.get('/version', async function (_, response) { response.send(data); }); -//**************Kobold api -app.post('/api/backends/kobold/generate', jsonParser, async function (request, response_generate) { - if (!request.body) return response_generate.sendStatus(400); - - if (request.body.api_server.indexOf('localhost') != -1) { - request.body.api_server = request.body.api_server.replace('localhost', '127.0.0.1'); - } - - const request_prompt = request.body.prompt; - const controller = new AbortController(); - request.socket.removeAllListeners('close'); - request.socket.on('close', async function () { - if (request.body.can_abort && !response_generate.writableEnded) { - try { - console.log('Aborting Kobold generation...'); - // send abort signal to koboldcpp - const abortResponse = await fetch(`${request.body.api_server}/extra/abort`, { - method: 'POST', - }); - - if (!abortResponse.ok) { - console.log('Error sending abort request to Kobold:', abortResponse.status); - } - } catch (error) { - console.log(error); - } - } - controller.abort(); - }); - - let this_settings = { - prompt: request_prompt, - use_story: false, - use_memory: false, - use_authors_note: false, - use_world_info: false, - max_context_length: request.body.max_context_length, - max_length: request.body.max_length, - }; - - if (request.body.gui_settings == 
false) { - const sampler_order = [request.body.s1, request.body.s2, request.body.s3, request.body.s4, request.body.s5, request.body.s6, request.body.s7]; - this_settings = { - prompt: request_prompt, - use_story: false, - use_memory: false, - use_authors_note: false, - use_world_info: false, - max_context_length: request.body.max_context_length, - max_length: request.body.max_length, - rep_pen: request.body.rep_pen, - rep_pen_range: request.body.rep_pen_range, - rep_pen_slope: request.body.rep_pen_slope, - temperature: request.body.temperature, - tfs: request.body.tfs, - top_a: request.body.top_a, - top_k: request.body.top_k, - top_p: request.body.top_p, - min_p: request.body.min_p, - typical: request.body.typical, - sampler_order: sampler_order, - singleline: !!request.body.singleline, - use_default_badwordsids: request.body.use_default_badwordsids, - mirostat: request.body.mirostat, - mirostat_eta: request.body.mirostat_eta, - mirostat_tau: request.body.mirostat_tau, - grammar: request.body.grammar, - sampler_seed: request.body.sampler_seed, - }; - if (request.body.stop_sequence) { - this_settings['stop_sequence'] = request.body.stop_sequence; - } - } - - console.log(this_settings); - const args = { - body: JSON.stringify(this_settings), - headers: Object.assign( - { 'Content-Type': 'application/json' }, - getOverrideHeaders((new URL(request.body.api_server))?.host), - ), - signal: controller.signal, - }; - - const MAX_RETRIES = 50; - const delayAmount = 2500; - for (let i = 0; i < MAX_RETRIES; i++) { - try { - const url = request.body.streaming ? 
`${request.body.api_server}/extra/generate/stream` : `${request.body.api_server}/v1/generate`; - const response = await fetch(url, { method: 'POST', timeout: 0, ...args }); - - if (request.body.streaming) { - // Pipe remote SSE stream to Express response - forwardFetchResponse(response, response_generate); - return; - } else { - if (!response.ok) { - const errorText = await response.text(); - console.log(`Kobold returned error: ${response.status} ${response.statusText} ${errorText}`); - - try { - const errorJson = JSON.parse(errorText); - const message = errorJson?.detail?.msg || errorText; - return response_generate.status(400).send({ error: { message } }); - } catch { - return response_generate.status(400).send({ error: { message: errorText } }); - } - } - - const data = await response.json(); - console.log('Endpoint response:', data); - return response_generate.send(data); - } - } catch (error) { - // response - switch (error?.status) { - case 403: - case 503: // retry in case of temporary service issue, possibly caused by a queue failure? - console.debug(`KoboldAI is busy. Retry attempt ${i + 1} of ${MAX_RETRIES}...`); - await delay(delayAmount); - break; - default: - if ('status' in error) { - console.log('Status Code from Kobold:', error.status); - } - return response_generate.send({ error: true }); - } - } - } - - console.log('Max retries exceeded. 
Giving up.'); - return response_generate.send({ error: true }); -}); - -// Only called for kobold -app.post('/api/backends/kobold/status', jsonParser, async function (request, response) { - if (!request.body) return response.sendStatus(400); - let api_server = request.body.api_server; - if (api_server.indexOf('localhost') != -1) { - api_server = api_server.replace('localhost', '127.0.0.1'); - } - - const args = { - headers: { 'Content-Type': 'application/json' }, - }; - - setAdditionalHeaders(request, args, api_server); - - const result = {}; - - const [koboldUnitedResponse, koboldExtraResponse, koboldModelResponse] = await Promise.all([ - // We catch errors both from the response not having a successful HTTP status and from JSON parsing failing - - // Kobold United API version - fetch(`${api_server}/v1/info/version`).then(response => { - if (!response.ok) throw new Error(`Kobold API error: ${response.status, response.statusText}`); - return response.json(); - }).catch(() => ({ result: '0.0.0' })), - - // KoboldCpp version - fetch(`${api_server}/extra/version`).then(response => { - if (!response.ok) throw new Error(`Kobold API error: ${response.status, response.statusText}`); - return response.json(); - }).catch(() => ({ version: '0.0' })), - - // Current model - fetch(`${api_server}/v1/model`).then(response => { - if (!response.ok) throw new Error(`Kobold API error: ${response.status, response.statusText}`); - return response.json(); - }).catch(() => null), - ]); - - result.koboldUnitedVersion = koboldUnitedResponse.result; - result.koboldCppVersion = koboldExtraResponse.result; - result.model = !koboldModelResponse || koboldModelResponse.result === 'ReadOnly' ? 
- 'no_connection' : - koboldModelResponse.result; - - response.send(result); -}); - - app.post('/getuseravatars', jsonParser, function (request, response) { var images = getImages('public/User Avatars'); response.send(JSON.stringify(images)); @@ -1624,6 +1440,9 @@ app.use('/api/serpapi', require('./src/endpoints/serpapi').router); // Ooba/OpenAI text completions app.use('/api/backends/ooba', require('./src/endpoints/backends/ooba').router); +// KoboldAI +app.use('/api/textgen/kobold', require('./src/endpoints/textgen/kobold').router); + const tavernUrl = new URL( (cliArguments.ssl ? 'https://' : 'http://') + (listen ? '0.0.0.0' : '127.0.0.1') + diff --git a/src/endpoints/backends/kobold.js b/src/endpoints/backends/kobold.js new file mode 100644 index 000000000..6093a4fa6 --- /dev/null +++ b/src/endpoints/backends/kobold.js @@ -0,0 +1,188 @@ +const express = require('express'); + +const { jsonParser } = require('../../express-common'); +const { forwardFetchResponse, delay } = require('../../util'); +const { getOverrideHeaders, setAdditionalHeaders } = require('../../additional-headers'); + +const router = express.Router(); + +router.post('/generate', jsonParser, async function (request, response_generate) { + if (!request.body) return response_generate.sendStatus(400); + + if (request.body.api_server.indexOf('localhost') != -1) { + request.body.api_server = request.body.api_server.replace('localhost', '127.0.0.1'); + } + + const request_prompt = request.body.prompt; + const controller = new AbortController(); + request.socket.removeAllListeners('close'); + request.socket.on('close', async function () { + if (request.body.can_abort && !response_generate.writableEnded) { + try { + console.log('Aborting Kobold generation...'); + // send abort signal to koboldcpp + const abortResponse = await fetch(`${request.body.api_server}/extra/abort`, { + method: 'POST', + }); + + if (!abortResponse.ok) { + console.log('Error sending abort request to Kobold:', 
abortResponse.status); + } + } catch (error) { + console.log(error); + } + } + controller.abort(); + }); + + let this_settings = { + prompt: request_prompt, + use_story: false, + use_memory: false, + use_authors_note: false, + use_world_info: false, + max_context_length: request.body.max_context_length, + max_length: request.body.max_length, + }; + + if (request.body.gui_settings == false) { + const sampler_order = [request.body.s1, request.body.s2, request.body.s3, request.body.s4, request.body.s5, request.body.s6, request.body.s7]; + this_settings = { + prompt: request_prompt, + use_story: false, + use_memory: false, + use_authors_note: false, + use_world_info: false, + max_context_length: request.body.max_context_length, + max_length: request.body.max_length, + rep_pen: request.body.rep_pen, + rep_pen_range: request.body.rep_pen_range, + rep_pen_slope: request.body.rep_pen_slope, + temperature: request.body.temperature, + tfs: request.body.tfs, + top_a: request.body.top_a, + top_k: request.body.top_k, + top_p: request.body.top_p, + min_p: request.body.min_p, + typical: request.body.typical, + sampler_order: sampler_order, + singleline: !!request.body.singleline, + use_default_badwordsids: request.body.use_default_badwordsids, + mirostat: request.body.mirostat, + mirostat_eta: request.body.mirostat_eta, + mirostat_tau: request.body.mirostat_tau, + grammar: request.body.grammar, + sampler_seed: request.body.sampler_seed, + }; + if (request.body.stop_sequence) { + this_settings['stop_sequence'] = request.body.stop_sequence; + } + } + + console.log(this_settings); + const args = { + body: JSON.stringify(this_settings), + headers: Object.assign( + { 'Content-Type': 'application/json' }, + getOverrideHeaders((new URL(request.body.api_server))?.host), + ), + signal: controller.signal, + }; + + const MAX_RETRIES = 50; + const delayAmount = 2500; + for (let i = 0; i < MAX_RETRIES; i++) { + try { + const url = request.body.streaming ? 
`${request.body.api_server}/extra/generate/stream` : `${request.body.api_server}/v1/generate`; + const response = await fetch(url, { method: 'POST', timeout: 0, ...args }); + + if (request.body.streaming) { + // Pipe remote SSE stream to Express response + forwardFetchResponse(response, response_generate); + return; + } else { + if (!response.ok) { + const errorText = await response.text(); + console.log(`Kobold returned error: ${response.status} ${response.statusText} ${errorText}`); + + try { + const errorJson = JSON.parse(errorText); + const message = errorJson?.detail?.msg || errorText; + return response_generate.status(400).send({ error: { message } }); + } catch { + return response_generate.status(400).send({ error: { message: errorText } }); + } + } + + const data = await response.json(); + console.log('Endpoint response:', data); + return response_generate.send(data); + } + } catch (error) { + // response + switch (error?.status) { + case 403: + case 503: // retry in case of temporary service issue, possibly caused by a queue failure? + console.debug(`KoboldAI is busy. Retry attempt ${i + 1} of ${MAX_RETRIES}...`); + await delay(delayAmount); + break; + default: + if ('status' in error) { + console.log('Status Code from Kobold:', error.status); + } + return response_generate.send({ error: true }); + } + } + } + + console.log('Max retries exceeded. 
Giving up.'); + return response_generate.send({ error: true }); +}); + +router.post('/status', jsonParser, async function (request, response) { + if (!request.body) return response.sendStatus(400); + let api_server = request.body.api_server; + if (api_server.indexOf('localhost') != -1) { + api_server = api_server.replace('localhost', '127.0.0.1'); + } + + const args = { + headers: { 'Content-Type': 'application/json' }, + }; + + setAdditionalHeaders(request, args, api_server); + + const result = {}; + + const [koboldUnitedResponse, koboldExtraResponse, koboldModelResponse] = await Promise.all([ + // We catch errors both from the response not having a successful HTTP status and from JSON parsing failing + + // Kobold United API version + fetch(`${api_server}/v1/info/version`).then(response => { + if (!response.ok) throw new Error(`Kobold API error: ${response.status, response.statusText}`); + return response.json(); + }).catch(() => ({ result: '0.0.0' })), + + // KoboldCpp version + fetch(`${api_server}/extra/version`).then(response => { + if (!response.ok) throw new Error(`Kobold API error: ${response.status, response.statusText}`); + return response.json(); + }).catch(() => ({ version: '0.0' })), + + // Current model + fetch(`${api_server}/v1/model`).then(response => { + if (!response.ok) throw new Error(`Kobold API error: ${response.status, response.statusText}`); + return response.json(); + }).catch(() => null), + ]); + + result.koboldUnitedVersion = koboldUnitedResponse.result; + result.koboldCppVersion = koboldExtraResponse.result; + result.model = !koboldModelResponse || koboldModelResponse.result === 'ReadOnly' ? 
+ 'no_connection' : + koboldModelResponse.result; + + response.send(result); +}); + +module.exports = { router }; From 52de5869fecd95b5f2325c02a9f304676865b006 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Wed, 13 Dec 2023 02:22:35 +0200 Subject: [PATCH 082/179] Rename file, add missing fetch --- server.js | 2 +- src/endpoints/backends/{ooba.js => text-completions.js} | 1 + src/util.js | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) rename src/endpoints/backends/{ooba.js => text-completions.js} (99%) diff --git a/server.js b/server.js index 0a4973994..31718e577 100644 --- a/server.js +++ b/server.js @@ -1622,7 +1622,7 @@ app.use('/api/serpapi', require('./src/endpoints/serpapi').router); // The different text generation APIs // Ooba/OpenAI text completions -app.use('/api/backends/ooba', require('./src/endpoints/backends/ooba').router); +app.use('/api/backends/text-completions', require('./src/endpoints/backends/text-completions').router); const tavernUrl = new URL( (cliArguments.ssl ? 'https://' : 'http://') + diff --git a/src/endpoints/backends/ooba.js b/src/endpoints/backends/text-completions.js similarity index 99% rename from src/endpoints/backends/ooba.js rename to src/endpoints/backends/text-completions.js index 75d9ea439..71387eefd 100644 --- a/src/endpoints/backends/ooba.js +++ b/src/endpoints/backends/text-completions.js @@ -1,4 +1,5 @@ const express = require('express'); +const fetch = require('node-fetch').default; const { jsonParser } = require('../../express-common'); const { TEXTGEN_TYPES } = require('../../constants'); diff --git a/src/util.js b/src/util.js index 11b864092..38714b02b 100644 --- a/src/util.js +++ b/src/util.js @@ -349,7 +349,7 @@ function getImages(path) { /** * Pipe a fetch() response to an Express.js Response, including status code. - * @param {Response} from The Fetch API response to pipe from. + * @param {import('node-fetch').Response} from The fetch response to pipe from. 
* @param {Express.Response} to The Express response to pipe to. */ function forwardFetchResponse(from, to) { From 0d0dd5e170c19a81660158fb6090ad2c9d60aa64 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Wed, 13 Dec 2023 02:50:50 +0200 Subject: [PATCH 083/179] Revert old comment --- src/util.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util.js b/src/util.js index 38714b02b..be8d5135f 100644 --- a/src/util.js +++ b/src/util.js @@ -349,7 +349,7 @@ function getImages(path) { /** * Pipe a fetch() response to an Express.js Response, including status code. - * @param {import('node-fetch').Response} from The fetch response to pipe from. + * @param {import('node-fetch').Response} from The Fetch API response to pipe from. * @param {Express.Response} to The Express response to pipe to. */ function forwardFetchResponse(from, to) { From cebd6e9e0f6f3e8086be0aa4a2d1b22b9829305e Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 01:28:18 +0200 Subject: [PATCH 084/179] Add API token ids from KoboldCpp --- public/scripts/tokenizers.js | 29 +++++++++++++++++++++++++++++ src/endpoints/tokenizers.js | 5 +++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index bef54b791..decd0f919 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -63,6 +63,7 @@ const TOKENIZER_URLS = { }, [tokenizers.API_KOBOLD]: { count: '/api/tokenizers/remote/kobold/count', + encode: '/api/tokenizers/remote/kobold/count', }, [tokenizers.MISTRAL]: { encode: '/api/tokenizers/mistral/encode', @@ -617,6 +618,32 @@ function getTextTokensFromTextgenAPI(str) { return ids; } +/** + * Calls the AI provider's tokenize API to encode a string to tokens. + * @param {string} str String to tokenize. + * @returns {number[]} Array of token ids. 
+ */ +function getTextTokensFromKoboldAPI(str) { + let ids = []; + + jQuery.ajax({ + async: false, + type: 'POST', + url: TOKENIZER_URLS[tokenizers.API_KOBOLD].encode, + data: JSON.stringify({ + text: str, + url: api_server, + }), + dataType: 'json', + contentType: 'application/json', + success: function (data) { + ids = data.ids; + }, + }); + + return ids; +} + /** * Calls the underlying tokenizer model to decode token ids to text. * @param {string} endpoint API endpoint. @@ -650,6 +677,8 @@ export function getTextTokens(tokenizerType, str) { return getTextTokens(currentRemoteTokenizerAPI(), str); case tokenizers.API_TEXTGENERATIONWEBUI: return getTextTokensFromTextgenAPI(str); + case tokenizers.API_KOBOLD: + return getTextTokensFromKoboldAPI(str); default: { const tokenizerEndpoints = TOKENIZER_URLS[tokenizerType]; if (!tokenizerEndpoints) { diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js index 27ef4faf3..a81779d97 100644 --- a/src/endpoints/tokenizers.js +++ b/src/endpoints/tokenizers.js @@ -562,7 +562,8 @@ router.post('/remote/kobold/count', jsonParser, async function (request, respons const data = await result.json(); const count = data['value']; - return response.send({ count, ids: [] }); + const ids = data['ids'] ?? []; + return response.send({ count, ids }); } catch (error) { console.log(error); return response.send({ error: true }); @@ -617,7 +618,7 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re const data = await result.json(); const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value); - const ids = legacyApi ? [] : (data?.tokens ?? []); + const ids = legacyApi ? [] : (data?.tokens ?? data?.ids ?? 
[]); return response.send({ count, ids }); } catch (error) { From c8bc9cf24c11e97ef1e399a6d27f1ef1b0a98188 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 01:37:51 +0200 Subject: [PATCH 085/179] Fix route name --- server.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.js b/server.js index d0936103c..2374df5a2 100644 --- a/server.js +++ b/server.js @@ -1441,7 +1441,7 @@ app.use('/api/serpapi', require('./src/endpoints/serpapi').router); app.use('/api/backends/text-completions', require('./src/endpoints/backends/text-completions').router); // KoboldAI -app.use('/api/textgen/kobold', require('./src/endpoints/textgen/kobold').router); +app.use('/api/backends/kobold', require('./src/endpoints/backends/kobold').router); const tavernUrl = new URL( (cliArguments.ssl ? 'https://' : 'http://') + From 796659f68c4dcd8f723b7b9de8239bade6e0d12f Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 01:39:34 +0200 Subject: [PATCH 086/179] Add proper fetch import --- src/endpoints/backends/kobold.js | 1 + 1 file changed, 1 insertion(+) diff --git a/src/endpoints/backends/kobold.js b/src/endpoints/backends/kobold.js index 6093a4fa6..66d7990ec 100644 --- a/src/endpoints/backends/kobold.js +++ b/src/endpoints/backends/kobold.js @@ -1,4 +1,5 @@ const express = require('express'); +const fetch = require('node-fetch').default; const { jsonParser } = require('../../express-common'); const { forwardFetchResponse, delay } = require('../../util'); From 92bd766bcb0de3342f44c6ef20165b4580bdb740 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Mon, 11 Dec 2023 23:33:52 -0500 Subject: [PATCH 087/179] Rename chat completions endpoints OpenAI calls this the "Chat Completions API", in contrast to their previous "Text Completions API", so that's what I'm naming it; both because other services besides OpenAI implement it, and to avoid confusion with the existing 
/api/openai route used for OpenAI extras. --- public/scripts/openai.js | 6 +++--- server.js | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/public/scripts/openai.js b/public/scripts/openai.js index a9bc5e304..3e8447cb4 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -1556,7 +1556,7 @@ async function sendOpenAIRequest(type, messages, signal) { generate_data['seed'] = oai_settings.seed; } - const generate_url = '/generate_openai'; + const generate_url = '/api/backends/chat-completions/generate'; const response = await fetch(generate_url, { method: 'POST', body: JSON.stringify(generate_data), @@ -1646,7 +1646,7 @@ async function calculateLogitBias() { let result = {}; try { - const reply = await fetch(`/openai_bias?model=${getTokenizerModel()}`, { + const reply = await fetch(`/api/backends/chat-completions/bias?model=${getTokenizerModel()}`, { method: 'POST', headers: getRequestHeaders(), body, @@ -2439,7 +2439,7 @@ async function getStatusOpen() { } try { - const response = await fetch('/getstatus_openai', { + const response = await fetch('/api/backends/chat-completions/status', { method: 'POST', headers: getRequestHeaders(), body: JSON.stringify(data), diff --git a/server.js b/server.js index 2374df5a2..6f1c7c8c3 100644 --- a/server.js +++ b/server.js @@ -626,7 +626,7 @@ function cleanUploads() { } /* OpenAI */ -app.post('/getstatus_openai', jsonParser, async function (request, response_getstatus_openai) { +app.post('/api/backends/chat-completions/status', jsonParser, async function (request, response_getstatus_openai) { if (!request.body) return response_getstatus_openai.sendStatus(400); let api_url; @@ -702,7 +702,7 @@ app.post('/getstatus_openai', jsonParser, async function (request, response_gets } }); -app.post('/openai_bias', jsonParser, async function (request, response) { +app.post('/api/backends/chat-completions/bias', jsonParser, async function (request, response) { if (!request.body || 
!Array.isArray(request.body)) return response.sendStatus(400); @@ -1067,7 +1067,7 @@ async function sendPalmRequest(request, response) { } } -app.post('/generate_openai', jsonParser, function (request, response_generate_openai) { +app.post('/api/backends/chat-completions/generate', jsonParser, function (request, response_generate_openai) { if (!request.body) return response_generate_openai.status(400).send({ error: true }); switch (request.body.chat_completion_source) { From dba66e756a5436cd8dbd3976a271bedd63e491ca Mon Sep 17 00:00:00 2001 From: valadaptive Date: Mon, 11 Dec 2023 23:50:43 -0500 Subject: [PATCH 088/179] Move chat completions API endpoints to module --- server.js | 619 +----------------- src/chat-completion.js | 77 --- src/endpoints/backends/chat-completions.js | 700 +++++++++++++++++++++ src/endpoints/tokenizers.js | 2 +- 4 files changed, 706 insertions(+), 692 deletions(-) delete mode 100644 src/chat-completion.js create mode 100644 src/endpoints/backends/chat-completions.js diff --git a/server.js b/server.js index 6f1c7c8c3..af0f89a5e 100644 --- a/server.js +++ b/server.js @@ -48,7 +48,6 @@ const { getVersion, getConfigValue, color, - uuidv4, tryParse, clientRelativePath, removeFileExtension, @@ -58,8 +57,7 @@ const { forwardFetchResponse, } = require('./src/util'); const { ensureThumbnailCache } = require('./src/endpoints/thumbnails'); -const { getTokenizerModel, getTiktokenTokenizer, loadTokenizers, TEXT_COMPLETION_MODELS, getSentencepiceTokenizer, sentencepieceTokenizers } = require('./src/endpoints/tokenizers'); -const { convertClaudePrompt } = require('./src/chat-completion'); +const { loadTokenizers } = require('./src/endpoints/tokenizers'); // Work around a node v20.0.0, v20.1.0, and v20.2.0 bug. The issue was fixed in v20.3.0. 
// https://github.com/nodejs/node/issues/47822#issuecomment-1564708870 @@ -127,11 +125,8 @@ const autorun = (getConfigValue('autorun', false) || cliArguments.autorun) && !c const enableExtensions = getConfigValue('enableExtensions', true); const listen = getConfigValue('listen', false); -const API_OPENAI = 'https://api.openai.com/v1'; -const API_CLAUDE = 'https://api.anthropic.com/v1'; - const SETTINGS_FILE = './public/settings.json'; -const { DIRECTORIES, UPLOADS_PATH, PALM_SAFETY, CHAT_COMPLETION_SOURCES, AVATAR_WIDTH, AVATAR_HEIGHT } = require('./src/constants'); +const { DIRECTORIES, UPLOADS_PATH, AVATAR_WIDTH, AVATAR_HEIGHT } = require('./src/constants'); // CORS Settings // const CORS = cors({ @@ -625,223 +620,6 @@ function cleanUploads() { } } -/* OpenAI */ -app.post('/api/backends/chat-completions/status', jsonParser, async function (request, response_getstatus_openai) { - if (!request.body) return response_getstatus_openai.sendStatus(400); - - let api_url; - let api_key_openai; - let headers; - - if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) { - api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString(); - api_key_openai = request.body.reverse_proxy ? 
request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI); - headers = {}; - } else { - api_url = 'https://openrouter.ai/api/v1'; - api_key_openai = readSecret(SECRET_KEYS.OPENROUTER); - // OpenRouter needs to pass the referer: https://openrouter.ai/docs - headers = { 'HTTP-Referer': request.headers.referer }; - } - - if (!api_key_openai && !request.body.reverse_proxy) { - console.log('OpenAI API key is missing.'); - return response_getstatus_openai.status(400).send({ error: true }); - } - - try { - const response = await fetch(api_url + '/models', { - method: 'GET', - headers: { - 'Authorization': 'Bearer ' + api_key_openai, - ...headers, - }, - }); - - if (response.ok) { - const data = await response.json(); - response_getstatus_openai.send(data); - - if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.OPENROUTER && Array.isArray(data?.data)) { - let models = []; - - data.data.forEach(model => { - const context_length = model.context_length; - const tokens_dollar = Number(1 / (1000 * model.pricing?.prompt)); - const tokens_rounded = (Math.round(tokens_dollar * 1000) / 1000).toFixed(0); - models[model.id] = { - tokens_per_dollar: tokens_rounded + 'k', - context_length: context_length, - }; - }); - - console.log('Available OpenRouter models:', models); - } else { - const models = data?.data; - - if (Array.isArray(models)) { - const modelIds = models.filter(x => x && typeof x === 'object').map(x => x.id).sort(); - console.log('Available OpenAI models:', modelIds); - } else { - console.log('OpenAI endpoint did not return a list of models.'); - } - } - } - else { - console.log('OpenAI status check failed. 
Either Access Token is incorrect or API endpoint is down.'); - response_getstatus_openai.send({ error: true, can_bypass: true, data: { data: [] } }); - } - } catch (e) { - console.error(e); - - if (!response_getstatus_openai.headersSent) { - response_getstatus_openai.send({ error: true }); - } else { - response_getstatus_openai.end(); - } - } -}); - -app.post('/api/backends/chat-completions/bias', jsonParser, async function (request, response) { - if (!request.body || !Array.isArray(request.body)) - return response.sendStatus(400); - - try { - const result = {}; - const model = getTokenizerModel(String(request.query.model || '')); - - // no bias for claude - if (model == 'claude') { - return response.send(result); - } - - let encodeFunction; - - if (sentencepieceTokenizers.includes(model)) { - const tokenizer = getSentencepiceTokenizer(model); - const instance = await tokenizer?.get(); - encodeFunction = (text) => new Uint32Array(instance?.encodeIds(text)); - } else { - const tokenizer = getTiktokenTokenizer(model); - encodeFunction = (tokenizer.encode.bind(tokenizer)); - } - - for (const entry of request.body) { - if (!entry || !entry.text) { - continue; - } - - try { - const tokens = getEntryTokens(entry.text, encodeFunction); - - for (const token of tokens) { - result[token] = entry.value; - } - } catch { - console.warn('Tokenizer failed to encode:', entry.text); - } - } - - // not needed for cached tokenizers - //tokenizer.free(); - return response.send(result); - - /** - * Gets tokenids for a given entry - * @param {string} text Entry text - * @param {(string) => Uint32Array} encode Function to encode text to token ids - * @returns {Uint32Array} Array of token ids - */ - function getEntryTokens(text, encode) { - // Get raw token ids from JSON array - if (text.trim().startsWith('[') && text.trim().endsWith(']')) { - try { - const json = JSON.parse(text); - if (Array.isArray(json) && json.every(x => typeof x === 'number')) { - return new Uint32Array(json); - } - 
} catch { - // ignore - } - } - - // Otherwise, get token ids from tokenizer - return encode(text); - } - } catch (error) { - console.error(error); - return response.send({}); - } -}); - -function convertChatMLPrompt(messages) { - if (typeof messages === 'string') { - return messages; - } - - const messageStrings = []; - messages.forEach(m => { - if (m.role === 'system' && m.name === undefined) { - messageStrings.push('System: ' + m.content); - } - else if (m.role === 'system' && m.name !== undefined) { - messageStrings.push(m.name + ': ' + m.content); - } - else { - messageStrings.push(m.role + ': ' + m.content); - } - }); - return messageStrings.join('\n') + '\nassistant:'; -} - -async function sendScaleRequest(request, response) { - - const api_url = new URL(request.body.api_url_scale).toString(); - const api_key_scale = readSecret(SECRET_KEYS.SCALE); - - if (!api_key_scale) { - console.log('Scale API key is missing.'); - return response.status(400).send({ error: true }); - } - - const requestPrompt = convertChatMLPrompt(request.body.messages); - console.log('Scale request:', requestPrompt); - - try { - const controller = new AbortController(); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - - const generateResponse = await fetch(api_url, { - method: 'POST', - body: JSON.stringify({ input: { input: requestPrompt } }), - headers: { - 'Content-Type': 'application/json', - 'Authorization': `Basic ${api_key_scale}`, - }, - timeout: 0, - }); - - if (!generateResponse.ok) { - console.log(`Scale API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); - return response.status(generateResponse.status).send({ error: true }); - } - - const generateResponseJson = await generateResponse.json(); - console.log('Scale response:', generateResponseJson); - - const reply = { choices: [{ 'message': { 'content': generateResponseJson.output } }] }; - return 
response.send(reply); - } catch (error) { - console.log(error); - if (!response.headersSent) { - return response.status(500).send({ error: true }); - } - } -} - app.post('/generate_altscale', jsonParser, function (request, response_generate_scale) { if (!request.body) return response_generate_scale.sendStatus(400); @@ -908,396 +686,6 @@ app.post('/generate_altscale', jsonParser, function (request, response_generate_ }); -/** - * @param {express.Request} request - * @param {express.Response} response - */ -async function sendClaudeRequest(request, response) { - - const api_url = new URL(request.body.reverse_proxy || API_CLAUDE).toString(); - const api_key_claude = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.CLAUDE); - - if (!api_key_claude) { - console.log('Claude API key is missing.'); - return response.status(400).send({ error: true }); - } - - try { - const controller = new AbortController(); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - - let doSystemPrompt = request.body.model === 'claude-2' || request.body.model === 'claude-2.1'; - let requestPrompt = convertClaudePrompt(request.body.messages, true, !request.body.exclude_assistant, doSystemPrompt); - - if (request.body.assistant_prefill && !request.body.exclude_assistant) { - requestPrompt += request.body.assistant_prefill; - } - - console.log('Claude request:', requestPrompt); - const stop_sequences = ['\n\nHuman:', '\n\nSystem:', '\n\nAssistant:']; - - // Add custom stop sequences - if (Array.isArray(request.body.stop)) { - stop_sequences.push(...request.body.stop); - } - - const generateResponse = await fetch(api_url + '/complete', { - method: 'POST', - signal: controller.signal, - body: JSON.stringify({ - prompt: requestPrompt, - model: request.body.model, - max_tokens_to_sample: request.body.max_tokens, - stop_sequences: stop_sequences, - temperature: request.body.temperature, - top_p: 
request.body.top_p, - top_k: request.body.top_k, - stream: request.body.stream, - }), - headers: { - 'Content-Type': 'application/json', - 'anthropic-version': '2023-06-01', - 'x-api-key': api_key_claude, - }, - timeout: 0, - }); - - if (request.body.stream) { - // Pipe remote SSE stream to Express response - forwardFetchResponse(generateResponse, response); - } else { - if (!generateResponse.ok) { - console.log(`Claude API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); - return response.status(generateResponse.status).send({ error: true }); - } - - const generateResponseJson = await generateResponse.json(); - const responseText = generateResponseJson.completion; - console.log('Claude response:', responseText); - - // Wrap it back to OAI format - const reply = { choices: [{ 'message': { 'content': responseText } }] }; - return response.send(reply); - } - } catch (error) { - console.log('Error communicating with Claude: ', error); - if (!response.headersSent) { - return response.status(500).send({ error: true }); - } - } -} - -/** - * @param {express.Request} request - * @param {express.Response} response - */ -async function sendPalmRequest(request, response) { - const api_key_palm = readSecret(SECRET_KEYS.PALM); - - if (!api_key_palm) { - console.log('Palm API key is missing.'); - return response.status(400).send({ error: true }); - } - - const body = { - prompt: { - text: request.body.messages, - }, - stopSequences: request.body.stop, - safetySettings: PALM_SAFETY, - temperature: request.body.temperature, - topP: request.body.top_p, - topK: request.body.top_k || undefined, - maxOutputTokens: request.body.max_tokens, - candidate_count: 1, - }; - - console.log('Palm request:', body); - - try { - const controller = new AbortController(); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - - const generateResponse = await 
fetch(`https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText?key=${api_key_palm}`, { - body: JSON.stringify(body), - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - signal: controller.signal, - timeout: 0, - }); - - if (!generateResponse.ok) { - console.log(`Palm API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); - return response.status(generateResponse.status).send({ error: true }); - } - - const generateResponseJson = await generateResponse.json(); - const responseText = generateResponseJson?.candidates[0]?.output; - - if (!responseText) { - console.log('Palm API returned no response', generateResponseJson); - let message = `Palm API returned no response: ${JSON.stringify(generateResponseJson)}`; - - // Check for filters - if (generateResponseJson?.filters[0]?.message) { - message = `Palm filter triggered: ${generateResponseJson.filters[0].message}`; - } - - return response.send({ error: { message } }); - } - - console.log('Palm response:', responseText); - - // Wrap it back to OAI format - const reply = { choices: [{ 'message': { 'content': responseText } }] }; - return response.send(reply); - } catch (error) { - console.log('Error communicating with Palm API: ', error); - if (!response.headersSent) { - return response.status(500).send({ error: true }); - } - } -} - -app.post('/api/backends/chat-completions/generate', jsonParser, function (request, response_generate_openai) { - if (!request.body) return response_generate_openai.status(400).send({ error: true }); - - switch (request.body.chat_completion_source) { - case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response_generate_openai); - case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response_generate_openai); - case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response_generate_openai); - case CHAT_COMPLETION_SOURCES.PALM: return 
sendPalmRequest(request, response_generate_openai); - } - - let api_url; - let api_key_openai; - let headers; - let bodyParams; - - if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) { - api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString(); - api_key_openai = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI); - headers = {}; - bodyParams = {}; - - if (getConfigValue('openai.randomizeUserId', false)) { - bodyParams['user'] = uuidv4(); - } - } else { - api_url = 'https://openrouter.ai/api/v1'; - api_key_openai = readSecret(SECRET_KEYS.OPENROUTER); - // OpenRouter needs to pass the referer: https://openrouter.ai/docs - headers = { 'HTTP-Referer': request.headers.referer }; - bodyParams = { 'transforms': ['middle-out'] }; - - if (request.body.use_fallback) { - bodyParams['route'] = 'fallback'; - } - } - - if (!api_key_openai && !request.body.reverse_proxy) { - console.log('OpenAI API key is missing.'); - return response_generate_openai.status(400).send({ error: true }); - } - - // Add custom stop sequences - if (Array.isArray(request.body.stop) && request.body.stop.length > 0) { - bodyParams['stop'] = request.body.stop; - } - - const isTextCompletion = Boolean(request.body.model && TEXT_COMPLETION_MODELS.includes(request.body.model)) || typeof request.body.messages === 'string'; - const textPrompt = isTextCompletion ? convertChatMLPrompt(request.body.messages) : ''; - const endpointUrl = isTextCompletion && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER ? 
- `${api_url}/completions` : - `${api_url}/chat/completions`; - - const controller = new AbortController(); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - - /** @type {import('node-fetch').RequestInit} */ - const config = { - method: 'post', - headers: { - 'Content-Type': 'application/json', - 'Authorization': 'Bearer ' + api_key_openai, - ...headers, - }, - body: JSON.stringify({ - 'messages': isTextCompletion === false ? request.body.messages : undefined, - 'prompt': isTextCompletion === true ? textPrompt : undefined, - 'model': request.body.model, - 'temperature': request.body.temperature, - 'max_tokens': request.body.max_tokens, - 'stream': request.body.stream, - 'presence_penalty': request.body.presence_penalty, - 'frequency_penalty': request.body.frequency_penalty, - 'top_p': request.body.top_p, - 'top_k': request.body.top_k, - 'stop': isTextCompletion === false ? request.body.stop : undefined, - 'logit_bias': request.body.logit_bias, - 'seed': request.body.seed, - ...bodyParams, - }), - signal: controller.signal, - timeout: 0, - }; - - console.log(JSON.parse(String(config.body))); - - makeRequest(config, response_generate_openai, request); - - /** - * - * @param {*} config - * @param {express.Response} response_generate_openai - * @param {express.Request} request - * @param {Number} retries - * @param {Number} timeout - */ - async function makeRequest(config, response_generate_openai, request, retries = 5, timeout = 5000) { - try { - const fetchResponse = await fetch(endpointUrl, config); - - if (request.body.stream) { - console.log('Streaming request in progress'); - forwardFetchResponse(fetchResponse, response_generate_openai); - return; - } - - if (fetchResponse.ok) { - let json = await fetchResponse.json(); - response_generate_openai.send(json); - console.log(json); - console.log(json?.choices[0]?.message); - } else if (fetchResponse.status === 429 && retries > 0) { - 
console.log(`Out of quota, retrying in ${Math.round(timeout / 1000)}s`); - setTimeout(() => { - timeout *= 2; - makeRequest(config, response_generate_openai, request, retries - 1, timeout); - }, timeout); - } else { - await handleErrorResponse(fetchResponse); - } - } catch (error) { - console.log('Generation failed', error); - if (!response_generate_openai.headersSent) { - response_generate_openai.send({ error: true }); - } else { - response_generate_openai.end(); - } - } - } - - async function handleErrorResponse(response) { - const responseText = await response.text(); - const errorData = tryParse(responseText); - - const statusMessages = { - 400: 'Bad request', - 401: 'Unauthorized', - 402: 'Credit limit reached', - 403: 'Forbidden', - 404: 'Not found', - 429: 'Too many requests', - 451: 'Unavailable for legal reasons', - 502: 'Bad gateway', - }; - - const message = errorData?.error?.message || statusMessages[response.status] || 'Unknown error occurred'; - const quota_error = response.status === 429 && errorData?.error?.type === 'insufficient_quota'; - console.log(message); - - if (!response_generate_openai.headersSent) { - response_generate_openai.send({ error: { message }, quota_error: quota_error }); - } else if (!response_generate_openai.writableEnded) { - response_generate_openai.write(response); - } else { - response_generate_openai.end(); - } - } -}); - -async function sendAI21Request(request, response) { - if (!request.body) return response.sendStatus(400); - const controller = new AbortController(); - console.log(request.body.messages); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - const options = { - method: 'POST', - headers: { - accept: 'application/json', - 'content-type': 'application/json', - Authorization: `Bearer ${readSecret(SECRET_KEYS.AI21)}`, - }, - body: JSON.stringify({ - numResults: 1, - maxTokens: request.body.max_tokens, - minTokens: 0, - temperature: 
request.body.temperature, - topP: request.body.top_p, - stopSequences: request.body.stop_tokens, - topKReturn: request.body.top_k, - frequencyPenalty: { - scale: request.body.frequency_penalty * 100, - applyToWhitespaces: false, - applyToPunctuations: false, - applyToNumbers: false, - applyToStopwords: false, - applyToEmojis: false, - }, - presencePenalty: { - scale: request.body.presence_penalty, - applyToWhitespaces: false, - applyToPunctuations: false, - applyToNumbers: false, - applyToStopwords: false, - applyToEmojis: false, - }, - countPenalty: { - scale: request.body.count_pen, - applyToWhitespaces: false, - applyToPunctuations: false, - applyToNumbers: false, - applyToStopwords: false, - applyToEmojis: false, - }, - prompt: request.body.messages, - }), - signal: controller.signal, - }; - - fetch(`https://api.ai21.com/studio/v1/${request.body.model}/complete`, options) - .then(r => r.json()) - .then(r => { - if (r.completions === undefined) { - console.log(r); - } else { - console.log(r.completions[0].data.text); - } - const reply = { choices: [{ 'message': { 'content': r.completions[0].data.text } }] }; - return response.send(reply); - }) - .catch(err => { - console.error(err); - return response.send({ error: true }); - }); - -} - /** * Redirect a deprecated API endpoint URL to its replacement. Because fetch, form submissions, and $.ajax follow * redirects, this is transparent to client-side code. @@ -1443,6 +831,9 @@ app.use('/api/backends/text-completions', require('./src/endpoints/backends/text // KoboldAI app.use('/api/backends/kobold', require('./src/endpoints/backends/kobold').router); +// OpenAI chat completions +app.use('/api/backends/chat-completions', require('./src/endpoints/backends/chat-completions').router); + const tavernUrl = new URL( (cliArguments.ssl ? 'https://' : 'http://') + (listen ? 
'0.0.0.0' : '127.0.0.1') + diff --git a/src/chat-completion.js b/src/chat-completion.js deleted file mode 100644 index 4fc21a550..000000000 --- a/src/chat-completion.js +++ /dev/null @@ -1,77 +0,0 @@ -/** - * Convert a prompt from the ChatML objects to the format used by Claude. - * @param {object[]} messages Array of messages - * @param {boolean} addHumanPrefix Add Human prefix - * @param {boolean} addAssistantPostfix Add Assistant postfix - * @param {boolean} withSystemPrompt Build system prompt before "\n\nHuman: " - * @returns {string} Prompt for Claude - * @copyright Prompt Conversion script taken from RisuAI by kwaroran (GPLv3). - */ -function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, withSystemPrompt) { - // Claude doesn't support message names, so we'll just add them to the message content. - for (const message of messages) { - if (message.name && message.role !== 'system') { - message.content = message.name + ': ' + message.content; - delete message.name; - } - } - - let systemPrompt = ''; - if (withSystemPrompt) { - let lastSystemIdx = -1; - - for (let i = 0; i < messages.length - 1; i++) { - const message = messages[i]; - if (message.role === 'system' && !message.name) { - systemPrompt += message.content + '\n\n'; - } else { - lastSystemIdx = i - 1; - break; - } - } - if (lastSystemIdx >= 0) { - messages.splice(0, lastSystemIdx + 1); - } - } - - let requestPrompt = messages.map((v) => { - let prefix = ''; - switch (v.role) { - case 'assistant': - prefix = '\n\nAssistant: '; - break; - case 'user': - prefix = '\n\nHuman: '; - break; - case 'system': - // According to the Claude docs, H: and A: should be used for example conversations. 
- if (v.name === 'example_assistant') { - prefix = '\n\nA: '; - } else if (v.name === 'example_user') { - prefix = '\n\nH: '; - } else { - prefix = '\n\n'; - } - break; - } - return prefix + v.content; - }).join(''); - - if (addHumanPrefix) { - requestPrompt = '\n\nHuman: ' + requestPrompt; - } - - if (addAssistantPostfix) { - requestPrompt = requestPrompt + '\n\nAssistant: '; - } - - if (withSystemPrompt) { - requestPrompt = systemPrompt + requestPrompt; - } - - return requestPrompt; -} - -module.exports = { - convertClaudePrompt, -}; diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js new file mode 100644 index 000000000..ec7dafe5c --- /dev/null +++ b/src/endpoints/backends/chat-completions.js @@ -0,0 +1,700 @@ +const express = require('express'); +const fetch = require('node-fetch').default; + +const { jsonParser } = require('../../express-common'); +const { CHAT_COMPLETION_SOURCES, PALM_SAFETY } = require('../../constants'); +const { forwardFetchResponse, getConfigValue, tryParse, uuidv4 } = require('../../util'); + +const { readSecret, SECRET_KEYS } = require('../secrets'); +const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers'); + +const API_OPENAI = 'https://api.openai.com/v1'; +const API_CLAUDE = 'https://api.anthropic.com/v1'; + +/** + * Convert a prompt from the ChatML objects to the format used by Claude. + * @param {object[]} messages Array of messages + * @param {boolean} addHumanPrefix Add Human prefix + * @param {boolean} addAssistantPostfix Add Assistant postfix + * @param {boolean} withSystemPrompt Build system prompt before "\n\nHuman: " + * @returns {string} Prompt for Claude + * @copyright Prompt Conversion script taken from RisuAI by kwaroran (GPLv3). 
+ */ +function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, withSystemPrompt) { + // Claude doesn't support message names, so we'll just add them to the message content. + for (const message of messages) { + if (message.name && message.role !== 'system') { + message.content = message.name + ': ' + message.content; + delete message.name; + } + } + + let systemPrompt = ''; + if (withSystemPrompt) { + let lastSystemIdx = -1; + + for (let i = 0; i < messages.length - 1; i++) { + const message = messages[i]; + if (message.role === 'system' && !message.name) { + systemPrompt += message.content + '\n\n'; + } else { + lastSystemIdx = i - 1; + break; + } + } + if (lastSystemIdx >= 0) { + messages.splice(0, lastSystemIdx + 1); + } + } + + let requestPrompt = messages.map((v) => { + let prefix = ''; + switch (v.role) { + case 'assistant': + prefix = '\n\nAssistant: '; + break; + case 'user': + prefix = '\n\nHuman: '; + break; + case 'system': + // According to the Claude docs, H: and A: should be used for example conversations. + if (v.name === 'example_assistant') { + prefix = '\n\nA: '; + } else if (v.name === 'example_user') { + prefix = '\n\nH: '; + } else { + prefix = '\n\n'; + } + break; + } + return prefix + v.content; + }).join(''); + + if (addHumanPrefix) { + requestPrompt = '\n\nHuman: ' + requestPrompt; + } + + if (addAssistantPostfix) { + requestPrompt = requestPrompt + '\n\nAssistant: '; + } + + if (withSystemPrompt) { + requestPrompt = systemPrompt + requestPrompt; + } + + return requestPrompt; +} + +/** + * @param {express.Request} request + * @param {express.Response} response + */ +async function sendClaudeRequest(request, response) { + + const api_url = new URL(request.body.reverse_proxy || API_CLAUDE).toString(); + const api_key_claude = request.body.reverse_proxy ? 
request.body.proxy_password : readSecret(SECRET_KEYS.CLAUDE); + + if (!api_key_claude) { + console.log('Claude API key is missing.'); + return response.status(400).send({ error: true }); + } + + try { + const controller = new AbortController(); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + + let doSystemPrompt = request.body.model === 'claude-2' || request.body.model === 'claude-2.1'; + let requestPrompt = convertClaudePrompt(request.body.messages, true, !request.body.exclude_assistant, doSystemPrompt); + + if (request.body.assistant_prefill && !request.body.exclude_assistant) { + requestPrompt += request.body.assistant_prefill; + } + + console.log('Claude request:', requestPrompt); + const stop_sequences = ['\n\nHuman:', '\n\nSystem:', '\n\nAssistant:']; + + // Add custom stop sequences + if (Array.isArray(request.body.stop)) { + stop_sequences.push(...request.body.stop); + } + + const generateResponse = await fetch(api_url + '/complete', { + method: 'POST', + signal: controller.signal, + body: JSON.stringify({ + prompt: requestPrompt, + model: request.body.model, + max_tokens_to_sample: request.body.max_tokens, + stop_sequences: stop_sequences, + temperature: request.body.temperature, + top_p: request.body.top_p, + top_k: request.body.top_k, + stream: request.body.stream, + }), + headers: { + 'Content-Type': 'application/json', + 'anthropic-version': '2023-06-01', + 'x-api-key': api_key_claude, + }, + timeout: 0, + }); + + if (request.body.stream) { + // Pipe remote SSE stream to Express response + forwardFetchResponse(generateResponse, response); + } else { + if (!generateResponse.ok) { + console.log(`Claude API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); + return response.status(generateResponse.status).send({ error: true }); + } + + const generateResponseJson = await generateResponse.json(); + const responseText = 
generateResponseJson.completion; + console.log('Claude response:', responseText); + + // Wrap it back to OAI format + const reply = { choices: [{ 'message': { 'content': responseText } }] }; + return response.send(reply); + } + } catch (error) { + console.log('Error communicating with Claude: ', error); + if (!response.headersSent) { + return response.status(500).send({ error: true }); + } + } +} + +function convertChatMLPrompt(messages) { + if (typeof messages === 'string') { + return messages; + } + + const messageStrings = []; + messages.forEach(m => { + if (m.role === 'system' && m.name === undefined) { + messageStrings.push('System: ' + m.content); + } + else if (m.role === 'system' && m.name !== undefined) { + messageStrings.push(m.name + ': ' + m.content); + } + else { + messageStrings.push(m.role + ': ' + m.content); + } + }); + return messageStrings.join('\n') + '\nassistant:'; +} + +async function sendScaleRequest(request, response) { + + const api_url = new URL(request.body.api_url_scale).toString(); + const api_key_scale = readSecret(SECRET_KEYS.SCALE); + + if (!api_key_scale) { + console.log('Scale API key is missing.'); + return response.status(400).send({ error: true }); + } + + const requestPrompt = convertChatMLPrompt(request.body.messages); + console.log('Scale request:', requestPrompt); + + try { + const controller = new AbortController(); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + + const generateResponse = await fetch(api_url, { + method: 'POST', + body: JSON.stringify({ input: { input: requestPrompt } }), + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Basic ${api_key_scale}`, + }, + timeout: 0, + }); + + if (!generateResponse.ok) { + console.log(`Scale API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); + return response.status(generateResponse.status).send({ error: true }); + } + + 
const generateResponseJson = await generateResponse.json(); + console.log('Scale response:', generateResponseJson); + + const reply = { choices: [{ 'message': { 'content': generateResponseJson.output } }] }; + return response.send(reply); + } catch (error) { + console.log(error); + if (!response.headersSent) { + return response.status(500).send({ error: true }); + } + } +} + +/** + * @param {express.Request} request + * @param {express.Response} response + */ +async function sendPalmRequest(request, response) { + const api_key_palm = readSecret(SECRET_KEYS.PALM); + + if (!api_key_palm) { + console.log('Palm API key is missing.'); + return response.status(400).send({ error: true }); + } + + const body = { + prompt: { + text: request.body.messages, + }, + stopSequences: request.body.stop, + safetySettings: PALM_SAFETY, + temperature: request.body.temperature, + topP: request.body.top_p, + topK: request.body.top_k || undefined, + maxOutputTokens: request.body.max_tokens, + candidate_count: 1, + }; + + console.log('Palm request:', body); + + try { + const controller = new AbortController(); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + + const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText?key=${api_key_palm}`, { + body: JSON.stringify(body), + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + signal: controller.signal, + timeout: 0, + }); + + if (!generateResponse.ok) { + console.log(`Palm API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); + return response.status(generateResponse.status).send({ error: true }); + } + + const generateResponseJson = await generateResponse.json(); + const responseText = generateResponseJson?.candidates[0]?.output; + + if (!responseText) { + console.log('Palm API returned no response', generateResponseJson); + 
let message = `Palm API returned no response: ${JSON.stringify(generateResponseJson)}`; + + // Check for filters + if (generateResponseJson?.filters[0]?.message) { + message = `Palm filter triggered: ${generateResponseJson.filters[0].message}`; + } + + return response.send({ error: { message } }); + } + + console.log('Palm response:', responseText); + + // Wrap it back to OAI format + const reply = { choices: [{ 'message': { 'content': responseText } }] }; + return response.send(reply); + } catch (error) { + console.log('Error communicating with Palm API: ', error); + if (!response.headersSent) { + return response.status(500).send({ error: true }); + } + } +} + +async function sendAI21Request(request, response) { + if (!request.body) return response.sendStatus(400); + const controller = new AbortController(); + console.log(request.body.messages); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + const options = { + method: 'POST', + headers: { + accept: 'application/json', + 'content-type': 'application/json', + Authorization: `Bearer ${readSecret(SECRET_KEYS.AI21)}`, + }, + body: JSON.stringify({ + numResults: 1, + maxTokens: request.body.max_tokens, + minTokens: 0, + temperature: request.body.temperature, + topP: request.body.top_p, + stopSequences: request.body.stop_tokens, + topKReturn: request.body.top_k, + frequencyPenalty: { + scale: request.body.frequency_penalty * 100, + applyToWhitespaces: false, + applyToPunctuations: false, + applyToNumbers: false, + applyToStopwords: false, + applyToEmojis: false, + }, + presencePenalty: { + scale: request.body.presence_penalty, + applyToWhitespaces: false, + applyToPunctuations: false, + applyToNumbers: false, + applyToStopwords: false, + applyToEmojis: false, + }, + countPenalty: { + scale: request.body.count_pen, + applyToWhitespaces: false, + applyToPunctuations: false, + applyToNumbers: false, + applyToStopwords: false, + applyToEmojis: false, + 
}, + prompt: request.body.messages, + }), + signal: controller.signal, + }; + + fetch(`https://api.ai21.com/studio/v1/${request.body.model}/complete`, options) + .then(r => r.json()) + .then(r => { + if (r.completions === undefined) { + console.log(r); + } else { + console.log(r.completions[0].data.text); + } + const reply = { choices: [{ 'message': { 'content': r.completions[0].data.text } }] }; + return response.send(reply); + }) + .catch(err => { + console.error(err); + return response.send({ error: true }); + }); + +} + +const router = express.Router(); + +router.post('/status', jsonParser, async function (request, response_getstatus_openai) { + if (!request.body) return response_getstatus_openai.sendStatus(400); + + let api_url; + let api_key_openai; + let headers; + + if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) { + api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString(); + api_key_openai = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI); + headers = {}; + } else { + api_url = 'https://openrouter.ai/api/v1'; + api_key_openai = readSecret(SECRET_KEYS.OPENROUTER); + // OpenRouter needs to pass the referer: https://openrouter.ai/docs + headers = { 'HTTP-Referer': request.headers.referer }; + } + + if (!api_key_openai && !request.body.reverse_proxy) { + console.log('OpenAI API key is missing.'); + return response_getstatus_openai.status(400).send({ error: true }); + } + + try { + const response = await fetch(api_url + '/models', { + method: 'GET', + headers: { + 'Authorization': 'Bearer ' + api_key_openai, + ...headers, + }, + }); + + if (response.ok) { + const data = await response.json(); + response_getstatus_openai.send(data); + + if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.OPENROUTER && Array.isArray(data?.data)) { + let models = []; + + data.data.forEach(model => { + const context_length = model.context_length; + const tokens_dollar = Number(1 
/ (1000 * model.pricing?.prompt)); + const tokens_rounded = (Math.round(tokens_dollar * 1000) / 1000).toFixed(0); + models[model.id] = { + tokens_per_dollar: tokens_rounded + 'k', + context_length: context_length, + }; + }); + + console.log('Available OpenRouter models:', models); + } else { + const models = data?.data; + + if (Array.isArray(models)) { + const modelIds = models.filter(x => x && typeof x === 'object').map(x => x.id).sort(); + console.log('Available OpenAI models:', modelIds); + } else { + console.log('OpenAI endpoint did not return a list of models.'); + } + } + } + else { + console.log('OpenAI status check failed. Either Access Token is incorrect or API endpoint is down.'); + response_getstatus_openai.send({ error: true, can_bypass: true, data: { data: [] } }); + } + } catch (e) { + console.error(e); + + if (!response_getstatus_openai.headersSent) { + response_getstatus_openai.send({ error: true }); + } else { + response_getstatus_openai.end(); + } + } +}); + +router.post('/bias', jsonParser, async function (request, response) { + if (!request.body || !Array.isArray(request.body)) + return response.sendStatus(400); + + try { + const result = {}; + const model = getTokenizerModel(String(request.query.model || '')); + + // no bias for claude + if (model == 'claude') { + return response.send(result); + } + + let encodeFunction; + + if (sentencepieceTokenizers.includes(model)) { + const tokenizer = getSentencepiceTokenizer(model); + const instance = await tokenizer?.get(); + encodeFunction = (text) => new Uint32Array(instance?.encodeIds(text)); + } else { + const tokenizer = getTiktokenTokenizer(model); + encodeFunction = (tokenizer.encode.bind(tokenizer)); + } + + for (const entry of request.body) { + if (!entry || !entry.text) { + continue; + } + + try { + const tokens = getEntryTokens(entry.text, encodeFunction); + + for (const token of tokens) { + result[token] = entry.value; + } + } catch { + console.warn('Tokenizer failed to encode:', 
entry.text); + } + } + + // not needed for cached tokenizers + //tokenizer.free(); + return response.send(result); + + /** + * Gets tokenids for a given entry + * @param {string} text Entry text + * @param {(string) => Uint32Array} encode Function to encode text to token ids + * @returns {Uint32Array} Array of token ids + */ + function getEntryTokens(text, encode) { + // Get raw token ids from JSON array + if (text.trim().startsWith('[') && text.trim().endsWith(']')) { + try { + const json = JSON.parse(text); + if (Array.isArray(json) && json.every(x => typeof x === 'number')) { + return new Uint32Array(json); + } + } catch { + // ignore + } + } + + // Otherwise, get token ids from tokenizer + return encode(text); + } + } catch (error) { + console.error(error); + return response.send({}); + } +}); + + +router.post('/generate', jsonParser, function (request, response_generate_openai) { + if (!request.body) return response_generate_openai.status(400).send({ error: true }); + + switch (request.body.chat_completion_source) { + case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response_generate_openai); + case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response_generate_openai); + case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response_generate_openai); + case CHAT_COMPLETION_SOURCES.PALM: return sendPalmRequest(request, response_generate_openai); + } + + let api_url; + let api_key_openai; + let headers; + let bodyParams; + + if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) { + api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString(); + api_key_openai = request.body.reverse_proxy ? 
request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI); + headers = {}; + bodyParams = {}; + + if (getConfigValue('openai.randomizeUserId', false)) { + bodyParams['user'] = uuidv4(); + } + } else { + api_url = 'https://openrouter.ai/api/v1'; + api_key_openai = readSecret(SECRET_KEYS.OPENROUTER); + // OpenRouter needs to pass the referer: https://openrouter.ai/docs + headers = { 'HTTP-Referer': request.headers.referer }; + bodyParams = { 'transforms': ['middle-out'] }; + + if (request.body.use_fallback) { + bodyParams['route'] = 'fallback'; + } + } + + if (!api_key_openai && !request.body.reverse_proxy) { + console.log('OpenAI API key is missing.'); + return response_generate_openai.status(400).send({ error: true }); + } + + // Add custom stop sequences + if (Array.isArray(request.body.stop) && request.body.stop.length > 0) { + bodyParams['stop'] = request.body.stop; + } + + const isTextCompletion = Boolean(request.body.model && TEXT_COMPLETION_MODELS.includes(request.body.model)) || typeof request.body.messages === 'string'; + const textPrompt = isTextCompletion ? convertChatMLPrompt(request.body.messages) : ''; + const endpointUrl = isTextCompletion && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER ? + `${api_url}/completions` : + `${api_url}/chat/completions`; + + const controller = new AbortController(); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + + /** @type {import('node-fetch').RequestInit} */ + const config = { + method: 'post', + headers: { + 'Content-Type': 'application/json', + 'Authorization': 'Bearer ' + api_key_openai, + ...headers, + }, + body: JSON.stringify({ + 'messages': isTextCompletion === false ? request.body.messages : undefined, + 'prompt': isTextCompletion === true ? 
textPrompt : undefined, + 'model': request.body.model, + 'temperature': request.body.temperature, + 'max_tokens': request.body.max_tokens, + 'stream': request.body.stream, + 'presence_penalty': request.body.presence_penalty, + 'frequency_penalty': request.body.frequency_penalty, + 'top_p': request.body.top_p, + 'top_k': request.body.top_k, + 'stop': isTextCompletion === false ? request.body.stop : undefined, + 'logit_bias': request.body.logit_bias, + 'seed': request.body.seed, + ...bodyParams, + }), + signal: controller.signal, + timeout: 0, + }; + + console.log(JSON.parse(String(config.body))); + + makeRequest(config, response_generate_openai, request); + + /** + * + * @param {*} config + * @param {express.Response} response_generate_openai + * @param {express.Request} request + * @param {Number} retries + * @param {Number} timeout + */ + async function makeRequest(config, response_generate_openai, request, retries = 5, timeout = 5000) { + try { + const fetchResponse = await fetch(endpointUrl, config); + + if (request.body.stream) { + console.log('Streaming request in progress'); + forwardFetchResponse(fetchResponse, response_generate_openai); + return; + } + + if (fetchResponse.ok) { + let json = await fetchResponse.json(); + response_generate_openai.send(json); + console.log(json); + console.log(json?.choices[0]?.message); + } else if (fetchResponse.status === 429 && retries > 0) { + console.log(`Out of quota, retrying in ${Math.round(timeout / 1000)}s`); + setTimeout(() => { + timeout *= 2; + makeRequest(config, response_generate_openai, request, retries - 1, timeout); + }, timeout); + } else { + await handleErrorResponse(fetchResponse); + } + } catch (error) { + console.log('Generation failed', error); + if (!response_generate_openai.headersSent) { + response_generate_openai.send({ error: true }); + } else { + response_generate_openai.end(); + } + } + } + + async function handleErrorResponse(response) { + const responseText = await response.text(); + const 
errorData = tryParse(responseText); + + const statusMessages = { + 400: 'Bad request', + 401: 'Unauthorized', + 402: 'Credit limit reached', + 403: 'Forbidden', + 404: 'Not found', + 429: 'Too many requests', + 451: 'Unavailable for legal reasons', + 502: 'Bad gateway', + }; + + const message = errorData?.error?.message || statusMessages[response.status] || 'Unknown error occurred'; + const quota_error = response.status === 429 && errorData?.error?.type === 'insufficient_quota'; + console.log(message); + + if (!response_generate_openai.headersSent) { + response_generate_openai.send({ error: { message }, quota_error: quota_error }); + } else if (!response_generate_openai.writableEnded) { + response_generate_openai.write(response); + } else { + response_generate_openai.end(); + } + } +}); + +module.exports = { + router, + convertClaudePrompt, +}; diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js index a81779d97..bf43ef343 100644 --- a/src/endpoints/tokenizers.js +++ b/src/endpoints/tokenizers.js @@ -4,7 +4,7 @@ const express = require('express'); const { SentencePieceProcessor } = require('@agnai/sentencepiece-js'); const tiktoken = require('@dqbd/tiktoken'); const { Tokenizer } = require('@agnai/web-tokenizers'); -const { convertClaudePrompt } = require('../chat-completion'); +const { convertClaudePrompt } = require('./textgen/chat-completions'); const { readSecret, SECRET_KEYS } = require('./secrets'); const { TEXTGEN_TYPES } = require('../constants'); const { jsonParser } = require('../express-common'); From 22e048b5af6fc84aeffbf5fc38814448407d03d4 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Mon, 11 Dec 2023 23:54:47 -0500 Subject: [PATCH 089/179] Rename generate_altscale endpoint --- public/scripts/openai.js | 2 +- server.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 3e8447cb4..b8cdc818f 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js 
@@ -1382,7 +1382,7 @@ function openRouterGroupByVendor(array) { } async function sendAltScaleRequest(messages, logit_bias, signal, type) { - const generate_url = '/generate_altscale'; + const generate_url = '/api/backends/scale-alt/generate'; let firstSysMsgs = []; for (let msg of messages) { diff --git a/server.js b/server.js index af0f89a5e..e5d98bcb7 100644 --- a/server.js +++ b/server.js @@ -620,7 +620,7 @@ function cleanUploads() { } } -app.post('/generate_altscale', jsonParser, function (request, response_generate_scale) { +app.post('/api/backends/scale-alt/generate', jsonParser, function (request, response_generate_scale) { if (!request.body) return response_generate_scale.sendStatus(400); fetch('https://dashboard.scale.com/spellbook/api/trpc/v2.variant.run', { From b55ea8df04e50c54e2223b567ad07a5445b7301c Mon Sep 17 00:00:00 2001 From: valadaptive Date: Mon, 11 Dec 2023 23:56:55 -0500 Subject: [PATCH 090/179] Move alt Scale generation to its own module --- server.js | 71 ++------------------------- src/endpoints/backends/scale-alt.js | 76 +++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 67 deletions(-) create mode 100644 src/endpoints/backends/scale-alt.js diff --git a/server.js b/server.js index e5d98bcb7..dfc013265 100644 --- a/server.js +++ b/server.js @@ -43,7 +43,7 @@ util.inspect.defaultOptions.maxStringLength = null; const basicAuthMiddleware = require('./src/middleware/basicAuthMiddleware'); const { jsonParser, urlencodedParser } = require('./src/express-common.js'); const contentManager = require('./src/endpoints/content-manager'); -const { readSecret, migrateSecrets, SECRET_KEYS } = require('./src/endpoints/secrets'); +const { migrateSecrets } = require('./src/endpoints/secrets'); const { getVersion, getConfigValue, @@ -620,72 +620,6 @@ function cleanUploads() { } } -app.post('/api/backends/scale-alt/generate', jsonParser, function (request, response_generate_scale) { - if (!request.body) return 
response_generate_scale.sendStatus(400); - - fetch('https://dashboard.scale.com/spellbook/api/trpc/v2.variant.run', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'cookie': `_jwt=${readSecret(SECRET_KEYS.SCALE_COOKIE)}`, - }, - body: JSON.stringify({ - json: { - variant: { - name: 'New Variant', - appId: '', - taxonomy: null, - }, - prompt: { - id: '', - template: '{{input}}\n', - exampleVariables: {}, - variablesSourceDataId: null, - systemMessage: request.body.sysprompt, - }, - modelParameters: { - id: '', - modelId: 'GPT4', - modelType: 'OpenAi', - maxTokens: request.body.max_tokens, - temperature: request.body.temp, - stop: 'user:', - suffix: null, - topP: request.body.top_p, - logprobs: null, - logitBias: request.body.logit_bias, - }, - inputs: [ - { - index: '-1', - valueByName: { - input: request.body.prompt, - }, - }, - ], - }, - meta: { - values: { - 'variant.taxonomy': ['undefined'], - 'prompt.variablesSourceDataId': ['undefined'], - 'modelParameters.suffix': ['undefined'], - 'modelParameters.logprobs': ['undefined'], - }, - }, - }), - }) - .then(response => response.json()) - .then(data => { - console.log(data.result.data.json.outputs[0]); - return response_generate_scale.send({ output: data.result.data.json.outputs[0] }); - }) - .catch((error) => { - console.error('Error:', error); - return response_generate_scale.send({ error: true }); - }); - -}); - /** * Redirect a deprecated API endpoint URL to its replacement. Because fetch, form submissions, and $.ajax follow * redirects, this is transparent to client-side code. @@ -834,6 +768,9 @@ app.use('/api/backends/kobold', require('./src/endpoints/backends/kobold').route // OpenAI chat completions app.use('/api/backends/chat-completions', require('./src/endpoints/backends/chat-completions').router); +// Scale (alt method) +app.use('/api/backends/scale-alt', require('./src/endpoints/backends/scale-alt').router); + const tavernUrl = new URL( (cliArguments.ssl ? 
'https://' : 'http://') + (listen ? '0.0.0.0' : '127.0.0.1') + diff --git a/src/endpoints/backends/scale-alt.js b/src/endpoints/backends/scale-alt.js new file mode 100644 index 000000000..240e169b3 --- /dev/null +++ b/src/endpoints/backends/scale-alt.js @@ -0,0 +1,76 @@ +const express = require('express'); +const fetch = require('node-fetch').default; + +const { jsonParser } = require('../../express-common'); + +const { readSecret, SECRET_KEYS } = require('../secrets'); + +const router = express.Router(); + +router.post('/generate', jsonParser, function (request, response_generate_scale) { + if (!request.body) return response_generate_scale.sendStatus(400); + + fetch('https://dashboard.scale.com/spellbook/api/trpc/v2.variant.run', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'cookie': `_jwt=${readSecret(SECRET_KEYS.SCALE_COOKIE)}`, + }, + body: JSON.stringify({ + json: { + variant: { + name: 'New Variant', + appId: '', + taxonomy: null, + }, + prompt: { + id: '', + template: '{{input}}\n', + exampleVariables: {}, + variablesSourceDataId: null, + systemMessage: request.body.sysprompt, + }, + modelParameters: { + id: '', + modelId: 'GPT4', + modelType: 'OpenAi', + maxTokens: request.body.max_tokens, + temperature: request.body.temp, + stop: 'user:', + suffix: null, + topP: request.body.top_p, + logprobs: null, + logitBias: request.body.logit_bias, + }, + inputs: [ + { + index: '-1', + valueByName: { + input: request.body.prompt, + }, + }, + ], + }, + meta: { + values: { + 'variant.taxonomy': ['undefined'], + 'prompt.variablesSourceDataId': ['undefined'], + 'modelParameters.suffix': ['undefined'], + 'modelParameters.logprobs': ['undefined'], + }, + }, + }), + }) + .then(response => response.json()) + .then(data => { + console.log(data.result.data.json.outputs[0]); + return response_generate_scale.send({ output: data.result.data.json.outputs[0] }); + }) + .catch((error) => { + console.error('Error:', error); + return 
response_generate_scale.send({ error: true }); + }); + +}); + +module.exports = { router }; From 69e24c96866f7ec9e7dbd9ce68c7c0523bca5fbe Mon Sep 17 00:00:00 2001 From: based Date: Thu, 14 Dec 2023 11:14:41 +1000 Subject: [PATCH 091/179] change palm naming in UI --- public/index.html | 27 ++++++++++++++++++--------- public/script.js | 8 ++++---- public/scripts/RossAscends-mods.js | 2 +- public/scripts/openai.js | 8 ++++---- public/scripts/secrets.js | 4 ++-- server.js | 6 +++--- src/endpoints/secrets.js | 2 +- 7 files changed, 33 insertions(+), 24 deletions(-) diff --git a/public/index.html b/public/index.html index 348a47fa3..939440aaa 100644 --- a/public/index.html +++ b/public/index.html @@ -444,7 +444,7 @@ complete. -
    +
    Temperature
    @@ -496,7 +496,7 @@
    -
    +
    Top K
    @@ -509,7 +509,7 @@
    -
    +
    Top P
    @@ -1585,7 +1585,7 @@ - +
    @@ -1833,7 +1833,7 @@ - +

    OpenAI API key

    @@ -2100,14 +2100,23 @@
    -

    PaLM API Key

    +

    MakerSuite API Key

    - - + +
    -
    +
    For privacy reasons, your API key will be hidden after you reload the page.
    +
    +

    Google Model

    + +
    diff --git a/public/script.js b/public/script.js index c9d45baf7..cefd9d50b 100644 --- a/public/script.js +++ b/public/script.js @@ -2557,7 +2557,7 @@ function getCharacterCardFields() { } function isStreamingEnabled() { - const noStreamSources = [chat_completion_sources.SCALE, chat_completion_sources.AI21, chat_completion_sources.PALM]; + const noStreamSources = [chat_completion_sources.SCALE, chat_completion_sources.AI21, chat_completion_sources.MAKERSUITE]; return ((main_api == 'openai' && oai_settings.stream_openai && !noStreamSources.includes(oai_settings.chat_completion_source)) || (main_api == 'kobold' && kai_settings.streaming_kobold && kai_flags.can_use_streaming) || (main_api == 'novel' && nai_settings.streaming_novel) @@ -5395,7 +5395,7 @@ function changeMainAPI() { case chat_completion_sources.CLAUDE: case chat_completion_sources.OPENAI: case chat_completion_sources.AI21: - case chat_completion_sources.PALM: + case chat_completion_sources.MAKERSUITE: default: setupChatCompletionPromptManager(oai_settings); break; @@ -7535,9 +7535,9 @@ async function connectAPISlash(_, text) { source: 'ai21', button: '#api_button_openai', }, - 'palm': { + 'makersuite': { selected: 'openai', - source: 'palm', + source: 'makersuite', button: '#api_button_openai', }, }; diff --git a/public/scripts/RossAscends-mods.js b/public/scripts/RossAscends-mods.js index f5bad628b..7235763b6 100644 --- a/public/scripts/RossAscends-mods.js +++ b/public/scripts/RossAscends-mods.js @@ -415,7 +415,7 @@ function RA_autoconnect(PrevApi) { || (oai_settings.chat_completion_source == chat_completion_sources.WINDOWAI) || (secret_state[SECRET_KEYS.OPENROUTER] && oai_settings.chat_completion_source == chat_completion_sources.OPENROUTER) || (secret_state[SECRET_KEYS.AI21] && oai_settings.chat_completion_source == chat_completion_sources.AI21) - || (secret_state[SECRET_KEYS.PALM] && oai_settings.chat_completion_source == chat_completion_sources.PALM) + || (secret_state[SECRET_KEYS.MAKERSUITE] && 
oai_settings.chat_completion_source == chat_completion_sources.PALM) ) { $('#api_button_openai').trigger('click'); } diff --git a/public/scripts/openai.js b/public/scripts/openai.js index a9bc5e304..7741d6c9d 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -164,7 +164,7 @@ export const chat_completion_sources = { SCALE: 'scale', OPENROUTER: 'openrouter', AI21: 'ai21', - PALM: 'palm', + MAKERSUITE: 'makersuite', }; const prefixMap = selected_group ? { @@ -3255,10 +3255,10 @@ async function onConnectButtonClick(e) { } if (oai_settings.chat_completion_source == chat_completion_sources.PALM) { - const api_key_palm = String($('#api_key_palm').val()).trim(); + const api_key_makersuite = String($('#api_key_makersuite').val()).trim(); - if (api_key_palm.length) { - await writeSecret(SECRET_KEYS.PALM, api_key_palm); + if (api_key_makersuite.length) { + await writeSecret(SECRET_KEYS.PALM, api_key_makersuite); } if (!secret_state[SECRET_KEYS.PALM]) { diff --git a/public/scripts/secrets.js b/public/scripts/secrets.js index 84279641d..6afb538f1 100644 --- a/public/scripts/secrets.js +++ b/public/scripts/secrets.js @@ -12,7 +12,7 @@ export const SECRET_KEYS = { SCALE: 'api_key_scale', AI21: 'api_key_ai21', SCALE_COOKIE: 'scale_cookie', - PALM: 'api_key_palm', + MAKERSUITE: 'api_key_makersuite', SERPAPI: 'api_key_serpapi', }; @@ -26,7 +26,7 @@ const INPUT_MAP = { [SECRET_KEYS.SCALE]: '#api_key_scale', [SECRET_KEYS.AI21]: '#api_key_ai21', [SECRET_KEYS.SCALE_COOKIE]: '#scale_cookie', - [SECRET_KEYS.PALM]: '#api_key_palm', + [SECRET_KEYS.MAKERSUITE]: '#api_key_makersuite', [SECRET_KEYS.APHRODITE]: '#api_key_aphrodite', [SECRET_KEYS.TABBY]: '#api_key_tabby', }; diff --git a/server.js b/server.js index 2374df5a2..6da29c278 100644 --- a/server.js +++ b/server.js @@ -995,9 +995,9 @@ async function sendClaudeRequest(request, response) { * @param {express.Response} response */ async function sendPalmRequest(request, response) { - const api_key_palm = 
readSecret(SECRET_KEYS.PALM); + const api_key_makersuite = readSecret(SECRET_KEYS.PALM); - if (!api_key_palm) { + if (!api_key_makersuite) { console.log('Palm API key is missing.'); return response.status(400).send({ error: true }); } @@ -1024,7 +1024,7 @@ async function sendPalmRequest(request, response) { controller.abort(); }); - const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText?key=${api_key_palm}`, { + const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText?key=${api_key_makersuite}`, { body: JSON.stringify(body), method: 'POST', headers: { diff --git a/src/endpoints/secrets.js b/src/endpoints/secrets.js index 54687cbeb..e4705706f 100644 --- a/src/endpoints/secrets.js +++ b/src/endpoints/secrets.js @@ -23,7 +23,7 @@ const SECRET_KEYS = { SCALE_COOKIE: 'scale_cookie', ONERING_URL: 'oneringtranslator_url', DEEPLX_URL: 'deeplx_url', - PALM: 'api_key_palm', + PALM: 'api_key_makersuite', SERPAPI: 'api_key_serpapi', }; From be396991de5f35e93cd16ec1c5e2ea7b77842962 Mon Sep 17 00:00:00 2001 From: based Date: Thu, 14 Dec 2023 11:53:26 +1000 Subject: [PATCH 092/179] finish implementing ui changes for google models --- public/index.html | 4 +-- public/script.js | 2 +- public/scripts/RossAscends-mods.js | 2 +- public/scripts/openai.js | 51 +++++++++++++++++++----------- src/endpoints/secrets.js | 2 +- 5 files changed, 37 insertions(+), 24 deletions(-) diff --git a/public/index.html b/public/index.html index 939440aaa..474d605de 100644 --- a/public/index.html +++ b/public/index.html @@ -2099,7 +2099,7 @@
    -
    +

    MakerSuite API Key

    @@ -2117,8 +2117,6 @@
    - -
    diff --git a/public/script.js b/public/script.js index cefd9d50b..5ed6f7c51 100644 --- a/public/script.js +++ b/public/script.js @@ -7826,7 +7826,7 @@ jQuery(async function () { } registerSlashCommand('dupe', DupeChar, [], '– duplicates the currently selected character', true, true); - registerSlashCommand('api', connectAPISlash, [], '(kobold, horde, novel, ooba, tabby, mancer, aphrodite, kcpp, oai, claude, windowai, openrouter, scale, ai21, palm) – connect to an API', true, true); + registerSlashCommand('api', connectAPISlash, [], '(kobold, horde, novel, ooba, tabby, mancer, aphrodite, kcpp, oai, claude, windowai, openrouter, scale, ai21, makersuite) – connect to an API', true, true); registerSlashCommand('impersonate', doImpersonate, ['imp'], '– calls an impersonation response', true, true); registerSlashCommand('delchat', doDeleteChat, [], '– deletes the current chat', true, true); registerSlashCommand('closechat', doCloseChat, [], '– closes the current chat', true, true); diff --git a/public/scripts/RossAscends-mods.js b/public/scripts/RossAscends-mods.js index 7235763b6..0cf4fa5c3 100644 --- a/public/scripts/RossAscends-mods.js +++ b/public/scripts/RossAscends-mods.js @@ -415,7 +415,7 @@ function RA_autoconnect(PrevApi) { || (oai_settings.chat_completion_source == chat_completion_sources.WINDOWAI) || (secret_state[SECRET_KEYS.OPENROUTER] && oai_settings.chat_completion_source == chat_completion_sources.OPENROUTER) || (secret_state[SECRET_KEYS.AI21] && oai_settings.chat_completion_source == chat_completion_sources.AI21) - || (secret_state[SECRET_KEYS.MAKERSUITE] && oai_settings.chat_completion_source == chat_completion_sources.PALM) + || (secret_state[SECRET_KEYS.MAKERSUITE] && oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) ) { $('#api_button_openai').trigger('click'); } diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 7741d6c9d..644f4f195 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js 
@@ -114,7 +114,6 @@ const max_128k = 128 * 1000; const max_200k = 200 * 1000; const scale_max = 8191; const claude_max = 9000; // We have a proper tokenizer, so theoretically could be larger (up to 9k) -const palm2_max = 7400; // The real context window is 8192, spare some for padding due to using turbo tokenizer const claude_100k_max = 99000; let ai21_max = 9200; //can easily fit 9k gpt tokens because j2's tokenizer is efficient af const unlocked_max = 100 * 1024; @@ -207,6 +206,7 @@ const default_settings = { personality_format: default_personality_format, openai_model: 'gpt-3.5-turbo', claude_model: 'claude-instant-v1', + google_model: 'gemini-pro', ai21_model: 'j2-ultra', windowai_model: '', openrouter_model: openrouter_website_model, @@ -260,6 +260,7 @@ const oai_settings = { personality_format: default_personality_format, openai_model: 'gpt-3.5-turbo', claude_model: 'claude-instant-v1', + google_model: 'gemini-pro', ai21_model: 'j2-ultra', windowai_model: '', openrouter_model: openrouter_website_model, @@ -1252,8 +1253,8 @@ function getChatCompletionModel() { return oai_settings.windowai_model; case chat_completion_sources.SCALE: return ''; - case chat_completion_sources.PALM: - return ''; + case chat_completion_sources.MAKERSUITE: + return oai_settings.google_model; case chat_completion_sources.OPENROUTER: return oai_settings.openrouter_model !== openrouter_website_model ? 
oai_settings.openrouter_model : null; case chat_completion_sources.AI21: @@ -1443,20 +1444,20 @@ async function sendOpenAIRequest(type, messages, signal) { const isOpenRouter = oai_settings.chat_completion_source == chat_completion_sources.OPENROUTER; const isScale = oai_settings.chat_completion_source == chat_completion_sources.SCALE; const isAI21 = oai_settings.chat_completion_source == chat_completion_sources.AI21; - const isPalm = oai_settings.chat_completion_source == chat_completion_sources.PALM; + const isGoogle = oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE; const isOAI = oai_settings.chat_completion_source == chat_completion_sources.OPENAI; const isTextCompletion = (isOAI && textCompletionModels.includes(oai_settings.openai_model)) || (isOpenRouter && oai_settings.openrouter_force_instruct && power_user.instruct.enabled); const isQuiet = type === 'quiet'; const isImpersonate = type === 'impersonate'; const isContinue = type === 'continue'; - const stream = oai_settings.stream_openai && !isQuiet && !isScale && !isAI21 && !isPalm; + const stream = oai_settings.stream_openai && !isQuiet && !isScale && !isAI21 && !isGoogle; if (isTextCompletion && isOpenRouter) { messages = convertChatCompletionToInstruct(messages, type); replaceItemizedPromptText(messageId, messages); } - if (isAI21 || isPalm) { + if (isAI21 || isGoogle) { const joinedMsgs = messages.reduce((acc, obj) => { const prefix = prefixMap[obj.role]; return acc + (prefix ? (selected_group ? '\n' : prefix + ' ') : '') + obj.content + '\n'; @@ -1539,7 +1540,7 @@ async function sendOpenAIRequest(type, messages, signal) { generate_data['api_url_scale'] = oai_settings.api_url_scale; } - if (isPalm) { + if (isGoogle) { const nameStopString = isImpersonate ? 
`\n${name2}:` : `\n${name1}:`; const stopStringsLimit = 3; // 5 - 2 (nameStopString and new_chat_prompt) generate_data['top_k'] = Number(oai_settings.top_k_openai); @@ -2290,6 +2291,7 @@ function loadOpenAISettings(data, settings) { oai_settings.openrouter_use_fallback = settings.openrouter_use_fallback ?? default_settings.openrouter_use_fallback; oai_settings.openrouter_force_instruct = settings.openrouter_force_instruct ?? default_settings.openrouter_force_instruct; oai_settings.ai21_model = settings.ai21_model ?? default_settings.ai21_model; + oai_settings.google_model = settings.google_model ?? default_settings.google_model; oai_settings.chat_completion_source = settings.chat_completion_source ?? default_settings.chat_completion_source; oai_settings.api_url_scale = settings.api_url_scale ?? default_settings.api_url_scale; oai_settings.show_external_models = settings.show_external_models ?? default_settings.show_external_models; @@ -2326,6 +2328,8 @@ function loadOpenAISettings(data, settings) { $(`#model_claude_select option[value="${oai_settings.claude_model}"`).attr('selected', true); $('#model_windowai_select').val(oai_settings.windowai_model); $(`#model_windowai_select option[value="${oai_settings.windowai_model}"`).attr('selected', true); + $('#model_google_select').val(oai_settings.google_model); + $(`#model_google_select option[value="${oai_settings.google_model}"`).attr('selected', true); $('#model_ai21_select').val(oai_settings.ai21_model); $(`#model_ai21_select option[value="${oai_settings.ai21_model}"`).attr('selected', true); $('#openai_max_context').val(oai_settings.openai_max_context); @@ -2416,7 +2420,7 @@ async function getStatusOpen() { return resultCheckStatus(); } - const noValidateSources = [chat_completion_sources.SCALE, chat_completion_sources.CLAUDE, chat_completion_sources.AI21, chat_completion_sources.PALM]; + const noValidateSources = [chat_completion_sources.SCALE, chat_completion_sources.CLAUDE, chat_completion_sources.AI21, 
chat_completion_sources.MAKERSUITE]; if (noValidateSources.includes(oai_settings.chat_completion_source)) { let status = 'Unable to verify key; press "Test Message" to validate.'; setOnlineStatus(status); @@ -2499,6 +2503,7 @@ async function saveOpenAIPreset(name, settings, triggerUi = true) { openrouter_group_models: settings.openrouter_group_models, openrouter_sort_models: settings.openrouter_sort_models, ai21_model: settings.ai21_model, + google_model: settings.google_model, temperature: settings.temp_openai, frequency_penalty: settings.freq_pen_openai, presence_penalty: settings.pres_pen_openai, @@ -2868,6 +2873,7 @@ function onSettingsPresetChange() { openrouter_group_models: ['#openrouter_group_models', 'openrouter_group_models', false], openrouter_sort_models: ['#openrouter_sort_models', 'openrouter_sort_models', false], ai21_model: ['#model_ai21_select', 'ai21_model', false], + google_model: ['#model_google_select', 'google_model', false], openai_max_context: ['#openai_max_context', 'openai_max_context', false], openai_max_tokens: ['#openai_max_tokens', 'openai_max_tokens', false], wrap_in_quotes: ['#wrap_in_quotes', 'wrap_in_quotes', true], @@ -3000,7 +3006,7 @@ function getMaxContextWindowAI(value) { return max_8k; } else if (value.includes('palm-2')) { - return palm2_max; + return max_8k; } else if (value.includes('GPT-NeoXT')) { return max_2k; @@ -3045,6 +3051,11 @@ async function onModelChange() { oai_settings.ai21_model = value; } + if ($(this).is('#model_google_select')) { + console.log('Google model changed to', value); + oai_settings.google_model = value; + } + if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) { if (oai_settings.max_context_unlocked) { $('#openai_max_context').attr('max', unlocked_max); @@ -3055,11 +3066,15 @@ async function onModelChange() { $('#openai_max_context').val(oai_settings.openai_max_context).trigger('input'); } - if (oai_settings.chat_completion_source == chat_completion_sources.PALM) { + if 
(oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) { if (oai_settings.max_context_unlocked) { $('#openai_max_context').attr('max', unlocked_max); + } else if (value === 'gemini-pro') { + $('#openai_max_context').attr('max', max_32k); + } else if (value === 'gemini-pro-vision') { + $('#openai_max_context').attr('max', max_16k); } else { - $('#openai_max_context').attr('max', palm2_max); + $('#openai_max_context').attr('max', max_8k); } oai_settings.openai_max_context = Math.min(Number($('#openai_max_context').attr('max')), oai_settings.openai_max_context); @@ -3254,15 +3269,15 @@ async function onConnectButtonClick(e) { } } - if (oai_settings.chat_completion_source == chat_completion_sources.PALM) { + if (oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) { const api_key_makersuite = String($('#api_key_makersuite').val()).trim(); if (api_key_makersuite.length) { - await writeSecret(SECRET_KEYS.PALM, api_key_makersuite); + await writeSecret(SECRET_KEYS.MAKERSUITE, api_key_makersuite); } - if (!secret_state[SECRET_KEYS.PALM]) { - console.log('No secret key saved for PALM'); + if (!secret_state[SECRET_KEYS.MAKERSUITE]) { + console.log('No secret key saved for MakerSuite'); return; } } @@ -3329,8 +3344,8 @@ function toggleChatCompletionForms() { else if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) { $('#model_scale_select').trigger('change'); } - else if (oai_settings.chat_completion_source == chat_completion_sources.PALM) { - $('#model_palm_select').trigger('change'); + else if (oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) { + $('#model_google_select').trigger('change'); } else if (oai_settings.chat_completion_source == chat_completion_sources.OPENROUTER) { $('#model_openrouter_select').trigger('change'); @@ -3702,7 +3717,7 @@ $(document).ready(async function () { $('#model_claude_select').on('change', onModelChange); $('#model_windowai_select').on('change', 
onModelChange); $('#model_scale_select').on('change', onModelChange); - $('#model_palm_select').on('change', onModelChange); + $('#model_google_select').on('change', onModelChange); $('#model_openrouter_select').on('change', onModelChange); $('#openrouter_group_models').on('change', onOpenrouterModelSortChange); $('#openrouter_sort_models').on('change', onOpenrouterModelSortChange); diff --git a/src/endpoints/secrets.js b/src/endpoints/secrets.js index e4705706f..c997e5efe 100644 --- a/src/endpoints/secrets.js +++ b/src/endpoints/secrets.js @@ -23,7 +23,7 @@ const SECRET_KEYS = { SCALE_COOKIE: 'scale_cookie', ONERING_URL: 'oneringtranslator_url', DEEPLX_URL: 'deeplx_url', - PALM: 'api_key_makersuite', + MAKERSUITE: 'api_key_makersuite', SERPAPI: 'api_key_serpapi', }; From 3d8160cf25281ce04b7f2ca87b348feb4dda9d50 Mon Sep 17 00:00:00 2001 From: kingbri Date: Wed, 13 Dec 2023 21:39:07 -0500 Subject: [PATCH 093/179] Server: Update CORS proxy body limit The body-parser middleware only accepted 50mb of data, bump this value to 200mb. 
Signed-off-by: kingbri --- server.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server.js b/server.js index 2374df5a2..4f734ba78 100644 --- a/server.js +++ b/server.js @@ -222,7 +222,9 @@ if (!cliArguments.disableCsrf) { if (getConfigValue('enableCorsProxy', false) || cliArguments.corsProxy) { const bodyParser = require('body-parser'); - app.use(bodyParser.json()); + app.use(bodyParser.json({ + limit: '200mb', + })); console.log('Enabling CORS proxy'); app.use('/proxy/:url(*)', async (req, res) => { From e26159c00d8190ece95e9778009e1f763f205321 Mon Sep 17 00:00:00 2001 From: based Date: Thu, 14 Dec 2023 15:49:50 +1000 Subject: [PATCH 094/179] refactor and rework palm request to work with the 'content' format and added an endpoint for googles tokenizer --- public/img/{palm.svg => makersuite.svg} | 0 public/scripts/openai.js | 2 +- public/scripts/tokenizers.js | 17 ++++++- server.js | 60 ++++++++++++++----------- src/chat-completion.js | 30 +++++++++++++ src/constants.js | 22 +++------ src/endpoints/tokenizers.js | 21 +++++++++ src/palm-vectors.js | 2 +- 8 files changed, 108 insertions(+), 46 deletions(-) rename public/img/{palm.svg => makersuite.svg} (100%) diff --git a/public/img/palm.svg b/public/img/makersuite.svg similarity index 100% rename from public/img/palm.svg rename to public/img/makersuite.svg diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 644f4f195..015dd66a6 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -1457,7 +1457,7 @@ async function sendOpenAIRequest(type, messages, signal) { replaceItemizedPromptText(messageId, messages); } - if (isAI21 || isGoogle) { + if (isAI21) { const joinedMsgs = messages.reduce((acc, obj) => { const prefix = prefixMap[obj.role]; return acc + (prefix ? (selected_group ? 
'\n' : prefix + ' ') : '') + obj.content + '\n'; diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index decd0f919..7600a0909 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -376,6 +376,10 @@ export function getTokenizerModel() { } } + if(oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) { + return oai_settings.google_model; + } + if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) { return claudeTokenizer; } @@ -389,6 +393,15 @@ export function getTokenizerModel() { */ export function countTokensOpenAI(messages, full = false) { const shouldTokenizeAI21 = oai_settings.chat_completion_source === chat_completion_sources.AI21 && oai_settings.use_ai21_tokenizer; + const shouldTokenizeGoogle = oai_settings.chat_completion_source === chat_completion_sources.MAKERSUITE; + let tokenizerEndpoint = ''; + if(shouldTokenizeAI21) { + tokenizerEndpoint = '/api/tokenizers/ai21/count'; + } else if (shouldTokenizeGoogle) { + tokenizerEndpoint = `/api/tokenizers/google/count?model=${getTokenizerModel()}`; + } else { + tokenizerEndpoint = `/api/tokenizers/openai/count?model=${getTokenizerModel()}`; + } const cacheObject = getTokenCacheObject(); if (!Array.isArray(messages)) { @@ -400,7 +413,7 @@ export function countTokensOpenAI(messages, full = false) { for (const message of messages) { const model = getTokenizerModel(); - if (model === 'claude' || shouldTokenizeAI21) { + if (model === 'claude' || shouldTokenizeAI21 || shouldTokenizeGoogle) { full = true; } @@ -416,7 +429,7 @@ export function countTokensOpenAI(messages, full = false) { jQuery.ajax({ async: false, type: 'POST', // - url: shouldTokenizeAI21 ? 
'/api/tokenizers/ai21/count' : `/api/tokenizers/openai/count?model=${model}`, + url: tokenizerEndpoint, data: JSON.stringify([message]), dataType: 'json', contentType: 'application/json', diff --git a/server.js b/server.js index 6da29c278..ac56f54fb 100644 --- a/server.js +++ b/server.js @@ -59,7 +59,7 @@ const { } = require('./src/util'); const { ensureThumbnailCache } = require('./src/endpoints/thumbnails'); const { getTokenizerModel, getTiktokenTokenizer, loadTokenizers, TEXT_COMPLETION_MODELS, getSentencepiceTokenizer, sentencepieceTokenizers } = require('./src/endpoints/tokenizers'); -const { convertClaudePrompt } = require('./src/chat-completion'); +const { convertClaudePrompt, convertGooglePrompt } = require('./src/chat-completion'); // Work around a node v20.0.0, v20.1.0, and v20.2.0 bug. The issue was fixed in v20.3.0. // https://github.com/nodejs/node/issues/47822#issuecomment-1564708870 @@ -131,7 +131,7 @@ const API_OPENAI = 'https://api.openai.com/v1'; const API_CLAUDE = 'https://api.anthropic.com/v1'; const SETTINGS_FILE = './public/settings.json'; -const { DIRECTORIES, UPLOADS_PATH, PALM_SAFETY, CHAT_COMPLETION_SOURCES, AVATAR_WIDTH, AVATAR_HEIGHT } = require('./src/constants'); +const { DIRECTORIES, UPLOADS_PATH, MAKERSUITE_SAFETY, CHAT_COMPLETION_SOURCES, AVATAR_WIDTH, AVATAR_HEIGHT } = require('./src/constants'); // CORS Settings // const CORS = cors({ @@ -994,29 +994,30 @@ async function sendClaudeRequest(request, response) { * @param {express.Request} request * @param {express.Response} response */ -async function sendPalmRequest(request, response) { - const api_key_makersuite = readSecret(SECRET_KEYS.PALM); +async function sendMakerSuiteRequest(request, response) { + const api_key_makersuite = readSecret(SECRET_KEYS.MAKERSUITE); if (!api_key_makersuite) { - console.log('Palm API key is missing.'); + console.log('MakerSuite API key is missing.'); return response.status(400).send({ error: true }); } - const body = { - prompt: { - text: 
request.body.messages, - }, + const generationConfig = { stopSequences: request.body.stop, - safetySettings: PALM_SAFETY, + candidateCount: 1, + maxOutputTokens: request.body.max_tokens, temperature: request.body.temperature, topP: request.body.top_p, topK: request.body.top_k || undefined, - maxOutputTokens: request.body.max_tokens, - candidate_count: 1, }; - console.log('Palm request:', body); + const body = { + contents: convertGooglePrompt(request.body.messages), + safetySettings: MAKERSUITE_SAFETY, + generationConfig: generationConfig, + }; + const google_model = request.body.model; try { const controller = new AbortController(); request.socket.removeAllListeners('close'); @@ -1024,7 +1025,7 @@ async function sendPalmRequest(request, response) { controller.abort(); }); - const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText?key=${api_key_makersuite}`, { + const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${google_model}:generateContent?key=${api_key_makersuite}`, { body: JSON.stringify(body), method: 'POST', headers: { @@ -1035,32 +1036,37 @@ async function sendPalmRequest(request, response) { }); if (!generateResponse.ok) { - console.log(`Palm API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); + console.log(`MakerSuite API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); return response.status(generateResponse.status).send({ error: true }); } const generateResponseJson = await generateResponse.json(); - const responseText = generateResponseJson?.candidates[0]?.output; - if (!responseText) { - console.log('Palm API returned no response', generateResponseJson); - let message = `Palm API returned no response: ${JSON.stringify(generateResponseJson)}`; - - // Check for filters - if (generateResponseJson?.filters[0]?.message) { - 
message = `Palm filter triggered: ${generateResponseJson.filters[0].message}`; + const candidates = generateResponseJson?.candidates; + if (!candidates || candidates.length === 0) { + let message = 'MakerSuite API returned no candidate'; + console.log(message, generateResponseJson); + if (generateResponseJson?.promptFeedback?.blockReason) { + message += `\nPrompt was blocked due to : ${generateResponseJson.promptFeedback.blockReason}`; } - return response.send({ error: { message } }); } - console.log('Palm response:', responseText); + const responseContent = candidates[0].content; + const responseText = responseContent.parts[0].text; + if (!responseText) { + let message = 'MakerSuite Candidate text empty'; + console.log(message, generateResponseJson); + return response.send({ error: { message } }); + } + + console.log('MakerSuite response:', responseText); // Wrap it back to OAI format const reply = { choices: [{ 'message': { 'content': responseText } }] }; return response.send(reply); } catch (error) { - console.log('Error communicating with Palm API: ', error); + console.log('Error communicating with MakerSuite API: ', error); if (!response.headersSent) { return response.status(500).send({ error: true }); } @@ -1074,7 +1080,7 @@ app.post('/generate_openai', jsonParser, function (request, response_generate_op case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response_generate_openai); case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response_generate_openai); case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response_generate_openai); - case CHAT_COMPLETION_SOURCES.PALM: return sendPalmRequest(request, response_generate_openai); + case CHAT_COMPLETION_SOURCES.MAKERSUITE: return sendMakerSuiteRequest(request, response_generate_openai); } let api_url; diff --git a/src/chat-completion.js b/src/chat-completion.js index 4fc21a550..d1f97f8a3 100644 --- a/src/chat-completion.js +++ b/src/chat-completion.js @@ 
-72,6 +72,36 @@ function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, with return requestPrompt; } +function convertGooglePrompt(messages) { + const contents = []; + let lastRole = ''; + let currentText = ''; + + messages.forEach((message, index) => { + const role = message.role === 'assistant' ? 'model' : 'user'; + if (lastRole === role) { + currentText += '\n\n' + message.content; + } else { + if (currentText !== '') { + contents.push({ + parts: [{ text: currentText.trim() }], + role: lastRole, + }); + } + currentText = message.content; + lastRole = role; + } + if (index === messages.length - 1) { + contents.push({ + parts: [{ text: currentText.trim() }], + role: lastRole, + }); + } + }); + return contents; +} + module.exports = { convertClaudePrompt, + convertGooglePrompt, }; diff --git a/src/constants.js b/src/constants.js index 32ea6fad5..7151ada24 100644 --- a/src/constants.js +++ b/src/constants.js @@ -105,29 +105,21 @@ const UNSAFE_EXTENSIONS = [ '.ws', ]; -const PALM_SAFETY = [ +const MAKERSUITE_SAFETY = [ { - category: 'HARM_CATEGORY_DEROGATORY', + category: 'HARM_CATEGORY_HARASSMENT', threshold: 'BLOCK_NONE', }, { - category: 'HARM_CATEGORY_TOXICITY', + category: 'HARM_CATEGORY_HATE_SPEECH', threshold: 'BLOCK_NONE', }, { - category: 'HARM_CATEGORY_VIOLENCE', + category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT', threshold: 'BLOCK_NONE', }, { - category: 'HARM_CATEGORY_SEXUAL', - threshold: 'BLOCK_NONE', - }, - { - category: 'HARM_CATEGORY_MEDICAL', - threshold: 'BLOCK_NONE', - }, - { - category: 'HARM_CATEGORY_DANGEROUS', + category: 'HARM_CATEGORY_DANGEROUS_CONTENT', threshold: 'BLOCK_NONE', }, ]; @@ -139,7 +131,7 @@ const CHAT_COMPLETION_SOURCES = { SCALE: 'scale', OPENROUTER: 'openrouter', AI21: 'ai21', - PALM: 'palm', + MAKERSUITE: 'makersuite', }; const UPLOADS_PATH = './uploads'; @@ -160,7 +152,7 @@ module.exports = { DIRECTORIES, UNSAFE_EXTENSIONS, UPLOADS_PATH, - PALM_SAFETY, + MAKERSUITE_SAFETY, TEXTGEN_TYPES, 
CHAT_COMPLETION_SOURCES, AVATAR_WIDTH, diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js index a81779d97..096b0f093 100644 --- a/src/endpoints/tokenizers.js +++ b/src/endpoints/tokenizers.js @@ -387,6 +387,27 @@ router.post('/ai21/count', jsonParser, async function (req, res) { } }); +router.post('/google/count', jsonParser, async function (req, res) { + if (!req.body) return res.sendStatus(400); + const options = { + method: 'POST', + headers: { + accept: 'application/json', + 'content-type': 'application/json', + }, + body: JSON.stringify({ prompt: { text: req.body[0].content } }), + }; + try { + const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${req.query.model}:countTextTokens?key=${readSecret(SECRET_KEYS.MAKERSUITE)}`, options); + const data = await response.json(); + console.log(data) + return res.send({ 'token_count': data?.tokenCount || 0 }); + } catch (err) { + console.error(err); + return res.send({ 'token_count': 0 }); + } +}); + router.post('/llama/encode', jsonParser, createSentencepieceEncodingHandler(spp_llama)); router.post('/nerdstash/encode', jsonParser, createSentencepieceEncodingHandler(spp_nerd)); router.post('/nerdstash_v2/encode', jsonParser, createSentencepieceEncodingHandler(spp_nerd_v2)); diff --git a/src/palm-vectors.js b/src/palm-vectors.js index 788b474cd..b4e6a68bd 100644 --- a/src/palm-vectors.js +++ b/src/palm-vectors.js @@ -14,7 +14,7 @@ async function getPaLMVector(text) { throw new Error('No PaLM key found'); } - const response = await fetch(`https://generativelanguage.googleapis.com/v1beta2/models/embedding-gecko-001:embedText?key=${key}`, { + const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/embedding-gecko-001:embedText?key=${key}`, { method: 'POST', headers: { 'Content-Type': 'application/json', From 3e82a7d4399d6824afa4a60fa4a0965fd8ee980b Mon Sep 17 00:00:00 2001 From: based Date: Thu, 14 Dec 2023 16:31:08 +1000 Subject: [PATCH 
095/179] tokenizer changes and fixes. + a toggle --- public/index.html | 8 ++++++++ public/scripts/openai.js | 11 +++++++++++ public/scripts/tokenizers.js | 2 +- src/chat-completion.js | 1 - src/endpoints/tokenizers.js | 9 ++++----- 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/public/index.html b/public/index.html index 474d605de..67bb1b96d 100644 --- a/public/index.html +++ b/public/index.html @@ -1516,6 +1516,14 @@ Use the appropriate tokenizer for Jurassic models, which is more efficient than GPT's.
    +
    + +
    + Use the appropriate tokenizer for Google models via their API. Prompt processing is slower, but token counting is much more accurate. +
    +
    diff --git a/public/scripts/extensions/shared.js b/public/scripts/extensions/shared.js index 1eb4cd905..681daac0b 100644 --- a/public/scripts/extensions/shared.js +++ b/public/scripts/extensions/shared.js @@ -1,7 +1,7 @@ -import { getRequestHeaders } from '../../script.js'; -import { extension_settings } from '../extensions.js'; -import { SECRET_KEYS, secret_state } from '../secrets.js'; -import { createThumbnail } from '../utils.js'; +import {getRequestHeaders} from '../../script.js'; +import {extension_settings} from '../extensions.js'; +import {SECRET_KEYS, secret_state} from '../secrets.js'; +import {createThumbnail} from '../utils.js'; /** * Generates a caption for an image using a multimodal model. @@ -18,6 +18,10 @@ export async function getMultimodalCaption(base64Img, prompt) { throw new Error('OpenRouter API key is not set.'); } + if (extension_settings.caption.multimodal_api === 'google' && !secret_state[SECRET_KEYS.MAKERSUITE]) { + throw new Error('MakerSuite API key is not set.'); + } + // OpenRouter has a payload limit of ~2MB const base64Bytes = base64Img.length * 0.75; const compressionLimit = 2 * 1024 * 1024; @@ -26,16 +30,25 @@ export async function getMultimodalCaption(base64Img, prompt) { base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg'); } - const apiResult = await fetch('/api/openai/caption-image', { - method: 'POST', - headers: getRequestHeaders(), - body: JSON.stringify({ - image: base64Img, - prompt: prompt, - api: extension_settings.caption.multimodal_api || 'openai', - model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview', - }), - }); + const apiResult = extension_settings.caption.multimodal_api === 'google' ? 
+ await fetch('/api/google/caption-image', { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify({ + image: base64Img, + prompt: prompt, + }), + }) + : await fetch('/api/openai/caption-image', { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify({ + image: base64Img, + prompt: prompt, + api: extension_settings.caption.multimodal_api || 'openai', + model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview', + }), + }); if (!apiResult.ok) { throw new Error('Failed to caption image via OpenAI.'); diff --git a/public/scripts/openai.js b/public/scripts/openai.js index d5cedc384..878254b21 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -3424,6 +3424,7 @@ export function isImageInliningSupported() { } const gpt4v = 'gpt-4-vision'; + const geminiProV = 'gemini-pro-vision'; const llava13b = 'llava-13b'; if (!oai_settings.image_inlining) { @@ -3433,6 +3434,8 @@ export function isImageInliningSupported() { switch (oai_settings.chat_completion_source) { case chat_completion_sources.OPENAI: return oai_settings.openai_model.includes(gpt4v); + case chat_completion_sources.MAKERSUITE: + return oai_settings.openai_model.includes(geminiProV); case chat_completion_sources.OPENROUTER: return oai_settings.openrouter_model.includes(gpt4v) || oai_settings.openrouter_model.includes(llava13b); default: diff --git a/server.js b/server.js index 6a9fe9171..4a19f3cd6 100644 --- a/server.js +++ b/server.js @@ -1412,6 +1412,9 @@ redirect('/downloadbackground', '/api/backgrounds/upload'); // yes, the download // OpenAI API app.use('/api/openai', require('./src/endpoints/openai').router); +//Google API +app.use('/api/google', require('./src/endpoints/google').router); + // Tokenizers app.use('/api/tokenizers', require('./src/endpoints/tokenizers').router); diff --git a/src/endpoints/google.js b/src/endpoints/google.js new file mode 100644 index 000000000..e2c5c0c2d --- /dev/null +++ b/src/endpoints/google.js @@ 
-0,0 +1,56 @@ +const { readSecret, SECRET_KEYS } = require('./secrets'); +const fetch = require('node-fetch').default; +const express = require('express'); +const { jsonParser } = require('../express-common'); +const { MAKERSUITE_SAFETY } = require('../constants'); + +const router = express.Router(); + +router.post('/caption-image', jsonParser, async (request, response) => { + try { + const url = `https://generativelanguage.googleapis.com/v1beta/models/gemini-pro-vision:generateContent?key=${readSecret(SECRET_KEYS.MAKERSUITE)}`; + const body = { + contents: [{ + parts: [ + { text: request.body.prompt }, + { inlineData: { + mimeType: 'image/png', + data: request.body.image, + }, + }], + }], + safetySettings: MAKERSUITE_SAFETY, + generationConfig: { maxOutputTokens: 1000 }, + }; + + const result = await fetch(url, { + body: JSON.stringify(body), + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + timeout: 0, + }); + + console.log('Multimodal captioning request', body); + + if (!result.ok) { + console.log(`MakerSuite API returned error: ${result.status} ${result.statusText} ${await result.text()}`); + return response.status(result.status).send({ error: true }); + } + + const data = await result.json(); + + const caption = data?.candidates[0].content.parts[0].text; + if (!caption) { + return response.status(500).send('No caption found'); + } + + return response.json({ caption }); + } catch (error) { + console.error(error); + response.status(500).send('Internal server error'); + } +}); + +module.exports = { router }; From 178b07f4148a8e6fc2c50be5e434a797edb61fd4 Mon Sep 17 00:00:00 2001 From: based Date: Thu, 14 Dec 2023 23:18:56 +1000 Subject: [PATCH 099/179] cleaned up a little --- public/scripts/extensions/shared.js | 34 +++++++++++++---------------- src/endpoints/google.js | 10 +++++++-- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/public/scripts/extensions/shared.js b/public/scripts/extensions/shared.js index 
681daac0b..fa2105082 100644 --- a/public/scripts/extensions/shared.js +++ b/public/scripts/extensions/shared.js @@ -30,25 +30,21 @@ export async function getMultimodalCaption(base64Img, prompt) { base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg'); } - const apiResult = extension_settings.caption.multimodal_api === 'google' ? - await fetch('/api/google/caption-image', { - method: 'POST', - headers: getRequestHeaders(), - body: JSON.stringify({ - image: base64Img, - prompt: prompt, - }), - }) - : await fetch('/api/openai/caption-image', { - method: 'POST', - headers: getRequestHeaders(), - body: JSON.stringify({ - image: base64Img, - prompt: prompt, - api: extension_settings.caption.multimodal_api || 'openai', - model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview', - }), - }); + const isGoogle = extension_settings.caption.multimodal_api === 'google'; + const apiResult = await fetch(`/api/${isGoogle ? 'google' : 'openai'}/caption-image`, { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify({ + image: base64Img, + prompt: prompt, + ...(isGoogle + ? {} + : { + api: extension_settings.caption.multimodal_api || 'openai', + model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview', + }), + }), + }); if (!apiResult.ok) { throw new Error('Failed to caption image via OpenAI.'); diff --git a/src/endpoints/google.js b/src/endpoints/google.js index e2c5c0c2d..df675c253 100644 --- a/src/endpoints/google.js +++ b/src/endpoints/google.js @@ -14,7 +14,7 @@ router.post('/caption-image', jsonParser, async (request, response) => { parts: [ { text: request.body.prompt }, { inlineData: { - mimeType: 'image/png', + mimeType: 'image/png', //jpg images seem to work fine even with this mimetype set? 
data: request.body.image, }, }], @@ -40,8 +40,14 @@ router.post('/caption-image', jsonParser, async (request, response) => { } const data = await result.json(); + console.log('Multimodal captioning response', data); - const caption = data?.candidates[0].content.parts[0].text; + const candidates = data?.candidates; + if(!candidates) { + return response.status(500).send('No candidates found, image was most likely filtered.'); + } + + const caption = candidates[0].content.parts[0].text; if (!caption) { return response.status(500).send('No caption found'); } From 348253fd9830d97bc0c193361b159769c7ccd772 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 15:36:44 +0200 Subject: [PATCH 100/179] Fix import path --- src/endpoints/tokenizers.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js index bf43ef343..b3ee6a0b8 100644 --- a/src/endpoints/tokenizers.js +++ b/src/endpoints/tokenizers.js @@ -4,7 +4,7 @@ const express = require('express'); const { SentencePieceProcessor } = require('@agnai/sentencepiece-js'); const tiktoken = require('@dqbd/tiktoken'); const { Tokenizer } = require('@agnai/web-tokenizers'); -const { convertClaudePrompt } = require('./textgen/chat-completions'); +const { convertClaudePrompt } = require('./backends/chat-completions'); const { readSecret, SECRET_KEYS } = require('./secrets'); const { TEXTGEN_TYPES } = require('../constants'); const { jsonParser } = require('../express-common'); From 2a5340232d4652a42e7fbfe1868bcd2c0b48ecdd Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:00:17 +0200 Subject: [PATCH 101/179] Move prompt converters to a separate module. Camelcase local variables and add missing JSDocs. 
--- src/endpoints/backends/chat-completions.js | 225 +++++++-------------- src/endpoints/prompt-converters.js | 103 ++++++++++ src/endpoints/tokenizers.js | 2 +- 3 files changed, 176 insertions(+), 154 deletions(-) create mode 100644 src/endpoints/prompt-converters.js diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index ec7dafe5c..0b08d2700 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -4,6 +4,7 @@ const fetch = require('node-fetch').default; const { jsonParser } = require('../../express-common'); const { CHAT_COMPLETION_SOURCES, PALM_SAFETY } = require('../../constants'); const { forwardFetchResponse, getConfigValue, tryParse, uuidv4 } = require('../../util'); +const { convertClaudePrompt, convertTextCompletionPrompt } = require('../prompt-converters'); const { readSecret, SECRET_KEYS } = require('../secrets'); const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers'); @@ -12,89 +13,15 @@ const API_OPENAI = 'https://api.openai.com/v1'; const API_CLAUDE = 'https://api.anthropic.com/v1'; /** - * Convert a prompt from the ChatML objects to the format used by Claude. - * @param {object[]} messages Array of messages - * @param {boolean} addHumanPrefix Add Human prefix - * @param {boolean} addAssistantPostfix Add Assistant postfix - * @param {boolean} withSystemPrompt Build system prompt before "\n\nHuman: " - * @returns {string} Prompt for Claude - * @copyright Prompt Conversion script taken from RisuAI by kwaroran (GPLv3). - */ -function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, withSystemPrompt) { - // Claude doesn't support message names, so we'll just add them to the message content. 
- for (const message of messages) { - if (message.name && message.role !== 'system') { - message.content = message.name + ': ' + message.content; - delete message.name; - } - } - - let systemPrompt = ''; - if (withSystemPrompt) { - let lastSystemIdx = -1; - - for (let i = 0; i < messages.length - 1; i++) { - const message = messages[i]; - if (message.role === 'system' && !message.name) { - systemPrompt += message.content + '\n\n'; - } else { - lastSystemIdx = i - 1; - break; - } - } - if (lastSystemIdx >= 0) { - messages.splice(0, lastSystemIdx + 1); - } - } - - let requestPrompt = messages.map((v) => { - let prefix = ''; - switch (v.role) { - case 'assistant': - prefix = '\n\nAssistant: '; - break; - case 'user': - prefix = '\n\nHuman: '; - break; - case 'system': - // According to the Claude docs, H: and A: should be used for example conversations. - if (v.name === 'example_assistant') { - prefix = '\n\nA: '; - } else if (v.name === 'example_user') { - prefix = '\n\nH: '; - } else { - prefix = '\n\n'; - } - break; - } - return prefix + v.content; - }).join(''); - - if (addHumanPrefix) { - requestPrompt = '\n\nHuman: ' + requestPrompt; - } - - if (addAssistantPostfix) { - requestPrompt = requestPrompt + '\n\nAssistant: '; - } - - if (withSystemPrompt) { - requestPrompt = systemPrompt + requestPrompt; - } - - return requestPrompt; -} - -/** - * @param {express.Request} request - * @param {express.Response} response + * Sends a request to Claude API. + * @param {express.Request} request Express request + * @param {express.Response} response Express response */ async function sendClaudeRequest(request, response) { + const apiUrl = new URL(request.body.reverse_proxy || API_CLAUDE).toString(); + const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.CLAUDE); - const api_url = new URL(request.body.reverse_proxy || API_CLAUDE).toString(); - const api_key_claude = request.body.reverse_proxy ? 
request.body.proxy_password : readSecret(SECRET_KEYS.CLAUDE); - - if (!api_key_claude) { + if (!apiKey) { console.log('Claude API key is missing.'); return response.status(400).send({ error: true }); } @@ -121,7 +48,7 @@ async function sendClaudeRequest(request, response) { stop_sequences.push(...request.body.stop); } - const generateResponse = await fetch(api_url + '/complete', { + const generateResponse = await fetch(apiUrl + '/complete', { method: 'POST', signal: controller.signal, body: JSON.stringify({ @@ -137,7 +64,7 @@ async function sendClaudeRequest(request, response) { headers: { 'Content-Type': 'application/json', 'anthropic-version': '2023-06-01', - 'x-api-key': api_key_claude, + 'x-api-key': apiKey, }, timeout: 0, }); @@ -167,37 +94,21 @@ async function sendClaudeRequest(request, response) { } } -function convertChatMLPrompt(messages) { - if (typeof messages === 'string') { - return messages; - } - - const messageStrings = []; - messages.forEach(m => { - if (m.role === 'system' && m.name === undefined) { - messageStrings.push('System: ' + m.content); - } - else if (m.role === 'system' && m.name !== undefined) { - messageStrings.push(m.name + ': ' + m.content); - } - else { - messageStrings.push(m.role + ': ' + m.content); - } - }); - return messageStrings.join('\n') + '\nassistant:'; -} - +/** + * Sends a request to Scale Spellbook API. 
+ * @param {import("express").Request} request Express request + * @param {import("express").Response} response Express response + */ async function sendScaleRequest(request, response) { + const apiUrl = new URL(request.body.api_url_scale).toString(); + const apiKey = readSecret(SECRET_KEYS.SCALE); - const api_url = new URL(request.body.api_url_scale).toString(); - const api_key_scale = readSecret(SECRET_KEYS.SCALE); - - if (!api_key_scale) { + if (!apiKey) { console.log('Scale API key is missing.'); return response.status(400).send({ error: true }); } - const requestPrompt = convertChatMLPrompt(request.body.messages); + const requestPrompt = convertTextCompletionPrompt(request.body.messages); console.log('Scale request:', requestPrompt); try { @@ -207,12 +118,12 @@ async function sendScaleRequest(request, response) { controller.abort(); }); - const generateResponse = await fetch(api_url, { + const generateResponse = await fetch(apiUrl, { method: 'POST', body: JSON.stringify({ input: { input: requestPrompt } }), headers: { 'Content-Type': 'application/json', - 'Authorization': `Basic ${api_key_scale}`, + 'Authorization': `Basic ${apiKey}`, }, timeout: 0, }); @@ -236,8 +147,9 @@ async function sendScaleRequest(request, response) { } /** - * @param {express.Request} request - * @param {express.Response} response + * Sends a request to Google AI API. + * @param {express.Request} request Express request + * @param {express.Response} response Express response */ async function sendPalmRequest(request, response) { const api_key_palm = readSecret(SECRET_KEYS.PALM); @@ -312,6 +224,11 @@ async function sendPalmRequest(request, response) { } } +/** + * Sends a request to Google AI API. 
+ * @param {express.Request} request Express request + * @param {express.Response} response Express response + */ async function sendAI21Request(request, response) { if (!request.body) return response.sendStatus(400); const controller = new AbortController(); @@ -533,24 +450,24 @@ router.post('/bias', jsonParser, async function (request, response) { }); -router.post('/generate', jsonParser, function (request, response_generate_openai) { - if (!request.body) return response_generate_openai.status(400).send({ error: true }); +router.post('/generate', jsonParser, function (request, response) { + if (!request.body) return response.status(400).send({ error: true }); switch (request.body.chat_completion_source) { - case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response_generate_openai); - case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response_generate_openai); - case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response_generate_openai); - case CHAT_COMPLETION_SOURCES.PALM: return sendPalmRequest(request, response_generate_openai); + case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response); + case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response); + case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response); + case CHAT_COMPLETION_SOURCES.PALM: return sendPalmRequest(request, response); } - let api_url; - let api_key_openai; + let apiUrl; + let apiKey; let headers; let bodyParams; if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) { - api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString(); - api_key_openai = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI); + apiUrl = new URL(request.body.reverse_proxy || API_OPENAI).toString(); + apiKey = request.body.reverse_proxy ? 
request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI); headers = {}; bodyParams = {}; @@ -558,8 +475,8 @@ router.post('/generate', jsonParser, function (request, response_generate_openai bodyParams['user'] = uuidv4(); } } else { - api_url = 'https://openrouter.ai/api/v1'; - api_key_openai = readSecret(SECRET_KEYS.OPENROUTER); + apiUrl = 'https://openrouter.ai/api/v1'; + apiKey = readSecret(SECRET_KEYS.OPENROUTER); // OpenRouter needs to pass the referer: https://openrouter.ai/docs headers = { 'HTTP-Referer': request.headers.referer }; bodyParams = { 'transforms': ['middle-out'] }; @@ -569,9 +486,9 @@ router.post('/generate', jsonParser, function (request, response_generate_openai } } - if (!api_key_openai && !request.body.reverse_proxy) { + if (!apiKey && !request.body.reverse_proxy) { console.log('OpenAI API key is missing.'); - return response_generate_openai.status(400).send({ error: true }); + return response.status(400).send({ error: true }); } // Add custom stop sequences @@ -580,10 +497,10 @@ router.post('/generate', jsonParser, function (request, response_generate_openai } const isTextCompletion = Boolean(request.body.model && TEXT_COMPLETION_MODELS.includes(request.body.model)) || typeof request.body.messages === 'string'; - const textPrompt = isTextCompletion ? convertChatMLPrompt(request.body.messages) : ''; + const textPrompt = isTextCompletion ? convertTextCompletionPrompt(request.body.messages) : ''; const endpointUrl = isTextCompletion && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER ? 
- `${api_url}/completions` : - `${api_url}/chat/completions`; + `${apiUrl}/completions` : + `${apiUrl}/chat/completions`; const controller = new AbortController(); request.socket.removeAllListeners('close'); @@ -596,7 +513,7 @@ router.post('/generate', jsonParser, function (request, response_generate_openai method: 'post', headers: { 'Content-Type': 'application/json', - 'Authorization': 'Bearer ' + api_key_openai, + 'Authorization': 'Bearer ' + apiKey, ...headers, }, body: JSON.stringify({ @@ -621,52 +538,55 @@ router.post('/generate', jsonParser, function (request, response_generate_openai console.log(JSON.parse(String(config.body))); - makeRequest(config, response_generate_openai, request); + makeRequest(config, response, request); /** - * - * @param {*} config - * @param {express.Response} response_generate_openai - * @param {express.Request} request - * @param {Number} retries - * @param {Number} timeout + * Makes a fetch request to the OpenAI API endpoint. + * @param {import('node-fetch').RequestInit} config Fetch config + * @param {express.Response} response Express response + * @param {express.Request} request Express request + * @param {Number} retries Number of retries left + * @param {Number} timeout Request timeout in ms */ - async function makeRequest(config, response_generate_openai, request, retries = 5, timeout = 5000) { + async function makeRequest(config, response, request, retries = 5, timeout = 5000) { try { const fetchResponse = await fetch(endpointUrl, config); if (request.body.stream) { console.log('Streaming request in progress'); - forwardFetchResponse(fetchResponse, response_generate_openai); + forwardFetchResponse(fetchResponse, response); return; } if (fetchResponse.ok) { let json = await fetchResponse.json(); - response_generate_openai.send(json); + response.send(json); console.log(json); console.log(json?.choices[0]?.message); } else if (fetchResponse.status === 429 && retries > 0) { console.log(`Out of quota, retrying in 
${Math.round(timeout / 1000)}s`); setTimeout(() => { timeout *= 2; - makeRequest(config, response_generate_openai, request, retries - 1, timeout); + makeRequest(config, response, request, retries - 1, timeout); }, timeout); } else { await handleErrorResponse(fetchResponse); } } catch (error) { console.log('Generation failed', error); - if (!response_generate_openai.headersSent) { - response_generate_openai.send({ error: true }); + if (!response.headersSent) { + response.send({ error: true }); } else { - response_generate_openai.end(); + response.end(); } } } - async function handleErrorResponse(response) { - const responseText = await response.text(); + /** + * @param {import("node-fetch").Response} errorResponse + */ + async function handleErrorResponse(errorResponse) { + const responseText = await errorResponse.text(); const errorData = tryParse(responseText); const statusMessages = { @@ -680,21 +600,20 @@ router.post('/generate', jsonParser, function (request, response_generate_openai 502: 'Bad gateway', }; - const message = errorData?.error?.message || statusMessages[response.status] || 'Unknown error occurred'; - const quota_error = response.status === 429 && errorData?.error?.type === 'insufficient_quota'; + const message = errorData?.error?.message || statusMessages[errorResponse.status] || 'Unknown error occurred'; + const quota_error = errorResponse.status === 429 && errorData?.error?.type === 'insufficient_quota'; console.log(message); - if (!response_generate_openai.headersSent) { - response_generate_openai.send({ error: { message }, quota_error: quota_error }); - } else if (!response_generate_openai.writableEnded) { - response_generate_openai.write(response); + if (!response.headersSent) { + response.send({ error: { message }, quota_error: quota_error }); + } else if (!response.writableEnded) { + response.write(errorResponse); } else { - response_generate_openai.end(); + response.end(); } } }); module.exports = { router, - convertClaudePrompt, }; diff 
--git a/src/endpoints/prompt-converters.js b/src/endpoints/prompt-converters.js new file mode 100644 index 000000000..4ffdb459e --- /dev/null +++ b/src/endpoints/prompt-converters.js @@ -0,0 +1,103 @@ +/** + * Convert a prompt from the ChatML objects to the format used by Claude. + * @param {object[]} messages Array of messages + * @param {boolean} addHumanPrefix Add Human prefix + * @param {boolean} addAssistantPostfix Add Assistant postfix + * @param {boolean} withSystemPrompt Build system prompt before "\n\nHuman: " + * @returns {string} Prompt for Claude + * @copyright Prompt Conversion script taken from RisuAI by kwaroran (GPLv3). + */ +function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, withSystemPrompt) { + // Claude doesn't support message names, so we'll just add them to the message content. + for (const message of messages) { + if (message.name && message.role !== 'system') { + message.content = message.name + ': ' + message.content; + delete message.name; + } + } + + let systemPrompt = ''; + if (withSystemPrompt) { + let lastSystemIdx = -1; + + for (let i = 0; i < messages.length - 1; i++) { + const message = messages[i]; + if (message.role === 'system' && !message.name) { + systemPrompt += message.content + '\n\n'; + } else { + lastSystemIdx = i - 1; + break; + } + } + if (lastSystemIdx >= 0) { + messages.splice(0, lastSystemIdx + 1); + } + } + + let requestPrompt = messages.map((v) => { + let prefix = ''; + switch (v.role) { + case 'assistant': + prefix = '\n\nAssistant: '; + break; + case 'user': + prefix = '\n\nHuman: '; + break; + case 'system': + // According to the Claude docs, H: and A: should be used for example conversations. 
+ if (v.name === 'example_assistant') { + prefix = '\n\nA: '; + } else if (v.name === 'example_user') { + prefix = '\n\nH: '; + } else { + prefix = '\n\n'; + } + break; + } + return prefix + v.content; + }).join(''); + + if (addHumanPrefix) { + requestPrompt = '\n\nHuman: ' + requestPrompt; + } + + if (addAssistantPostfix) { + requestPrompt = requestPrompt + '\n\nAssistant: '; + } + + if (withSystemPrompt) { + requestPrompt = systemPrompt + requestPrompt; + } + + return requestPrompt; +} + +/** + * Convert a prompt from the ChatML objects to the format used by Text Completion API. + * @param {object[]} messages Array of messages + * @returns {string} Prompt for Text Completion API + */ +function convertTextCompletionPrompt(messages) { + if (typeof messages === 'string') { + return messages; + } + + const messageStrings = []; + messages.forEach(m => { + if (m.role === 'system' && m.name === undefined) { + messageStrings.push('System: ' + m.content); + } + else if (m.role === 'system' && m.name !== undefined) { + messageStrings.push(m.name + ': ' + m.content); + } + else { + messageStrings.push(m.role + ': ' + m.content); + } + }); + return messageStrings.join('\n') + '\nassistant:'; +} + +module.exports = { + convertClaudePrompt, + convertTextCompletionPrompt, +}; diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js index b3ee6a0b8..38c04f864 100644 --- a/src/endpoints/tokenizers.js +++ b/src/endpoints/tokenizers.js @@ -4,7 +4,7 @@ const express = require('express'); const { SentencePieceProcessor } = require('@agnai/sentencepiece-js'); const tiktoken = require('@dqbd/tiktoken'); const { Tokenizer } = require('@agnai/web-tokenizers'); -const { convertClaudePrompt } = require('./backends/chat-completions'); +const { convertClaudePrompt } = require('./prompt-converters'); const { readSecret, SECRET_KEYS } = require('./secrets'); const { TEXTGEN_TYPES } = require('../constants'); const { jsonParser } = require('../express-common'); From 
40e15f576285f424ce5dbcbfcd57ea07d213fdce Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:18:10 +0200 Subject: [PATCH 102/179] Fix conditional access to Palm response body --- src/endpoints/backends/chat-completions.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index 0b08d2700..af463bd21 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -197,15 +197,15 @@ async function sendPalmRequest(request, response) { } const generateResponseJson = await generateResponse.json(); - const responseText = generateResponseJson?.candidates[0]?.output; + const responseText = generateResponseJson?.candidates?.[0]?.output; if (!responseText) { console.log('Palm API returned no response', generateResponseJson); let message = `Palm API returned no response: ${JSON.stringify(generateResponseJson)}`; // Check for filters - if (generateResponseJson?.filters[0]?.message) { - message = `Palm filter triggered: ${generateResponseJson.filters[0].message}`; + if (generateResponseJson?.filters?.[0]?.reason) { + message = `Palm filter triggered: ${generateResponseJson.filters[0].reason}`; } return response.send({ error: { message } }); From b52487054409eb6bb5dadf3f6afcf4755d1133e6 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:56:39 +0200 Subject: [PATCH 103/179] Fix AI21 icon styles --- public/style.css | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/public/style.css b/public/style.css index 28796454f..741e83189 100644 --- a/public/style.css +++ b/public/style.css @@ -3665,12 +3665,13 @@ a { } .icon-svg { - fill: currentColor; /* Takes on the color of the surrounding text */ + fill: currentColor; width: auto; height: 14px; - vertical-align: middle; + aspect-ratio: 1; /* To align with 
adjacent text */ + place-self: center; } .paginationjs { From 38a34bf1d5b74f5e5f3633d6483af0b646c304ef Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:14:05 +0200 Subject: [PATCH 104/179] Fix silly argument naming --- src/endpoints/backends/scale-alt.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/endpoints/backends/scale-alt.js b/src/endpoints/backends/scale-alt.js index 240e169b3..edcb7f83f 100644 --- a/src/endpoints/backends/scale-alt.js +++ b/src/endpoints/backends/scale-alt.js @@ -7,8 +7,8 @@ const { readSecret, SECRET_KEYS } = require('../secrets'); const router = express.Router(); -router.post('/generate', jsonParser, function (request, response_generate_scale) { - if (!request.body) return response_generate_scale.sendStatus(400); +router.post('/generate', jsonParser, function (request, response) { + if (!request.body) return response.sendStatus(400); fetch('https://dashboard.scale.com/spellbook/api/trpc/v2.variant.run', { method: 'POST', @@ -61,14 +61,14 @@ router.post('/generate', jsonParser, function (request, response_generate_scale) }, }), }) - .then(response => response.json()) + .then(res => res.json()) .then(data => { console.log(data.result.data.json.outputs[0]); - return response_generate_scale.send({ output: data.result.data.json.outputs[0] }); + return response.send({ output: data.result.data.json.outputs[0] }); }) .catch((error) => { console.error('Error:', error); - return response_generate_scale.send({ error: true }); + return response.send({ error: true }); }); }); From d5bcd96eefc9741a343e35181787519f8d541401 Mon Sep 17 00:00:00 2001 From: based Date: Fri, 15 Dec 2023 01:28:54 +1000 Subject: [PATCH 105/179] message inlining vision support --- public/scripts/openai.js | 19 +++++++------ server.js | 7 +++-- src/chat-completion.js | 59 ++++++++++++++++++++++++++++------------ 3 files changed, 57 insertions(+), 28 deletions(-) diff --git 
a/public/scripts/openai.js b/public/scripts/openai.js index 878254b21..1d22348ef 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -31,18 +31,18 @@ import { system_message_types, this_chid, } from '../script.js'; -import { groups, selected_group } from './group-chats.js'; +import {groups, selected_group} from './group-chats.js'; import { chatCompletionDefaultPrompts, INJECTION_POSITION, Prompt, - promptManagerDefaultPromptOrders, PromptManager, + promptManagerDefaultPromptOrders, } from './PromptManager.js'; -import { getCustomStoppingStrings, persona_description_positions, power_user } from './power-user.js'; -import { SECRET_KEYS, secret_state, writeSecret } from './secrets.js'; +import {getCustomStoppingStrings, persona_description_positions, power_user} from './power-user.js'; +import {SECRET_KEYS, secret_state, writeSecret} from './secrets.js'; import EventSourceStream from './sse-stream.js'; import { @@ -56,7 +56,7 @@ import { resetScrollHeight, stringFormat, } from './utils.js'; -import { countTokensOpenAI, getTokenizerModel } from './tokenizers.js'; +import {countTokensOpenAI, getTokenizerModel} from './tokenizers.js'; import { formatInstructModeChat, formatInstructModeExamples, @@ -1795,13 +1795,15 @@ class Message { async addImage(image) { const textContent = this.content; const isDataUrl = isDataURL(image); - if (!isDataUrl) { try { const response = await fetch(image, { method: 'GET', cache: 'force-cache' }); if (!response.ok) throw new Error('Failed to fetch image'); const blob = await response.blob(); image = await getBase64Async(blob); + if (oai_settings.chat_completion_source === chat_completion_sources.MAKERSUITE) { + image = image.split(',')[1]; + } } catch (error) { console.error('Image adding skipped', error); return; @@ -3087,7 +3089,8 @@ async function onModelChange() { } else { $('#openai_max_context').attr('max', max_8k); } - + oai_settings.temp_openai = Math.min(claude_max_temp, oai_settings.temp_openai); + 
$('#temp_openai').attr('max', claude_max_temp).val(oai_settings.temp_openai).trigger('input'); oai_settings.openai_max_context = Math.min(Number($('#openai_max_context').attr('max')), oai_settings.openai_max_context); $('#openai_max_context').val(oai_settings.openai_max_context).trigger('input'); } @@ -3435,7 +3438,7 @@ export function isImageInliningSupported() { case chat_completion_sources.OPENAI: return oai_settings.openai_model.includes(gpt4v); case chat_completion_sources.MAKERSUITE: - return oai_settings.openai_model.includes(geminiProV); + return oai_settings.google_model.includes(geminiProV); case chat_completion_sources.OPENROUTER: return oai_settings.openrouter_model.includes(gpt4v) || oai_settings.openrouter_model.includes(llava13b); default: diff --git a/server.js b/server.js index 4a19f3cd6..57264347d 100644 --- a/server.js +++ b/server.js @@ -1002,6 +1002,9 @@ async function sendMakerSuiteRequest(request, response) { return response.status(400).send({ error: true }); } + const google_model = request.body.model; + const should_stream = request.body.stream; + const generationConfig = { stopSequences: request.body.stop, candidateCount: 1, @@ -1012,13 +1015,11 @@ async function sendMakerSuiteRequest(request, response) { }; const body = { - contents: convertGooglePrompt(request.body.messages), + contents: convertGooglePrompt(request.body.messages, google_model), safetySettings: MAKERSUITE_SAFETY, generationConfig: generationConfig, }; - const google_model = request.body.model; - const should_stream = request.body.stream; try { const controller = new AbortController(); request.socket.removeAllListeners('close'); diff --git a/src/chat-completion.js b/src/chat-completion.js index cbed76abf..268a044e5 100644 --- a/src/chat-completion.js +++ b/src/chat-completion.js @@ -72,31 +72,56 @@ function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, with return requestPrompt; } -function convertGooglePrompt(messages) { +function 
convertGooglePrompt(messages, type) { const contents = []; let lastRole = ''; let currentText = ''; - messages.forEach((message, index) => { - const role = message.role === 'assistant' ? 'model' : 'user'; - if (lastRole === role) { - currentText += '\n\n' + message.content; - } else { - if (currentText !== '') { + + const isMultimodal = type === 'gemini-pro-vision'; + + if (isMultimodal) { + const combinedText = messages.map((message) => { + const role = message.role === 'assistant' ? 'MODEL: ' : 'USER: '; + return role + message.content; + }).join('\n\n').trim(); + + const imageEntry = messages.find((message) => message.content[1]?.image_url); + contents.push({ + parts: [ + { text: combinedText }, + { + inlineData: { + mimeType: 'image/png', + data: imageEntry.content[1].image_url.url ?? '', + }, + }, + ], + role: 'user', + }); + } else { + messages.forEach((message, index) => { + const role = message.role === 'assistant' ? 'model' : 'user'; + if (lastRole === role) { + currentText += '\n\n' + message.content; + } else { + if (currentText !== '') { + contents.push({ + parts: [{ text: currentText.trim() }], + role: lastRole, + }); + } + currentText = message.content; + lastRole = role; + } + if (index === messages.length - 1) { contents.push({ parts: [{ text: currentText.trim() }], role: lastRole, }); } - currentText = message.content; - lastRole = role; - } - if (index === messages.length - 1) { - contents.push({ - parts: [{ text: currentText.trim() }], - role: lastRole, - }); - } - }); + }); + } + return contents; } From 60880cfd4d5add7257fc8fa38ed36fe965a71d58 Mon Sep 17 00:00:00 2001 From: based Date: Fri, 15 Dec 2023 01:39:12 +1000 Subject: [PATCH 106/179] merge --- public/scripts/openai.js | 6 +- src/endpoints/backends/chat-completions.js | 128 +++++++++++++++------ 2 files changed, 93 insertions(+), 41 deletions(-) diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 08c1cd386..b5b505e3f 100644 --- a/public/scripts/openai.js +++ 
b/public/scripts/openai.js @@ -31,7 +31,7 @@ import { system_message_types, this_chid, } from '../script.js'; -import {groups, selected_group} from './group-chats.js'; +import { groups, selected_group } from './group-chats.js'; import { chatCompletionDefaultPrompts, @@ -41,8 +41,8 @@ import { promptManagerDefaultPromptOrders, } from './PromptManager.js'; -import {getCustomStoppingStrings, persona_description_positions, power_user} from './power-user.js'; -import {SECRET_KEYS, secret_state, writeSecret} from './secrets.js'; +import { getCustomStoppingStrings, persona_description_positions, power_user } from './power-user.js'; +import { SECRET_KEYS, secret_state, writeSecret } from './secrets.js'; import EventSourceStream from './sse-stream.js'; import { diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index af463bd21..16b87ecd6 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -2,9 +2,9 @@ const express = require('express'); const fetch = require('node-fetch').default; const { jsonParser } = require('../../express-common'); -const { CHAT_COMPLETION_SOURCES, PALM_SAFETY } = require('../../constants'); +const { CHAT_COMPLETION_SOURCES, MAKERSUITE_SAFETY } = require('../../constants'); const { forwardFetchResponse, getConfigValue, tryParse, uuidv4 } = require('../../util'); -const { convertClaudePrompt, convertTextCompletionPrompt } = require('../prompt-converters'); +const { convertClaudePrompt, convertGooglePrompt, convertTextCompletionPrompt } = require('../prompt-converters'); const { readSecret, SECRET_KEYS } = require('../secrets'); const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers'); @@ -151,28 +151,35 @@ async function sendScaleRequest(request, response) { * @param {express.Request} request Express request * @param {express.Response} response Express 
response */ -async function sendPalmRequest(request, response) { - const api_key_palm = readSecret(SECRET_KEYS.PALM); +/** + * @param {express.Request} request + * @param {express.Response} response + */ +async function sendMakerSuiteRequest(request, response) { + const api_key_makersuite = readSecret(SECRET_KEYS.MAKERSUITE); - if (!api_key_palm) { - console.log('Palm API key is missing.'); + if (!api_key_makersuite) { + console.log('MakerSuite API key is missing.'); return response.status(400).send({ error: true }); } - const body = { - prompt: { - text: request.body.messages, - }, + const google_model = request.body.model; + const should_stream = request.body.stream; + + const generationConfig = { stopSequences: request.body.stop, - safetySettings: PALM_SAFETY, + candidateCount: 1, + maxOutputTokens: request.body.max_tokens, temperature: request.body.temperature, topP: request.body.top_p, topK: request.body.top_k || undefined, - maxOutputTokens: request.body.max_tokens, - candidate_count: 1, }; - console.log('Palm request:', body); + const body = { + contents: convertGooglePrompt(request.body.messages, google_model), + safetySettings: MAKERSUITE_SAFETY, + generationConfig: generationConfig, + }; try { const controller = new AbortController(); @@ -181,7 +188,7 @@ async function sendPalmRequest(request, response) { controller.abort(); }); - const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText?key=${api_key_palm}`, { + const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${google_model}:${should_stream ? 
'streamGenerateContent' : 'generateContent'}?key=${api_key_makersuite}`, { body: JSON.stringify(body), method: 'POST', headers: { @@ -190,34 +197,79 @@ async function sendPalmRequest(request, response) { signal: controller.signal, timeout: 0, }); + // have to do this because of their busted ass streaming endpoint + if (should_stream) { + try { + let partialData = ''; + generateResponse.body.on('data', (data) => { + const chunk = data.toString(); + if (chunk.startsWith(',') || chunk.endsWith(',') || chunk.startsWith('[') || chunk.endsWith(']')) { + partialData = chunk.slice(1); + } else { + partialData += chunk; + } + while (true) { + let json; + try { + json = JSON.parse(partialData); + } catch (e) { + break; + } + response.write(JSON.stringify(json)); + partialData = ''; + } + }); - if (!generateResponse.ok) { - console.log(`Palm API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); - return response.status(generateResponse.status).send({ error: true }); - } + request.socket.on('close', function () { + generateResponse.body.destroy(); + response.end(); + }); - const generateResponseJson = await generateResponse.json(); - const responseText = generateResponseJson?.candidates?.[0]?.output; + generateResponse.body.on('end', () => { + console.log('Streaming request finished'); + response.end(); + }); - if (!responseText) { - console.log('Palm API returned no response', generateResponseJson); - let message = `Palm API returned no response: ${JSON.stringify(generateResponseJson)}`; - - // Check for filters - if (generateResponseJson?.filters?.[0]?.reason) { - message = `Palm filter triggered: ${generateResponseJson.filters[0].reason}`; + } catch (error) { + console.log('Error forwarding streaming response:', error); + if (!response.headersSent) { + return response.status(500).send({ error: true }); + } + } + } else { + if (!generateResponse.ok) { + console.log(`MakerSuite API returned error: 
${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); + return response.status(generateResponse.status).send({ error: true }); } - return response.send({ error: { message } }); + const generateResponseJson = await generateResponse.json(); + + const candidates = generateResponseJson?.candidates; + if (!candidates || candidates.length === 0) { + let message = 'MakerSuite API returned no candidate'; + console.log(message, generateResponseJson); + if (generateResponseJson?.promptFeedback?.blockReason) { + message += `\nPrompt was blocked due to : ${generateResponseJson.promptFeedback.blockReason}`; + } + return response.send({ error: { message } }); + } + + const responseContent = candidates[0].content; + const responseText = responseContent.parts[0].text; + if (!responseText) { + let message = 'MakerSuite Candidate text empty'; + console.log(message, generateResponseJson); + return response.send({ error: { message } }); + } + + console.log('MakerSuite response:', responseText); + + // Wrap it back to OAI format + const reply = { choices: [{ 'message': { 'content': responseText } }] }; + return response.send(reply); } - - console.log('Palm response:', responseText); - - // Wrap it back to OAI format - const reply = { choices: [{ 'message': { 'content': responseText } }] }; - return response.send(reply); } catch (error) { - console.log('Error communicating with Palm API: ', error); + console.log('Error communicating with MakerSuite API: ', error); if (!response.headersSent) { return response.status(500).send({ error: true }); } @@ -225,7 +277,7 @@ async function sendPalmRequest(request, response) { } /** - * Sends a request to Google AI API. + * Sends a request to AI21 API. 
* @param {express.Request} request Express request * @param {express.Response} response Express response */ @@ -457,7 +509,7 @@ router.post('/generate', jsonParser, function (request, response) { case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response); case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response); case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response); - case CHAT_COMPLETION_SOURCES.PALM: return sendPalmRequest(request, response); + case CHAT_COMPLETION_SOURCES.MAKERSUITE: return sendMakerSuiteRequest(request, response); } let apiUrl; From 372ef2172ef7cd090fe2d3cd464347717af8b37a Mon Sep 17 00:00:00 2001 From: LenAnderson Date: Thu, 14 Dec 2023 16:00:38 +0000 Subject: [PATCH 107/179] add slash commands to crud QRs --- .../scripts/extensions/quick-reply/index.js | 81 ++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/public/scripts/extensions/quick-reply/index.js b/public/scripts/extensions/quick-reply/index.js index a5b05f692..dcaa695c2 100644 --- a/public/scripts/extensions/quick-reply/index.js +++ b/public/scripts/extensions/quick-reply/index.js @@ -1,6 +1,6 @@ import { saveSettingsDebounced, callPopup, getRequestHeaders, substituteParams, eventSource, event_types, animation_duration } from '../../../script.js'; import { getContext, extension_settings } from '../../extensions.js'; -import { getSortableDelay, escapeHtml } from '../../utils.js'; +import { getSortableDelay, escapeHtml, delay } from '../../utils.js'; import { executeSlashCommands, registerSlashCommand } from '../../slash-commands.js'; import { ContextMenu } from './src/ContextMenu.js'; import { MenuItem } from './src/MenuItem.js'; @@ -717,6 +717,68 @@ function saveQROrder() { }); } +async function qrCreateCallback(args, mes) { + const qr = { + label: args.label ?? '', + mes: mes, + title: args.title ?? '', + autoExecute_chatLoad: JSON.parse(args.load ?? 
false), + autoExecute_userMessage: JSON.parse(args.user ?? false), + autoExecute_botMessage: JSON.parse(args.bot ?? false), + autoExecute_appStartup: JSON.parse(args.startup ?? false), + hidden: JSON.parse(args.hidden ?? false), + }; + const preset = presets.find(x => x.name == (args.set ?? selected_preset)); + preset.quickReplySlots.push(qr); + preset.numberOfSlots++; + const response = await fetch('/savequickreply', { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify(preset), + }); + saveSettingsDebounced(); + await delay(400); + applyQuickReplyPreset(selected_preset); +} +async function qrUpdateCallback(args, mes) { + const preset = presets.find(x => x.name == (args.set ?? selected_preset)); + const idx = preset.quickReplySlots.findIndex(x => x.label == args.label); + const oqr = preset.quickReplySlots[idx]; + const qr = { + label: args.newlabel ?? oqr.label ?? '', + mes: mes ?? oqr.mes, + title: args.title ?? oqr.title ?? '', + autoExecute_chatLoad: JSON.parse(args.load ?? oqr.autoExecute_chatLoad ?? false), + autoExecute_userMessage: JSON.parse(args.user ?? oqr.autoExecute_userMessage ?? false), + autoExecute_botMessage: JSON.parse(args.bot ?? oqr.autoExecute_botMessage ?? false), + autoExecute_appStartup: JSON.parse(args.startup ?? oqr.autoExecute_appStartup ?? false), + hidden: JSON.parse(args.hidden ?? oqr.hidden ?? false), + }; + preset.quickReplySlots[idx] = qr; + const response = await fetch('/savequickreply', { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify(preset), + }); + saveSettingsDebounced(); + await delay(400); + applyQuickReplyPreset(selected_preset); +} +async function qrDeleteCallback(args, label) { + const preset = presets.find(x => x.name == (args.set ?? 
selected_preset)); + const idx = preset.quickReplySlots.findIndex(x => x.label == label); + preset.quickReplySlots.splice(idx, 1); + preset.numberOfSlots--; + const response = await fetch('/savequickreply', { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify(preset), + }); + saveSettingsDebounced(); + await delay(400); + applyQuickReplyPreset(selected_preset); +} + let onMessageSentExecuting = false; let onMessageReceivedExecuting = false; let onChatChangedExecuting = false; @@ -901,4 +963,21 @@ jQuery(async () => { jQuery(() => { registerSlashCommand('qr', doQR, [], '(number) – activates the specified Quick Reply', true, true); registerSlashCommand('qrset', doQRPresetSwitch, [], '(name) – swaps to the specified Quick Reply Preset', true, true); + const qrArgs = ` + label - string - text on the button, e.g., label=MyButton + set - string - name of the QR set, e.g., set=PresetName1 + hidden - bool - whether the button should be hidden, e.g., hidden=true + startup - bool - auto execute on app startup, e.g., startup=true + user - bool - auto execute on user message, e.g., user=true + bot - bool - auto execute on AI message, e.g., bot=true + load - bool - auto execute on chat load, e.g., load=true + title - bool - title / tooltip to be shown on button, e.g., title="My Fancy Button" + `.trim(); + const qrUpdateArgs = ` + newlabel - text - new text fort the button, e.g. 
newlabel=MyRenamedButton + ${qrArgs} + `.trim(); + registerSlashCommand('qr-create', qrCreateCallback, [], `(arguments, [message])\n arguments:\n ${qrArgs} – creates a new Quick Reply, example: /qr-create set=MyPreset label=MyButton /echo 123`, true, true); + registerSlashCommand('qr-update', qrUpdateCallback, [], `(arguments, [message])\n arguments:\n ${qrUpdateArgs} – updates Quick Reply, example: /qr-update set=MyPreset label=MyButton newlabel=MyRenamedButton /echo 123`, true, true); + registerSlashCommand('qr-delete', qrDeleteCallback, [], `(set=string [label]) – deletes Quick Reply`, true, true); }); From 5071b9a3697c6499113b72f001e04f5d49c203ca Mon Sep 17 00:00:00 2001 From: based Date: Fri, 15 Dec 2023 02:01:42 +1000 Subject: [PATCH 108/179] webstorm moment --- public/scripts/extensions/shared.js | 8 ++++---- public/scripts/openai.js | 2 +- src/endpoints/prompt-converters.js | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/public/scripts/extensions/shared.js b/public/scripts/extensions/shared.js index fa2105082..5d9a10150 100644 --- a/public/scripts/extensions/shared.js +++ b/public/scripts/extensions/shared.js @@ -1,7 +1,7 @@ -import {getRequestHeaders} from '../../script.js'; -import {extension_settings} from '../extensions.js'; -import {SECRET_KEYS, secret_state} from '../secrets.js'; -import {createThumbnail} from '../utils.js'; +import { getRequestHeaders } from '../../script.js'; +import { extension_settings } from '../extensions.js'; +import { SECRET_KEYS, secret_state } from '../secrets.js'; +import { createThumbnail } from '../utils.js'; /** * Generates a caption for an image using a multimodal model. 
diff --git a/public/scripts/openai.js b/public/scripts/openai.js index b5b505e3f..7977c8ff9 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -56,7 +56,7 @@ import { resetScrollHeight, stringFormat, } from './utils.js'; -import {countTokensOpenAI, getTokenizerModel} from './tokenizers.js'; +import { countTokensOpenAI, getTokenizerModel } from './tokenizers.js'; import { formatInstructModeChat, formatInstructModeExamples, diff --git a/src/endpoints/prompt-converters.js b/src/endpoints/prompt-converters.js index e793acc52..4b5e1bc37 100644 --- a/src/endpoints/prompt-converters.js +++ b/src/endpoints/prompt-converters.js @@ -72,12 +72,12 @@ function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, with return requestPrompt; } -function convertGooglePrompt(messages, type) { +function convertGooglePrompt(messages, model) { const contents = []; let lastRole = ''; let currentText = ''; - const isMultimodal = type === 'gemini-pro-vision'; + const isMultimodal = model === 'gemini-pro-vision'; if (isMultimodal) { const combinedText = messages.map((message) => { From 5e61ff8d05d05525d879f43328b8dbf9bf5693e6 Mon Sep 17 00:00:00 2001 From: LenAnderson Date: Thu, 14 Dec 2023 16:11:03 +0000 Subject: [PATCH 109/179] fix help string --- public/scripts/extensions/quick-reply/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/scripts/extensions/quick-reply/index.js b/public/scripts/extensions/quick-reply/index.js index dcaa695c2..357403527 100644 --- a/public/scripts/extensions/quick-reply/index.js +++ b/public/scripts/extensions/quick-reply/index.js @@ -974,7 +974,7 @@ jQuery(() => { title - bool - title / tooltip to be shown on button, e.g., title="My Fancy Button" `.trim(); const qrUpdateArgs = ` - newlabel - text - new text fort the button, e.g. newlabel=MyRenamedButton + newlabel - string - new text fort the button, e.g. 
newlabel=MyRenamedButton ${qrArgs} `.trim(); registerSlashCommand('qr-create', qrCreateCallback, [], `(arguments, [message])\n arguments:\n ${qrArgs} – creates a new Quick Reply, example: /qr-create set=MyPreset label=MyButton /echo 123`, true, true); From d4f96020f28774e6f9d8bb6219941fb08a23a6df Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 19:33:23 +0200 Subject: [PATCH 110/179] Migrate Palm secret key, fix vector source key access --- src/endpoints/secrets.js | 19 +++++++++++++++++++ src/endpoints/vectors.js | 4 ++-- ...{palm-vectors.js => makersuite-vectors.js} | 14 +++++++------- 3 files changed, 28 insertions(+), 9 deletions(-) rename src/{palm-vectors.js => makersuite-vectors.js} (71%) diff --git a/src/endpoints/secrets.js b/src/endpoints/secrets.js index c997e5efe..585bee7ee 100644 --- a/src/endpoints/secrets.js +++ b/src/endpoints/secrets.js @@ -44,6 +44,17 @@ function writeSecret(key, value) { writeFileAtomicSync(SECRETS_FILE, JSON.stringify(secrets, null, 4), 'utf-8'); } +function deleteSecret(key) { + if (!fs.existsSync(SECRETS_FILE)) { + return; + } + + const fileContents = fs.readFileSync(SECRETS_FILE, 'utf-8'); + const secrets = JSON.parse(fileContents); + delete secrets[key]; + writeFileAtomicSync(SECRETS_FILE, JSON.stringify(secrets, null, 4), 'utf-8'); +} + /** * Reads a secret from the secrets file * @param {string} key Secret key @@ -119,6 +130,14 @@ function migrateSecrets(settingsFile) { modified = true; } + const palmKey = readSecret('api_key_palm'); + if (palmKey) { + console.log('Migrating Palm key...'); + writeSecret(SECRET_KEYS.MAKERSUITE, palmKey); + deleteSecret('api_key_palm'); + modified = true; + } + if (modified) { console.log('Writing updated settings.json...'); const settingsContent = JSON.stringify(settings, null, 4); diff --git a/src/endpoints/vectors.js b/src/endpoints/vectors.js index 387803ccb..e49d157fa 100644 --- a/src/endpoints/vectors.js +++ 
b/src/endpoints/vectors.js @@ -17,7 +17,7 @@ async function getVector(source, text) { case 'transformers': return require('../embedding').getTransformersVector(text); case 'palm': - return require('../palm-vectors').getPaLMVector(text); + return require('../makersuite-vectors').getMakerSuiteVector(text); } throw new Error(`Unknown vector source ${source}`); @@ -196,7 +196,7 @@ router.post('/purge', jsonParser, async (req, res) => { const collectionId = String(req.body.collectionId); - const sources = ['transformers', 'openai']; + const sources = ['transformers', 'openai', 'palm']; for (const source of sources) { const index = await getIndex(collectionId, source, false); diff --git a/src/palm-vectors.js b/src/makersuite-vectors.js similarity index 71% rename from src/palm-vectors.js rename to src/makersuite-vectors.js index b4e6a68bd..66d1a6fd8 100644 --- a/src/palm-vectors.js +++ b/src/makersuite-vectors.js @@ -6,12 +6,12 @@ const { SECRET_KEYS, readSecret } = require('./endpoints/secrets'); * @param {string} text - The text to get the vector for * @returns {Promise} - The vector for the text */ -async function getPaLMVector(text) { - const key = readSecret(SECRET_KEYS.PALM); +async function getMakerSuiteVector(text) { + const key = readSecret(SECRET_KEYS.MAKERSUITE); if (!key) { - console.log('No PaLM key found'); - throw new Error('No PaLM key found'); + console.log('No MakerSuite key found'); + throw new Error('No MakerSuite key found'); } const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/embedding-gecko-001:embedText?key=${key}`, { @@ -26,8 +26,8 @@ async function getPaLMVector(text) { if (!response.ok) { const text = await response.text(); - console.log('PaLM request failed', response.statusText, text); - throw new Error('PaLM request failed'); + console.log('MakerSuite request failed', response.statusText, text); + throw new Error('MakerSuite request failed'); } const data = await response.json(); @@ -39,5 +39,5 @@ async 
function getPaLMVector(text) { } module.exports = { - getPaLMVector, + getMakerSuiteVector, }; From 6bb894286e65e40e80d246b508d578bf6e6389fa Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 19:54:31 +0200 Subject: [PATCH 111/179] Migrate palm source to makersuite --- public/scripts/openai.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 7977c8ff9..db0d7c06d 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -2411,6 +2411,11 @@ function loadOpenAISettings(data, settings) { } $('#openai_logit_bias_preset').trigger('change'); + // Upgrade Palm to Makersuite + if (oai_settings.chat_completion_source === 'palm') { + oai_settings.chat_completion_source = chat_completion_sources.MAKERSUITE; + } + $('#chat_completion_source').val(oai_settings.chat_completion_source).trigger('change'); $('#oai_max_context_unlocked').prop('checked', oai_settings.max_context_unlocked); } From a6bb75456705af53d8a4de7d93ce4bf49992ea5d Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 19:56:43 +0200 Subject: [PATCH 112/179] Fix API key access --- public/scripts/extensions/vectors/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/scripts/extensions/vectors/index.js b/public/scripts/extensions/vectors/index.js index e02501f2e..214b1d887 100644 --- a/public/scripts/extensions/vectors/index.js +++ b/public/scripts/extensions/vectors/index.js @@ -394,7 +394,7 @@ async function getSavedHashes(collectionId) { */ async function insertVectorItems(collectionId, items) { if (settings.source === 'openai' && !secret_state[SECRET_KEYS.OPENAI] || - settings.source === 'palm' && !secret_state[SECRET_KEYS.PALM]) { + settings.source === 'palm' && !secret_state[SECRET_KEYS.MAKERSUITE]) { throw new Error('Vectors: API key missing', { cause: 'api_key_missing' }); } From 
47c71a62f35a378d4f4435ca4dc87579c22176d0 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 19:58:27 +0200 Subject: [PATCH 113/179] Don't rewrite settings if just moving the key --- src/endpoints/secrets.js | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/endpoints/secrets.js b/src/endpoints/secrets.js index 585bee7ee..5da0cc730 100644 --- a/src/endpoints/secrets.js +++ b/src/endpoints/secrets.js @@ -96,6 +96,13 @@ function readSecretState() { * @returns {void} */ function migrateSecrets(settingsFile) { + const palmKey = readSecret('api_key_palm'); + if (palmKey) { + console.log('Migrating Palm key...'); + writeSecret(SECRET_KEYS.MAKERSUITE, palmKey); + deleteSecret('api_key_palm'); + } + if (!fs.existsSync(settingsFile)) { console.log('Settings file does not exist'); return; @@ -130,14 +137,6 @@ function migrateSecrets(settingsFile) { modified = true; } - const palmKey = readSecret('api_key_palm'); - if (palmKey) { - console.log('Migrating Palm key...'); - writeSecret(SECRET_KEYS.MAKERSUITE, palmKey); - deleteSecret('api_key_palm'); - modified = true; - } - if (modified) { console.log('Writing updated settings.json...'); const settingsContent = JSON.stringify(settings, null, 4); From d1be9d534716ee0fa6f2f9cb364f749cc6a5d676 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 20:05:27 +0200 Subject: [PATCH 114/179] Fix JSDoc + lint + readability --- src/endpoints/backends/chat-completions.js | 21 +++++++++------------ src/endpoints/prompt-converters.js | 6 ++++++ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index 16b87ecd6..bd8969ac2 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -1,5 +1,6 @@ const express = require('express'); const fetch = 
require('node-fetch').default; +const { Readable } = require('stream'); const { jsonParser } = require('../../express-common'); const { CHAT_COMPLETION_SOURCES, MAKERSUITE_SAFETY } = require('../../constants'); @@ -151,20 +152,16 @@ async function sendScaleRequest(request, response) { * @param {express.Request} request Express request * @param {express.Response} response Express response */ -/** - * @param {express.Request} request - * @param {express.Response} response - */ async function sendMakerSuiteRequest(request, response) { - const api_key_makersuite = readSecret(SECRET_KEYS.MAKERSUITE); + const apiKey = readSecret(SECRET_KEYS.MAKERSUITE); - if (!api_key_makersuite) { + if (!apiKey) { console.log('MakerSuite API key is missing.'); return response.status(400).send({ error: true }); } - const google_model = request.body.model; - const should_stream = request.body.stream; + const model = request.body.model; + const stream = request.body.stream; const generationConfig = { stopSequences: request.body.stop, @@ -176,7 +173,7 @@ async function sendMakerSuiteRequest(request, response) { }; const body = { - contents: convertGooglePrompt(request.body.messages, google_model), + contents: convertGooglePrompt(request.body.messages, model), safetySettings: MAKERSUITE_SAFETY, generationConfig: generationConfig, }; @@ -188,7 +185,7 @@ async function sendMakerSuiteRequest(request, response) { controller.abort(); }); - const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${google_model}:${should_stream ? 'streamGenerateContent' : 'generateContent'}?key=${api_key_makersuite}`, { + const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${model}:${stream ? 
'streamGenerateContent' : 'generateContent'}?key=${apiKey}`, { body: JSON.stringify(body), method: 'POST', headers: { @@ -198,7 +195,7 @@ async function sendMakerSuiteRequest(request, response) { timeout: 0, }); // have to do this because of their busted ass streaming endpoint - if (should_stream) { + if (stream) { try { let partialData = ''; generateResponse.body.on('data', (data) => { @@ -221,7 +218,7 @@ async function sendMakerSuiteRequest(request, response) { }); request.socket.on('close', function () { - generateResponse.body.destroy(); + if (generateResponse.body instanceof Readable) generateResponse.body.destroy(); response.end(); }); diff --git a/src/endpoints/prompt-converters.js b/src/endpoints/prompt-converters.js index 4b5e1bc37..a31a9be47 100644 --- a/src/endpoints/prompt-converters.js +++ b/src/endpoints/prompt-converters.js @@ -72,6 +72,12 @@ function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, with return requestPrompt; } +/** + * Convert a prompt from the ChatML objects to the format used by Google MakerSuite models. 
+ * @param {object[]} messages Array of messages + * @param {string} model Model name + * @returns {object[]} Prompt for Google MakerSuite models + */ function convertGooglePrompt(messages, model) { const contents = []; let lastRole = ''; From 3e44e4240c7ca9592a7274368dcada4e4e67ebe0 Mon Sep 17 00:00:00 2001 From: LenAnderson Date: Thu, 14 Dec 2023 18:09:33 +0000 Subject: [PATCH 115/179] handle escapes for pipes and curcly brackets --- public/scripts/extensions/quick-reply/index.js | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/public/scripts/extensions/quick-reply/index.js b/public/scripts/extensions/quick-reply/index.js index 357403527..854bcd9e3 100644 --- a/public/scripts/extensions/quick-reply/index.js +++ b/public/scripts/extensions/quick-reply/index.js @@ -720,7 +720,11 @@ function saveQROrder() { async function qrCreateCallback(args, mes) { const qr = { label: args.label ?? '', - mes: mes, + mes: (mes ?? '') + .replace(/\\\|/g, '|') + .replace(/\\\{/g, '{') + .replace(/\\\}/g, '}') + , title: args.title ?? '', autoExecute_chatLoad: JSON.parse(args.load ?? false), autoExecute_userMessage: JSON.parse(args.user ?? false), @@ -746,7 +750,11 @@ async function qrUpdateCallback(args, mes) { const oqr = preset.quickReplySlots[idx]; const qr = { label: args.newlabel ?? oqr.label ?? '', - mes: mes ?? oqr.mes, + mes: (mes ?? oqr.mes) + .replace('\\|', '|') + .replace('\\{', '{') + .replace('\\}', '}') + , title: args.title ?? oqr.title ?? '', autoExecute_chatLoad: JSON.parse(args.load ?? oqr.autoExecute_chatLoad ?? false), autoExecute_userMessage: JSON.parse(args.user ?? oqr.autoExecute_userMessage ?? 
false), From 90ec6b915901a8f2ce145740b81f1148fd3cd960 Mon Sep 17 00:00:00 2001 From: LenAnderson Date: Thu, 14 Dec 2023 18:25:19 +0000 Subject: [PATCH 116/179] add slash commands for context menus --- .../scripts/extensions/quick-reply/index.js | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/public/scripts/extensions/quick-reply/index.js b/public/scripts/extensions/quick-reply/index.js index 854bcd9e3..955dcff8e 100644 --- a/public/scripts/extensions/quick-reply/index.js +++ b/public/scripts/extensions/quick-reply/index.js @@ -787,6 +787,61 @@ async function qrDeleteCallback(args, label) { applyQuickReplyPreset(selected_preset); } +async function qrContextAddCallback(args, presetName) { + const preset = presets.find(x => x.name == (args.set ?? selected_preset)); + const idx = preset.quickReplySlots.findIndex(x => x.label == args.label); + const oqr = preset.quickReplySlots[idx]; + if (!oqr.contextMenu) { + oqr.contextMenu = []; + } + let item = oqr.contextMenu.find(it=>it.preset == presetName); + if (item) { + item.chain = JSON.parse(args.chain ?? 'null') ?? item.chain ?? false; + } else { + oqr.contextMenu.push({preset:presetName, chain: JSON.parse(args.chain ?? 'false')}); + } + const response = await fetch('/savequickreply', { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify(preset), + }); + saveSettingsDebounced(); + await delay(400); + applyQuickReplyPreset(selected_preset); +} +async function qrContextDeleteCallback(args, presetName) { + const preset = presets.find(x => x.name == (args.set ?? 
selected_preset)); + const idx = preset.quickReplySlots.findIndex(x => x.label == args.label); + const oqr = preset.quickReplySlots[idx]; + if (!oqr.contextMenu) return; + const ctxIdx = oqr.contextMenu.findIndex(it=>it.preset == presetName); + if (ctxIdx > -1) { + oqr.contextMenu.splice(ctxIdx, 1); + } + const response = await fetch('/savequickreply', { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify(preset), + }); + saveSettingsDebounced(); + await delay(400); + applyQuickReplyPreset(selected_preset); +} +async function qrContextClearCallback(args, label) { + const preset = presets.find(x => x.name == (args.set ?? selected_preset)); + const idx = preset.quickReplySlots.findIndex(x => x.label == label); + const oqr = preset.quickReplySlots[idx]; + oqr.contextMenu = []; + const response = await fetch('/savequickreply', { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify(preset), + }); + saveSettingsDebounced(); + await delay(400); + applyQuickReplyPreset(selected_preset); +} + let onMessageSentExecuting = false; let onMessageReceivedExecuting = false; let onChatChangedExecuting = false; @@ -988,4 +1043,7 @@ jQuery(() => { registerSlashCommand('qr-create', qrCreateCallback, [], `(arguments, [message])\n arguments:\n ${qrArgs} – creates a new Quick Reply, example: /qr-create set=MyPreset label=MyButton /echo 123`, true, true); registerSlashCommand('qr-update', qrUpdateCallback, [], `(arguments, [message])\n arguments:\n ${qrUpdateArgs} – updates Quick Reply, example: /qr-update set=MyPreset label=MyButton newlabel=MyRenamedButton /echo 123`, true, true); registerSlashCommand('qr-delete', qrDeleteCallback, [], `(set=string [label]) – deletes Quick Reply`, true, true); + registerSlashCommand('qr-contextadd', qrContextAddCallback, [], `(set=string label=string chain=bool [preset name]) – add context menu preset to a QR, example: /qr-contextadd set=MyPreset label=MyButton chain=true MyOtherPreset`, true, true); + 
registerSlashCommand('qr-contextdel', qrContextDeleteCallback, [], `(set=string label=string [preset name]) – remove context menu preset from a QR, example: /qr-contextdel set=MyPreset label=MyButton MyOtherPreset`, true, true); + registerSlashCommand('qr-contextclear', qrContextClearCallback, [], `(set=string [label]) – remove all context menu presets from a QR, example: /qr-contextclear set=MyPreset MyButton`, true, true); }); From bb8b8f9386ebecbecef5764e64688aa96802413d Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 20:36:31 +0200 Subject: [PATCH 117/179] Fix sending PNG/WEBP to Google captioning --- .../extensions/stable-diffusion/index.js | 26 ++++++++++++------- src/endpoints/google.js | 20 ++++++++------ 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/public/scripts/extensions/stable-diffusion/index.js b/public/scripts/extensions/stable-diffusion/index.js index 9935d43e4..204e268c2 100644 --- a/public/scripts/extensions/stable-diffusion/index.js +++ b/public/scripts/extensions/stable-diffusion/index.js @@ -1756,22 +1756,28 @@ async function generateMultimodalPrompt(generationType, quietPrompt) { } } - const response = await fetch(avatarUrl); + try { + const response = await fetch(avatarUrl); - if (!response.ok) { - throw new Error('Could not fetch avatar image.'); - } + if (!response.ok) { + throw new Error('Could not fetch avatar image.'); + } - const avatarBlob = await response.blob(); - const avatarBase64 = await getBase64Async(avatarBlob); + const avatarBlob = await response.blob(); + const avatarBase64 = await getBase64Async(avatarBlob); - const caption = await getMultimodalCaption(avatarBase64, quietPrompt); + const caption = await getMultimodalCaption(avatarBase64, quietPrompt); - if (!caption) { + if (!caption) { + throw new Error('No caption returned from the API.'); + } + + return caption; + } catch (error) { + console.error(error); + toastr.error('Multimodal captioning 
failed. Please try again.', 'Image Generation'); throw new Error('Multimodal captioning failed.'); } - - return caption; } /** diff --git a/src/endpoints/google.js b/src/endpoints/google.js index df675c253..1e74f71c7 100644 --- a/src/endpoints/google.js +++ b/src/endpoints/google.js @@ -8,21 +8,26 @@ const router = express.Router(); router.post('/caption-image', jsonParser, async (request, response) => { try { + const mimeType = request.body.image.split(';')[0].split(':')[1]; + const base64Data = request.body.image.split(',')[1]; const url = `https://generativelanguage.googleapis.com/v1beta/models/gemini-pro-vision:generateContent?key=${readSecret(SECRET_KEYS.MAKERSUITE)}`; const body = { contents: [{ parts: [ { text: request.body.prompt }, - { inlineData: { - mimeType: 'image/png', //jpg images seem to work fine even with this mimetype set? - data: request.body.image, - }, + { + inlineData: { + mimeType: 'image/png', // It needs to specify a MIME type in data if it's not a PNG + data: mimeType === 'image/png' ? 
base64Data : request.body.image, + }, }], }], safetySettings: MAKERSUITE_SAFETY, generationConfig: { maxOutputTokens: 1000 }, }; + console.log('Multimodal captioning request', body); + const result = await fetch(url, { body: JSON.stringify(body), method: 'POST', @@ -32,10 +37,9 @@ router.post('/caption-image', jsonParser, async (request, response) => { timeout: 0, }); - console.log('Multimodal captioning request', body); - if (!result.ok) { - console.log(`MakerSuite API returned error: ${result.status} ${result.statusText} ${await result.text()}`); + const error = await result.json(); + console.log(`MakerSuite API returned error: ${result.status} ${result.statusText}`, error); return response.status(result.status).send({ error: true }); } @@ -43,7 +47,7 @@ router.post('/caption-image', jsonParser, async (request, response) => { console.log('Multimodal captioning response', data); const candidates = data?.candidates; - if(!candidates) { + if (!candidates) { return response.status(500).send('No candidates found, image was most likely filtered.'); } From 5e3584d5edf6d07d802490aaa8e8d4754e74d7be Mon Sep 17 00:00:00 2001 From: LenAnderson Date: Thu, 14 Dec 2023 18:51:55 +0000 Subject: [PATCH 118/179] add slash command to create QR preset --- .../scripts/extensions/quick-reply/index.js | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/public/scripts/extensions/quick-reply/index.js b/public/scripts/extensions/quick-reply/index.js index 955dcff8e..a58ec8f2c 100644 --- a/public/scripts/extensions/quick-reply/index.js +++ b/public/scripts/extensions/quick-reply/index.js @@ -842,6 +842,25 @@ async function qrContextClearCallback(args, label) { applyQuickReplyPreset(selected_preset); } +async function qrPresetAddCallback(args, name) { + const quickReplyPreset = { + name: name, + quickReplyEnabled: JSON.parse(args.enabled ?? null) ?? true, + quickActionEnabled: JSON.parse(args.nosend ?? null) ?? false, + placeBeforeInputEnabled: JSON.parse(args.before ?? 
null) ?? false, + quickReplySlots: [], + numberOfSlots: Number(args.slots ?? '0'), + AutoInputInject: JSON.parse(args.inject ?? 'false'), + }; + + const response = await fetch('/savequickreply', { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify(quickReplyPreset), + }); + await updateQuickReplyPresetList(); +} + let onMessageSentExecuting = false; let onMessageReceivedExecuting = false; let onChatChangedExecuting = false; @@ -1046,4 +1065,12 @@ jQuery(() => { registerSlashCommand('qr-contextadd', qrContextAddCallback, [], `(set=string label=string chain=bool [preset name]) – add context menu preset to a QR, example: /qr-contextadd set=MyPreset label=MyButton chain=true MyOtherPreset`, true, true); registerSlashCommand('qr-contextdel', qrContextDeleteCallback, [], `(set=string label=string [preset name]) – remove context menu preset from a QR, example: /qr-contextdel set=MyPreset label=MyButton MyOtherPreset`, true, true); registerSlashCommand('qr-contextclear', qrContextClearCallback, [], `(set=string [label]) – remove all context menu presets from a QR, example: /qr-contextclear set=MyPreset MyButton`, true, true); + const presetArgs = ` + enabled - bool - enable or disable the preset + nosend - bool - disable send / insert in user input (invalid for slash commands) + before - bool - place QR before user input + slots - int - number of slots + inject - bool - inject user input automatically (if disabled use {{input}}) + `.trim(); + registerSlashCommand('qr-presetadd', qrPresetAddCallback, [], `(arguments [label])\n arguments:\n ${presetArgs} – remove all context menu presets from a QR, example: /qr-contextclear set=MyPreset MyButton`, true, true); }); From dbf28fce4765455650e6ef5f47ee055c5a023d63 Mon Sep 17 00:00:00 2001 From: LenAnderson Date: Thu, 14 Dec 2023 18:52:23 +0000 Subject: [PATCH 119/179] cleanup autocomplete help text --- public/scripts/extensions/quick-reply/index.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
diff --git a/public/scripts/extensions/quick-reply/index.js b/public/scripts/extensions/quick-reply/index.js index a58ec8f2c..9002901aa 100644 --- a/public/scripts/extensions/quick-reply/index.js +++ b/public/scripts/extensions/quick-reply/index.js @@ -1059,8 +1059,8 @@ jQuery(() => { newlabel - string - new text fort the button, e.g. newlabel=MyRenamedButton ${qrArgs} `.trim(); - registerSlashCommand('qr-create', qrCreateCallback, [], `(arguments, [message])\n arguments:\n ${qrArgs} – creates a new Quick Reply, example: /qr-create set=MyPreset label=MyButton /echo 123`, true, true); - registerSlashCommand('qr-update', qrUpdateCallback, [], `(arguments, [message])\n arguments:\n ${qrUpdateArgs} – updates Quick Reply, example: /qr-update set=MyPreset label=MyButton newlabel=MyRenamedButton /echo 123`, true, true); + registerSlashCommand('qr-create', qrCreateCallback, [], `(arguments [message])\n arguments:\n ${qrArgs} – creates a new Quick Reply, example: /qr-create set=MyPreset label=MyButton /echo 123`, true, true); + registerSlashCommand('qr-update', qrUpdateCallback, [], `(arguments [message])\n arguments:\n ${qrUpdateArgs} – updates Quick Reply, example: /qr-update set=MyPreset label=MyButton newlabel=MyRenamedButton /echo 123`, true, true); registerSlashCommand('qr-delete', qrDeleteCallback, [], `(set=string [label]) – deletes Quick Reply`, true, true); registerSlashCommand('qr-contextadd', qrContextAddCallback, [], `(set=string label=string chain=bool [preset name]) – add context menu preset to a QR, example: /qr-contextadd set=MyPreset label=MyButton chain=true MyOtherPreset`, true, true); registerSlashCommand('qr-contextdel', qrContextDeleteCallback, [], `(set=string label=string [preset name]) – remove context menu preset from a QR, example: /qr-contextdel set=MyPreset label=MyButton MyOtherPreset`, true, true); From 0519629b700afb172c65780c5cea8d1c7583589b Mon Sep 17 00:00:00 2001 From: LenAnderson Date: Thu, 14 Dec 2023 18:54:42 +0000 Subject: [PATCH 
120/179] fix autocomplete help text --- public/scripts/extensions/quick-reply/index.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/public/scripts/extensions/quick-reply/index.js b/public/scripts/extensions/quick-reply/index.js index 9002901aa..ef61f75e6 100644 --- a/public/scripts/extensions/quick-reply/index.js +++ b/public/scripts/extensions/quick-reply/index.js @@ -1060,11 +1060,11 @@ jQuery(() => { ${qrArgs} `.trim(); registerSlashCommand('qr-create', qrCreateCallback, [], `(arguments [message])\n arguments:\n ${qrArgs} – creates a new Quick Reply, example: /qr-create set=MyPreset label=MyButton /echo 123`, true, true); - registerSlashCommand('qr-update', qrUpdateCallback, [], `(arguments [message])\n arguments:\n ${qrUpdateArgs} – updates Quick Reply, example: /qr-update set=MyPreset label=MyButton newlabel=MyRenamedButton /echo 123`, true, true); + registerSlashCommand('qr-update', qrUpdateCallback, [], `(arguments [message])\n arguments:\n ${qrUpdateArgs} – updates Quick Reply, example: /qr-update set=MyPreset label=MyButton newlabel=MyRenamedButton /echo 123`, true, true); registerSlashCommand('qr-delete', qrDeleteCallback, [], `(set=string [label]) – deletes Quick Reply`, true, true); - registerSlashCommand('qr-contextadd', qrContextAddCallback, [], `(set=string label=string chain=bool [preset name]) – add context menu preset to a QR, example: /qr-contextadd set=MyPreset label=MyButton chain=true MyOtherPreset`, true, true); - registerSlashCommand('qr-contextdel', qrContextDeleteCallback, [], `(set=string label=string [preset name]) – remove context menu preset from a QR, example: /qr-contextdel set=MyPreset label=MyButton MyOtherPreset`, true, true); - registerSlashCommand('qr-contextclear', qrContextClearCallback, [], `(set=string [label]) – remove all context menu presets from a QR, example: /qr-contextclear set=MyPreset MyButton`, true, true); + registerSlashCommand('qr-contextadd', qrContextAddCallback, [], 
`(set=string label=string chain=bool [preset name]) – add context menu preset to a QR, example: /qr-contextadd set=MyPreset label=MyButton chain=true MyOtherPreset`, true, true); + registerSlashCommand('qr-contextdel', qrContextDeleteCallback, [], `(set=string label=string [preset name]) – remove context menu preset from a QR, example: /qr-contextdel set=MyPreset label=MyButton MyOtherPreset`, true, true); + registerSlashCommand('qr-contextclear', qrContextClearCallback, [], `(set=string [label]) – remove all context menu presets from a QR, example: /qr-contextclear set=MyPreset MyButton`, true, true); const presetArgs = ` enabled - bool - enable or disable the preset nosend - bool - disable send / insert in user input (invalid for slash commands) @@ -1072,5 +1072,5 @@ jQuery(() => { slots - int - number of slots inject - bool - inject user input automatically (if disabled use {{input}}) `.trim(); - registerSlashCommand('qr-presetadd', qrPresetAddCallback, [], `(arguments [label])\n arguments:\n ${presetArgs} – remove all context menu presets from a QR, example: /qr-contextclear set=MyPreset MyButton`, true, true); + registerSlashCommand('qr-presetadd', qrPresetAddCallback, [], `(arguments [label])\n arguments:\n ${presetArgs} – create a new preset (overrides existing ones), example: /qr-presetadd slots=3 MyNewPreset`, true, true); }); From 2045e414d1938610eccf52586214bc4843aef70b Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 20:57:43 +0200 Subject: [PATCH 121/179] lint: format fix --- public/scripts/tokenizers.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 83842d73d..196f3ec9c 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -376,7 +376,7 @@ export function getTokenizerModel() { } } - if(oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) { + if 
(oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) { return oai_settings.google_model; } @@ -395,7 +395,7 @@ export function countTokensOpenAI(messages, full = false) { const shouldTokenizeAI21 = oai_settings.chat_completion_source === chat_completion_sources.AI21 && oai_settings.use_ai21_tokenizer; const shouldTokenizeGoogle = oai_settings.chat_completion_source === chat_completion_sources.MAKERSUITE && oai_settings.use_google_tokenizer; let tokenizerEndpoint = ''; - if(shouldTokenizeAI21) { + if (shouldTokenizeAI21) { tokenizerEndpoint = '/api/tokenizers/ai21/count'; } else if (shouldTokenizeGoogle) { tokenizerEndpoint = `/api/tokenizers/google/count?model=${getTokenizerModel()}`; From b5b85724c74133ffa1072b514d9825f39bbc2d34 Mon Sep 17 00:00:00 2001 From: LenAnderson Date: Thu, 14 Dec 2023 19:08:27 +0000 Subject: [PATCH 122/179] add slash command to update existing QR preset --- .../scripts/extensions/quick-reply/index.js | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/public/scripts/extensions/quick-reply/index.js b/public/scripts/extensions/quick-reply/index.js index ef61f75e6..737eccb52 100644 --- a/public/scripts/extensions/quick-reply/index.js +++ b/public/scripts/extensions/quick-reply/index.js @@ -861,6 +861,26 @@ async function qrPresetAddCallback(args, name) { await updateQuickReplyPresetList(); } +async function qrPresetUpdateCallback(args, name) { + const preset = presets.find(it=>it.name==name); + const quickReplyPreset = { + name: preset.name, + quickReplyEnabled: JSON.parse(args.enabled ?? null) ?? preset.quickReplyEnabled, + quickActionEnabled: JSON.parse(args.nosend ?? null) ?? preset.quickActionEnabled, + placeBeforeInputEnabled: JSON.parse(args.before ?? null) ?? preset.placeBeforeInputEnabled, + quickReplySlots: preset.quickReplySlots, + numberOfSlots: Number(args.slots ?? preset.numberOfSlots), + AutoInputInject: JSON.parse(args.inject ?? 'null') ?? 
preset.AutoInputInject, + }; + Object.assign(preset, quickReplyPreset); + + const response = await fetch('/savequickreply', { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify(quickReplyPreset), + }); +} + let onMessageSentExecuting = false; let onMessageReceivedExecuting = false; let onChatChangedExecuting = false; @@ -1073,4 +1093,5 @@ jQuery(() => { inject - bool - inject user input automatically (if disabled use {{input}}) `.trim(); registerSlashCommand('qr-presetadd', qrPresetAddCallback, [], `(arguments [label])\n arguments:\n ${presetArgs} – create a new preset (overrides existing ones), example: /qr-presetadd slots=3 MyNewPreset`, true, true); + registerSlashCommand('qr-presetupdate', qrPresetUpdateCallback, [], `(arguments [label])\n arguments:\n ${presetArgs} – update an existing preset, example: /qr-presetupdate enabled=false MyPreset`, true, true); }); From eec28469f8ad492a5f3cc331b93d9e7223aa56f8 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 21:21:37 +0200 Subject: [PATCH 123/179] Fix server crash if multimodal prompt contains no image --- public/index.html | 2 +- src/endpoints/prompt-converters.js | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/public/index.html b/public/index.html index 67bb1b96d..bd12da858 100644 --- a/public/index.html +++ b/public/index.html @@ -1497,7 +1497,7 @@
    -
    +