Files
SillyTavern/public/scripts/custom-request.js
2025-05-12 23:00:12 +03:00

584 lines
23 KiB
JavaScript

import { getPresetManager } from './preset-manager.js';
import { extractMessageFromData, getGenerateUrl, getRequestHeaders } from '../script.js';
import { getTextGenServer } from './textgen-settings.js';
import { extractReasoningFromData } from './reasoning.js';
import { formatInstructModeChat, formatInstructModePrompt, getInstructStoppingSequences, names_behavior_types } from './instruct-mode.js';
import { getStreamingReply, tryParseStreamingError } from './openai.js';
import EventSourceStream from './sse-stream.js';
// #region Type Definitions
/**
* @typedef {Object} TextCompletionRequestBase
* @property {boolean?} [stream=false] - Whether to stream the response
* @property {number} max_tokens - Maximum number of tokens to generate
* @property {string} [model] - Optional model name
* @property {string} api_type - Type of API to use
* @property {string} [api_server] - Optional API server URL
* @property {number} [temperature] - Optional temperature parameter
* @property {number} [min_p] - Optional min_p parameter
*/
/**
* @typedef {Object} TextCompletionPayloadBase
* @property {boolean?} [stream=false] - Whether to stream the response
* @property {string} prompt - The text prompt for completion
* @property {number} max_tokens - Maximum number of tokens to generate
* @property {number} max_new_tokens - Alias for max_tokens
* @property {string} [model] - Optional model name
* @property {string} api_type - Type of API to use
* @property {string} api_server - API server URL
* @property {number} [temperature] - Optional temperature parameter
*/
/** @typedef {Record<string, any> & TextCompletionPayloadBase} TextCompletionPayload */
/**
* @typedef {Object} ChatCompletionMessage
* @property {string} role - The role of the message author (e.g., "user", "assistant", "system")
* @property {string} content - The content of the message
*/
/**
* @typedef {Object} ChatCompletionPayloadBase
* @property {boolean?} [stream=false] - Whether to stream the response
* @property {ChatCompletionMessage[]} messages - Array of chat messages
* @property {string} [model] - Optional model name to use for completion
* @property {string} chat_completion_source - Source provider
* @property {number} max_tokens - Maximum number of tokens to generate
* @property {number} [temperature] - Optional temperature parameter for response randomness
* @property {string} [custom_url] - Optional custom URL
* @property {string} [reverse_proxy] - Optional reverse proxy URL
* @property {string} [proxy_password] - Optional proxy password
*/
/** @typedef {Record<string, any> & ChatCompletionPayloadBase} ChatCompletionPayload */
/**
* @typedef {Object} ExtractedData
* @property {string} content - Extracted content.
* @property {string} reasoning - Extracted reasoning.
*/
/**
* @typedef {Object} StreamResponse
* @property {string} text - Generated text.
* @property {string[]} swipes - Generated swipes
* @property {Object} state - Generated state
* @property {string?} [state.reasoning] - Generated reasoning
* @property {string?} [state.image] - Generated image
*/
// #endregion
/**
* Creates & sends a text completion request.
*/
export class TextCompletionService {
static TYPE = 'textgenerationwebui';
/**
* @param {Record<string, any> & TextCompletionRequestBase & {prompt: string}} custom
* @returns {TextCompletionPayload}
*/
static createRequestData({ stream = false, prompt, max_tokens, model, api_type, api_server, temperature, min_p, ...props }) {
const payload = {
stream,
prompt,
max_tokens,
max_new_tokens: max_tokens,
model,
api_type,
api_server: api_server ?? getTextGenServer(api_type),
temperature,
min_p,
...props,
};
// Remove undefined values to avoid API errors
Object.keys(payload).forEach(key => {
if (payload[key] === undefined) {
delete payload[key];
}
});
return payload;
}
/**
* Sends a text completion request to the specified server
* @param {TextCompletionPayload} data Request data
* @param {boolean?} extractData Extract message from the response. Default true
* @param {AbortSignal?} signal
* @returns {Promise<ExtractedData | (() => AsyncGenerator<StreamResponse>)>} If not streaming, returns extracted data; if streaming, returns a function that creates an AsyncGenerator
* @throws {Error}
*/
static async sendRequest(data, extractData = true, signal = null) {
if (!data.stream) {
const response = await fetch(getGenerateUrl(this.TYPE), {
method: 'POST',
headers: getRequestHeaders(),
cache: 'no-cache',
body: JSON.stringify(data),
signal: signal ?? new AbortController().signal,
});
const json = await response.json();
if (!response.ok || json.error) {
throw json;
}
if (!extractData) {
return json;
}
return {
content: extractMessageFromData(json, this.TYPE),
reasoning: extractReasoningFromData(json, {
mainApi: this.TYPE,
textGenType: data.api_type,
ignoreShowThoughts: true,
}),
};
}
const response = await fetch('/api/backends/text-completions/generate', {
method: 'POST',
headers: getRequestHeaders(),
cache: 'no-cache',
body: JSON.stringify(data),
signal: signal ?? new AbortController().signal,
});
if (!response.ok) {
const text = await response.text();
tryParseStreamingError(response, text, { quiet: true });
throw new Error(`Got response status ${response.status}`);
}
const eventStream = new EventSourceStream();
response.body.pipeThrough(eventStream);
const reader = eventStream.readable.getReader();
return async function* streamData() {
let text = '';
const swipes = [];
const state = { reasoning: '' };
while (true) {
const { done, value } = await reader.read();
if (done) return;
if (value.data === '[DONE]') return;
tryParseStreamingError(response, value.data, { quiet: true });
let data = JSON.parse(value.data);
if (data?.choices?.[0]?.index > 0) {
const swipeIndex = data.choices[0].index - 1;
swipes[swipeIndex] = (swipes[swipeIndex] || '') + data.choices[0].text;
} else {
const newText = data?.choices?.[0]?.text || data?.content || '';
text += newText;
state.reasoning += data?.choices?.[0]?.reasoning ?? '';
}
yield { text, swipes, state };
}
};
}
/**
* Process and send a text completion request with optional preset & instruct
* @param {Record<string, any> & TextCompletionRequestBase & {prompt: (ChatCompletionMessage & {ignoreInstruct?: boolean})[] |string}} custom
* @param {Object} options - Configuration options
* @param {string?} [options.presetName] - Name of the preset to use for generation settings
* @param {string?} [options.instructName] - Name of instruct preset for message formatting
* @param {Partial<InstructSettings>?} [options.instructSettings] - Override instruct settings
* @param {boolean} extractData - Whether to extract structured data from response
* @param {AbortSignal?} [signal]
* @returns {Promise<ExtractedData | (() => AsyncGenerator<StreamResponse>)>} If not streaming, returns extracted data; if streaming, returns a function that creates an AsyncGenerator
* @throws {Error}
*/
static async processRequest(
custom,
options = {},
extractData = true,
signal = null,
) {
const { presetName, instructName } = options;
let requestData = { ...custom };
const prompt = custom.prompt;
// Apply generation preset if specified
if (presetName) {
const presetManager = getPresetManager(this.TYPE);
if (presetManager) {
const preset = presetManager.getCompletionPresetByName(presetName);
if (preset) {
// Convert preset to payload and merge with custom parameters
const presetPayload = this.presetToGeneratePayload(preset, {});
requestData = { ...presetPayload, ...requestData };
} else {
console.warn(`Preset "${presetName}" not found, continuing with default settings`);
}
} else {
console.warn('Preset manager not found, continuing with default settings');
}
}
/** @type {InstructSettings | undefined} */
let instructPreset;
// Handle instruct formatting if requested
if (Array.isArray(prompt) && instructName) {
const instructPresetManager = getPresetManager('instruct');
instructPreset = instructPresetManager?.getCompletionPresetByName(instructName);
if (instructPreset) {
// Clone the preset to avoid modifying the original
instructPreset = structuredClone(instructPreset);
instructPreset.names_behavior = names_behavior_types.NONE;
if (options.instructSettings) {
Object.assign(instructPreset, options.instructSettings);
}
// Format messages using instruct formatting
const formattedMessages = [];
const prefillActive = prompt.length > 0 ? prompt[prompt.length - 1].role === 'assistant' : false;
for (const message of prompt) {
let messageContent = message.content;
if (!message.ignoreInstruct) {
const isLastMessage = message === prompt[prompt.length - 1];
// This complicated logic means:
// 1. If prefill is not active, format all messages
// 2. If prefill is active, format all messages except the last one
if (!isLastMessage || !prefillActive) {
messageContent = formatInstructModeChat(
message.role,
message.content,
message.role === 'user',
false,
undefined,
undefined,
undefined,
undefined,
instructPreset,
);
}
// Add prompt formatting for the last message.
if (isLastMessage) {
if (!prefillActive) { // e.g. "<|im_start|>user:"
messageContent += formatInstructModePrompt(
undefined,
false,
undefined,
undefined,
undefined,
false,
false,
instructPreset,
);
} else { // e.g. "<|im_start|>assistant: Hello, my name is"
const overridenInstructPreset = structuredClone(instructPreset);
overridenInstructPreset.output_suffix = '';
overridenInstructPreset.wrap = false;
messageContent = formatInstructModeChat(
message.role,
message.content,
false, // since it is assistant
false,
undefined,
undefined,
undefined,
undefined,
overridenInstructPreset,
);
}
}
}
formattedMessages.push(messageContent);
}
requestData.prompt = formattedMessages.join('');
const stoppingStrings = getInstructStoppingSequences({ customInstruct: instructPreset, useStopStrings: false });
requestData.stop = stoppingStrings;
requestData.stopping_strings = stoppingStrings;
} else {
console.warn(`Instruct preset "${instructName}" not found, using basic formatting`);
requestData.prompt = prompt.map(x => x.content).join('\n\n');
}
} else if (typeof prompt === 'string') {
requestData.prompt = prompt;
} else {
requestData.prompt = prompt.map(x => x.content).join('\n\n');
}
// @ts-ignore
const data = this.createRequestData(requestData);
const response = await this.sendRequest(data, extractData, signal);
// Remove stopping strings from the end
if (!data.stream && extractData) {
/** @type {ExtractedData} */
// @ts-ignore
const extractedData = response;
let message = extractedData.content;
message = message.replace(/[^\S\r\n]+$/gm, '');
if (requestData.stopping_strings) {
for (const stoppingString of requestData.stopping_strings) {
if (stoppingString.length) {
for (let j = stoppingString.length; j > 0; j--) {
if (message.slice(-j) === stoppingString.slice(0, j)) {
message = message.slice(0, -j);
break;
}
}
}
}
}
if (instructPreset) {
[
instructPreset.stop_sequence,
instructPreset.input_sequence,
].forEach(sequence => {
if (sequence?.trim()) {
const index = message.indexOf(sequence);
if (index !== -1) {
message = message.substring(0, index);
}
}
});
[
instructPreset.output_sequence,
instructPreset.last_output_sequence,
].forEach(sequences => {
if (sequences) {
sequences.split('\n')
.filter(line => line.trim() !== '')
.forEach(line => {
message = message.replaceAll(line, '');
});
}
});
}
extractedData.content = message;
}
return response;
}
/**
* Converts a preset to a valid text completion payload.
* Only supports temperature.
* @param {Object} preset - The preset configuration
* @param {Object} customPreset - Additional parameters to override preset values
* @returns {Object} - Formatted payload for text completion API
*/
static presetToGeneratePayload(preset, customPreset = {}) {
if (!preset || typeof preset !== 'object') {
throw new Error('Invalid preset: must be an object');
}
// Merge preset with custom parameters
const settings = { ...preset, ...customPreset };
// Initialize base payload with common parameters
let payload = {
'temperature': settings.temp ? Number(settings.temp) : undefined,
'min_p': settings.min_p ? Number(settings.min_p) : undefined,
};
// Remove undefined values to avoid API errors
Object.keys(payload).forEach(key => {
if (payload[key] === undefined) {
delete payload[key];
}
});
return payload;
}
}
/**
* Creates & sends a chat completion request.
*/
export class ChatCompletionService {
static TYPE = 'openai';
/**
* @param {ChatCompletionPayload} custom
* @returns {ChatCompletionPayload}
*/
static createRequestData({ stream = false, messages, model, chat_completion_source, max_tokens, temperature, custom_url, reverse_proxy, proxy_password, ...props }) {
const payload = {
stream,
messages,
model,
chat_completion_source,
max_tokens,
temperature,
custom_url,
reverse_proxy,
proxy_password,
use_makersuite_sysprompt: true,
claude_use_sysprompt: true,
...props,
};
// Remove undefined values to avoid API errors
Object.keys(payload).forEach(key => {
if (payload[key] === undefined) {
delete payload[key];
}
});
return payload;
}
/**
* Sends a chat completion request
* @param {ChatCompletionPayload} data Request data
* @param {boolean?} extractData Extract message from the response. Default true
* @param {AbortSignal?} signal Abort signal
* @returns {Promise<ExtractedData | (() => AsyncGenerator<StreamResponse>)>} If not streaming, returns extracted data; if streaming, returns a function that creates an AsyncGenerator
* @throws {Error}
*/
static async sendRequest(data, extractData = true, signal = null) {
const response = await fetch('/api/backends/chat-completions/generate', {
method: 'POST',
headers: getRequestHeaders(),
cache: 'no-cache',
body: JSON.stringify(data),
signal: signal ?? new AbortController().signal,
});
if (!data.stream) {
const json = await response.json();
if (!response.ok || json.error) {
throw json;
}
if (!extractData) {
return json;
}
return {
content: extractMessageFromData(json, this.TYPE),
reasoning: extractReasoningFromData(json, {
mainApi: this.TYPE,
textGenType: data.chat_completion_source,
ignoreShowThoughts: true,
}),
};
}
if (!response.ok) {
const text = await response.text();
tryParseStreamingError(response, text, { quiet: true });
throw new Error(`Got response status ${response.status}`);
}
const eventStream = new EventSourceStream();
response.body.pipeThrough(eventStream);
const reader = eventStream.readable.getReader();
return async function* streamData() {
let text = '';
const swipes = [];
const state = { reasoning: '', image: '' };
while (true) {
const { done, value } = await reader.read();
if (done) return;
const rawData = value.data;
if (rawData === '[DONE]') return;
tryParseStreamingError(response, rawData, { quiet: true });
const parsed = JSON.parse(rawData);
const reply = getStreamingReply(parsed, state, {
chatCompletionSource: data.chat_completion_source,
overrideShowThoughts: true,
});
if (Array.isArray(parsed?.choices) && parsed?.choices?.[0]?.index > 0) {
const swipeIndex = parsed.choices[0].index - 1;
swipes[swipeIndex] = (swipes[swipeIndex] || '') + reply;
} else {
text += reply;
}
yield { text, swipes: swipes, state };
}
};
}
/**
* Process and send a chat completion request with optional preset
* @param {ChatCompletionPayload} custom
* @param {Object} options - Configuration options
* @param {string?} [options.presetName] - Name of the preset to use for generation settings
* @param {boolean} [extractData=true] - Whether to extract structured data from response
* @param {AbortSignal?} [signal] - Abort signal
* @returns {Promise<ExtractedData | (() => AsyncGenerator<StreamResponse>)>} If not streaming, returns extracted data; if streaming, returns a function that creates an AsyncGenerator
* @throws {Error}
*/
static async processRequest(custom, options, extractData = true, signal = null) {
const { presetName } = options;
let requestData = { ...custom };
// Apply generation preset if specified
if (presetName) {
const presetManager = getPresetManager(this.TYPE);
if (presetManager) {
const preset = presetManager.getCompletionPresetByName(presetName);
if (preset) {
// Convert preset to payload and merge with custom parameters
const presetPayload = this.presetToGeneratePayload(preset, {});
requestData = { ...presetPayload, ...requestData };
} else {
console.warn(`Preset "${presetName}" not found, continuing with default settings`);
}
} else {
console.warn('Preset manager not found, continuing with default settings');
}
}
const data = this.createRequestData(requestData);
return await this.sendRequest(data, extractData, signal);
}
/**
* Converts a preset to a valid chat completion payload
* Only supports temperature.
* @param {Object} preset - The preset configuration
* @param {Object} customParams - Additional parameters to override preset values
* @returns {Object} - Formatted payload for chat completion API
*/
static presetToGeneratePayload(preset, customParams = {}) {
if (!preset || typeof preset !== 'object') {
throw new Error('Invalid preset: must be an object');
}
// Merge preset with custom parameters
const settings = { ...preset, ...customParams };
// Initialize base payload with common parameters
const payload = {
temperature: settings.temperature ? Number(settings.temperature) : undefined,
};
// Remove undefined values to avoid API errors
Object.keys(payload).forEach(key => {
if (payload[key] === undefined) {
delete payload[key];
}
});
return payload;
}
}