mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Gemini inline images (#3681)
* Gemini images for non-streaming * Parse images on stream * Add toggle for image request * Add extraction params to extractImageFromData * Add explicit break and return * Add more JSdoc to processImageAttachment * Add file name prefix * Add object argument for saveReply * Add defaults to saveReply params * Use type for saveReply result * Change type check in saveReply backward compat
This commit is contained in:
114
public/script.js
114
public/script.js
@ -171,6 +171,7 @@ import {
|
||||
isElementInViewport,
|
||||
copyText,
|
||||
escapeHtml,
|
||||
saveBase64AsFile,
|
||||
} from './scripts/utils.js';
|
||||
import { debounce_timeout } from './scripts/constants.js';
|
||||
|
||||
@ -3203,6 +3204,8 @@ class StreamingProcessor {
|
||||
this.reasoningHandler = new ReasoningHandler(timeStarted);
|
||||
/** @type {PromptReasoning} */
|
||||
this.promptReasoning = promptReasoning;
|
||||
/** @type {string} */
|
||||
this.image = '';
|
||||
}
|
||||
|
||||
/**
|
||||
@ -3250,7 +3253,7 @@ class StreamingProcessor {
|
||||
this.sendTextarea.value = '';
|
||||
this.sendTextarea.dispatchEvent(new Event('input', { bubbles: true }));
|
||||
} else {
|
||||
await saveReply(this.type, text, true, '', [], '');
|
||||
await saveReply({ type: this.type, getMessage: text, fromStreaming: true });
|
||||
messageId = chat.length - 1;
|
||||
await this.#checkDomElements(messageId, continueOnReasoning);
|
||||
this.markUIGenStarted();
|
||||
@ -3372,6 +3375,11 @@ class StreamingProcessor {
|
||||
chat[messageId].swipe_info.push(...swipeInfoArray);
|
||||
}
|
||||
|
||||
if (this.image) {
|
||||
await processImageAttachment(chat[messageId], { imageUrl: this.image, parsedImage: null });
|
||||
appendMediaToMessage(chat[messageId], $(this.messageDom));
|
||||
}
|
||||
|
||||
if (this.type !== 'impersonate') {
|
||||
await eventSource.emit(event_types.MESSAGE_RECEIVED, this.messageId, this.type);
|
||||
await eventSource.emit(event_types.CHARACTER_MESSAGE_RENDERED, this.messageId, this.type);
|
||||
@ -3468,6 +3476,7 @@ class StreamingProcessor {
|
||||
}
|
||||
// Get the updated reasoning string into the handler
|
||||
this.reasoningHandler.updateReasoning(this.messageId, state?.reasoning);
|
||||
this.image = state?.image ?? '';
|
||||
await eventSource.emit(event_types.STREAM_TOKEN_RECEIVED, text);
|
||||
await sw.tick(async () => await this.onProgressStreaming(this.messageId, this.continueMessage + text));
|
||||
}
|
||||
@ -4866,6 +4875,7 @@ export async function Generate(type, { automatic_trigger, force_name2, quiet_pro
|
||||
let getMessage = extractMessageFromData(data);
|
||||
let title = extractTitleFromData(data);
|
||||
let reasoning = extractReasoningFromData(data);
|
||||
let imageUrl = extractImageFromData(data);
|
||||
kobold_horde_model = title;
|
||||
|
||||
const swipes = extractMultiSwipes(data, type);
|
||||
@ -4898,10 +4908,10 @@ export async function Generate(type, { automatic_trigger, force_name2, quiet_pro
|
||||
else {
|
||||
// Without streaming we'll be having a full message on continuation. Treat it as a last chunk.
|
||||
if (originalType !== 'continue') {
|
||||
({ type, getMessage } = await saveReply(type, getMessage, false, title, swipes, reasoning));
|
||||
({ type, getMessage } = await saveReply({ type, getMessage, title, swipes, reasoning, imageUrl }));
|
||||
}
|
||||
else {
|
||||
({ type, getMessage } = await saveReply('appendFinal', getMessage, false, title, swipes, reasoning));
|
||||
({ type, getMessage } = await saveReply({ type: 'appendFinal', getMessage, title, swipes, reasoning, imageUrl }));
|
||||
}
|
||||
|
||||
// This relies on `saveReply` having been called to add the message to the chat, so it must be last.
|
||||
@ -5725,6 +5735,32 @@ function extractTitleFromData(data) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the image from the response data.
|
||||
* @param {object} data Response data
|
||||
* @param {object} [options] Extraction options
|
||||
* @param {string} [options.mainApi] Main API to use
|
||||
* @param {string} [options.chatCompletionSource] Chat completion source
|
||||
* @returns {string} Extracted image
|
||||
*/
|
||||
function extractImageFromData(data, { mainApi = null, chatCompletionSource = null } = {}) {
|
||||
switch (mainApi ?? main_api) {
|
||||
case 'openai': {
|
||||
switch (chatCompletionSource ?? oai_settings.chat_completion_source) {
|
||||
case chat_completion_sources.MAKERSUITE: {
|
||||
const inlineData = data?.responseContent?.parts?.find(x => x.inlineData)?.inlineData;
|
||||
if (inlineData) {
|
||||
return `data:${inlineData.mimeType};base64,${inlineData.data}`;
|
||||
}
|
||||
} break;
|
||||
|
||||
}
|
||||
} break;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* parseAndSaveLogprobs receives the full data response for a non-streaming
|
||||
* generation, parses logprobs for all tokens in the message, and saves them
|
||||
@ -5974,7 +6010,59 @@ export function cleanUpMessage(getMessage, isImpersonate, isContinue, displayInc
|
||||
return getMessage;
|
||||
}
|
||||
|
||||
export async function saveReply(type, getMessage, fromStreaming, title, swipes, reasoning) {
|
||||
/**
|
||||
* Adds an image to the message.
|
||||
* @param {object} message Message object
|
||||
* @param {object} sources Image sources
|
||||
* @param {ParsedImage} [sources.parsedImage] Parsed image
|
||||
* @param {string} [sources.imageUrl] Image URL
|
||||
*
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async function processImageAttachment(message, { parsedImage, imageUrl }) {
|
||||
if (parsedImage?.image) {
|
||||
saveImageToMessage(parsedImage, message);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!imageUrl) {
|
||||
return;
|
||||
}
|
||||
|
||||
let url = imageUrl;
|
||||
if (isDataURL(url)) {
|
||||
const fileName = `inline_image_${Date.now().toString()}`;
|
||||
const [mime, base64] = /^data:(.*?);base64,(.*)$/.exec(imageUrl).slice(1);
|
||||
url = await saveBase64AsFile(base64, message.name, fileName, mime.split('/')[1]);
|
||||
}
|
||||
saveImageToMessage({ image: url, inline: true }, message);
|
||||
}
|
||||
|
||||
/**
|
||||
* Saves a resulting message to the chat.
|
||||
* @param {SaveReplyParams} params
|
||||
* @returns {Promise<SaveReplyResult>} Promise when the message is saved
|
||||
*
|
||||
* @typedef {object} SaveReplyParams
|
||||
* @property {string} type Type of generation
|
||||
* @property {string} getMessage Generated message
|
||||
* @property {boolean} [fromStreaming] If the message is from streaming
|
||||
* @property {string} [title] Message tooltip
|
||||
* @property {string[]} [swipes] Extra swipes
|
||||
* @property {string} [reasoning] Message reasoning
|
||||
* @property {string} [imageUrl] Link to an image
|
||||
*
|
||||
* @typedef {object} SaveReplyResult
|
||||
* @property {string} type Type of generation
|
||||
* @property {string} getMessage Generated message
|
||||
*/
|
||||
export async function saveReply({ type, getMessage, fromStreaming = false, title = '', swipes = [], reasoning = '', imageUrl = '' }) {
|
||||
// Backward compatibility
|
||||
if (arguments.length > 1 && typeof arguments[0] !== 'object') {
|
||||
console.trace('saveReply called with positional arguments. Please use an object instead.');
|
||||
[type, getMessage, fromStreaming, title, swipes, reasoning, imageUrl] = arguments;
|
||||
}
|
||||
|
||||
if (type != 'append' && type != 'continue' && type != 'appendFinal' && chat.length && (chat[chat.length - 1]['swipe_id'] === undefined ||
|
||||
chat[chat.length - 1]['is_user'])) {
|
||||
type = 'normal';
|
||||
@ -5995,8 +6083,8 @@ export async function saveReply(type, getMessage, fromStreaming, title, swipes,
|
||||
|
||||
let oldMessage = '';
|
||||
const generationFinished = new Date();
|
||||
const img = extractImageFromMessage(getMessage);
|
||||
getMessage = img.getMessage;
|
||||
const parsedImage = extractImageFromMessage(getMessage);
|
||||
getMessage = parsedImage.getMessage;
|
||||
if (type === 'swipe') {
|
||||
oldMessage = chat[chat.length - 1]['mes'];
|
||||
chat[chat.length - 1]['swipes'].length++;
|
||||
@ -6010,6 +6098,7 @@ export async function saveReply(type, getMessage, fromStreaming, title, swipes,
|
||||
chat[chat.length - 1]['extra']['model'] = getGeneratingModel();
|
||||
chat[chat.length - 1]['extra']['reasoning'] = reasoning;
|
||||
chat[chat.length - 1]['extra']['reasoning_duration'] = null;
|
||||
await processImageAttachment(chat[chat.length - 1], { parsedImage, imageUrl });
|
||||
if (power_user.message_token_count_enabled) {
|
||||
const tokenCountText = (reasoning || '') + chat[chat.length - 1]['mes'];
|
||||
chat[chat.length - 1]['extra']['token_count'] = await getTokenCountAsync(tokenCountText, 0);
|
||||
@ -6033,6 +6122,7 @@ export async function saveReply(type, getMessage, fromStreaming, title, swipes,
|
||||
chat[chat.length - 1]['extra']['model'] = getGeneratingModel();
|
||||
chat[chat.length - 1]['extra']['reasoning'] = reasoning;
|
||||
chat[chat.length - 1]['extra']['reasoning_duration'] = null;
|
||||
await processImageAttachment(chat[chat.length - 1], { parsedImage, imageUrl });
|
||||
if (power_user.message_token_count_enabled) {
|
||||
const tokenCountText = (reasoning || '') + chat[chat.length - 1]['mes'];
|
||||
chat[chat.length - 1]['extra']['token_count'] = await getTokenCountAsync(tokenCountText, 0);
|
||||
@ -6052,6 +6142,7 @@ export async function saveReply(type, getMessage, fromStreaming, title, swipes,
|
||||
chat[chat.length - 1]['extra']['api'] = getGeneratingApi();
|
||||
chat[chat.length - 1]['extra']['model'] = getGeneratingModel();
|
||||
chat[chat.length - 1]['extra']['reasoning'] += reasoning;
|
||||
await processImageAttachment(chat[chat.length - 1], { parsedImage, imageUrl });
|
||||
// We don't know if the reasoning duration extended, so we don't update it here on purpose.
|
||||
if (power_user.message_token_count_enabled) {
|
||||
const tokenCountText = (reasoning || '') + chat[chat.length - 1]['mes'];
|
||||
@ -6097,7 +6188,7 @@ export async function saveReply(type, getMessage, fromStreaming, title, swipes,
|
||||
chat[chat.length - 1]['extra']['gen_id'] = group_generation_id;
|
||||
}
|
||||
|
||||
saveImageToMessage(img, chat[chat.length - 1]);
|
||||
await processImageAttachment(chat[chat.length - 1], { parsedImage, imageUrl: imageUrl });
|
||||
const chat_id = (chat.length - 1);
|
||||
|
||||
!fromStreaming && await eventSource.emit(event_types.MESSAGE_RECEIVED, chat_id, type);
|
||||
@ -6203,6 +6294,12 @@ export function syncMesToSwipe(messageId = null) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Saves the image to the message object.
|
||||
* @param {ParsedImage} img Image object
|
||||
* @param {object} mes Chat message object
|
||||
* @typedef {{ image?: string, title?: string, inline?: boolean }} ParsedImage
|
||||
*/
|
||||
function saveImageToMessage(img, mes) {
|
||||
if (mes && img.image) {
|
||||
if (!mes.extra || typeof mes.extra !== 'object') {
|
||||
@ -6210,6 +6307,7 @@ function saveImageToMessage(img, mes) {
|
||||
}
|
||||
mes.extra.image = img.image;
|
||||
mes.extra.title = img.title;
|
||||
mes.extra.inline_image = img.inline;
|
||||
}
|
||||
}
|
||||
|
||||
@ -6252,7 +6350,7 @@ function extractImageFromMessage(getMessage) {
|
||||
const image = results ? results[1] : '';
|
||||
const title = results ? results[2] : '';
|
||||
getMessage = getMessage.replace(regex, '');
|
||||
return { getMessage, image, title };
|
||||
return { getMessage, image, title, inline: true };
|
||||
}
|
||||
|
||||
/**
|
||||
|
Reference in New Issue
Block a user