From 20ab6193ab824fbd7bee25c9d8e3108a43fdc627 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 16 Mar 2025 21:17:53 +0200 Subject: [PATCH] Send multiple image swipes per prompt #3635 --- public/index.html | 5 +- public/script.js | 4 +- public/scripts/chats.js | 82 +++++++++++++++++++ .../extensions/stable-diffusion/index.js | 18 +++- public/scripts/openai.js | 76 +++++++++++------ public/style.css | 10 +++ 6 files changed, 165 insertions(+), 30 deletions(-) diff --git a/public/index.html b/public/index.html index 0753e3134..248aa1233 100644 --- a/public/index.html +++ b/public/index.html @@ -6432,10 +6432,13 @@
-
+
+
+
+
diff --git a/public/script.js b/public/script.js index 6ce80faba..4ed1c31e4 100644 --- a/public/script.js +++ b/public/script.js @@ -243,7 +243,7 @@ import { getBackgrounds, initBackgrounds, loadBackgroundSettings, background_set import { hideLoader, showLoader } from './scripts/loader.js'; import { BulkEditOverlay, CharacterContextMenu } from './scripts/BulkEditOverlay.js'; import { loadFeatherlessModels, loadMancerModels, loadOllamaModels, loadTogetherAIModels, loadInfermaticAIModels, loadOpenRouterModels, loadVllmModels, loadAphroditeModels, loadDreamGenModels, initTextGenModels, loadTabbyModels, loadGenericModels } from './scripts/textgen-models.js'; -import { appendFileContent, hasPendingFileAttachment, populateFileAttachment, decodeStyleTags, encodeStyleTags, isExternalMediaAllowed, getCurrentEntityId, preserveNeutralChat, restoreNeutralChat } from './scripts/chats.js'; +import { appendFileContent, hasPendingFileAttachment, populateFileAttachment, decodeStyleTags, encodeStyleTags, isExternalMediaAllowed, getCurrentEntityId, preserveNeutralChat, restoreNeutralChat, IMAGE_PROMPT_TYPE } from './scripts/chats.js'; import { getPresetManager, initPresetManager } from './scripts/preset-manager.js'; import { evaluateMacros, getLastMessageId, initMacros } from './scripts/macros.js'; import { currentUser, setUserControls } from './scripts/user.js'; @@ -2320,6 +2320,7 @@ export function updateMessageBlock(messageId, message, { rerenderMessage = true export function appendMediaToMessage(mes, messageElement, adjustScroll = true) { // Add image to message if (mes.extra?.image) { + const promptType = mes.extra.image_prompt_type ?? 
IMAGE_PROMPT_TYPE.ONE; const container = messageElement.find('.mes_img_container'); const chatHeight = $('#chat').prop('scrollHeight'); const image = messageElement.find('.mes_img'); @@ -2339,6 +2340,7 @@ export function appendMediaToMessage(mes, messageElement, adjustScroll = true) { container.addClass('img_extra'); image.toggleClass('img_inline', isInline); text.toggleClass('displayNone', !isInline); + container.attr('data-img-prompt-type', promptType); const imageSwipes = mes.extra.image_swipes; if (Array.isArray(imageSwipes) && imageSwipes.length > 0) { diff --git a/public/scripts/chats.js b/public/scripts/chats.js index a6094b7f7..0d751336b 100644 --- a/public/scripts/chats.js +++ b/public/scripts/chats.js @@ -85,6 +85,16 @@ const converters = { 'application/vnd.oasis.opendocument.spreadsheet': extractTextFromOffice, }; +/** Selects which of a message's images are sent in the prompt (ALL: every image swipe, ONE: only the shown image swipe, NONE: no images). + * @enum {string} + * @readonly + */ +export const IMAGE_PROMPT_TYPE = Object.freeze({ + ALL: 'all', + ONE: 'one', + NONE: 'none', +}); + /** * Finds a matching key in the converters object. * @param {string} type MIME type @@ -201,8 +211,45 @@ export async function populateFileAttachment(message, inputId = 'file_form_input // If file is image if (file.type.startsWith('image/')) { + let addSwipe = false; + if (message.extra.image) { + const popupResult = await callGenericPopup(t`This message already has an image attached. 
Replace it?`, POPUP_TYPE.TEXT, '', { + okButton: t`Add swipe`, + customButtons: [ + { + text: t`Replace image`, + appendAtEnd: true, + result: POPUP_RESULT.CUSTOM1, + }, + { + text: t`Cancel`, + appendAtEnd: true, + result: POPUP_RESULT.CANCELLED, + }, + ], + }); + + if (!popupResult) { + return; + } + + addSwipe = popupResult === POPUP_RESULT.AFFIRMATIVE; + } + const extension = file.type.split('/')[1]; const imageUrl = await saveBase64AsFile(base64Data, name2, fileNamePrefix, extension); + if (addSwipe) { + if (!message.extra.image_swipes) { + message.extra.image_swipes = []; + } + + if (message.extra.image && !message.extra.image_swipes.includes(message.extra.image)) { + message.extra.image_swipes.push(message.extra.image); + } + + message.extra.image_swipes.push(imageUrl); + } + message.extra.image = imageUrl; message.extra.inline_image = true; } else { @@ -575,6 +622,40 @@ export function isExternalMediaAllowed() { return !power_user.forbid_external_media; } +function switchMessageImagePromptType() { // Click handler for '.mes_img_prompt_type': advances the clicked message's extra.image_prompt_type to the next IMAGE_PROMPT_TYPE value, re-renders its media, calls saveChatDebounced() and shows a toast describing the new mode. + const mesBlock = $(this).closest('.mes'); + const mesId = mesBlock.attr('mesid'); + const message = chat[mesId]; + + if (!message) { + return; + } + + if (!message.extra) { + message.extra = {}; + } + + const existingType = message.extra.image_prompt_type ?? 
IMAGE_PROMPT_TYPE.ONE; // Messages without a stored type are treated as ONE + const typeValues = Object.values(IMAGE_PROMPT_TYPE); + const nextType = typeValues[(typeValues.findIndex(type => type === existingType) + 1) % typeValues.length]; // Step to the following enum value, wrapping to the first after the last; an unrecognized stored value (index -1) also lands on the first + + message.extra.image_prompt_type = nextType; + appendMediaToMessage(message, mesBlock); + saveChatDebounced(); + + switch (nextType) { + case IMAGE_PROMPT_TYPE.ALL: + toastr.info(t`All image swipes will be sent in prompt for this message`); + break; + case IMAGE_PROMPT_TYPE.ONE: + toastr.info(t`Only the shown image swipe will be sent in prompt for this message`); + break; + case IMAGE_PROMPT_TYPE.NONE: + toastr.info(t`No images will be sent in prompt for this message`); + break; + } +} + async function enlargeMessageImage() { const mesBlock = $(this).closest('.mes'); const mesId = mesBlock.attr('mesid'); @@ -1605,6 +1686,7 @@ jQuery(function () { $(document).on('click', '.mes_img_enlarge', enlargeMessageImage); $(document).on('click', '.mes_img_delete', deleteMessageImage); + $(document).on('click', '.mes_img_prompt_type', switchMessageImagePromptType); $('#file_form_input').on('change', async () => { const fileInput = document.getElementById('file_form_input'); diff --git a/public/scripts/extensions/stable-diffusion/index.js b/public/scripts/extensions/stable-diffusion/index.js index dfae3f1c5..82986e4d9 100644 --- a/public/scripts/extensions/stable-diffusion/index.js +++ b/public/scripts/extensions/stable-diffusion/index.js @@ -2337,10 +2337,10 @@ function processReply(str) { str = str.replaceAll('“', ''); str = str.replaceAll('\n', ', '); str = str.normalize('NFD'); - + // Strip out non-alphanumeric characters barring model syntax exceptions str = str.replace(/[^a-zA-Z0-9.,:_(){}<>[\]\-'|#]+/g, ' '); - + str = str.replace(/\s+/g, ' '); // Collapse multiple whitespaces into one str = str.trim(); @@ -3234,7 +3234,7 @@ function getNovelParams() { extension_settings.sd.scheduler = 'karras'; } - if (extension_settings.sd.sampler === 'ddim' || + if (extension_settings.sd.sampler 
=== 'ddim' || ['nai-diffusion-4-curated-preview', 'nai-diffusion-4-full'].includes(extension_settings.sd.model)) { sm = false; sm_dyn = false; @@ -4000,6 +4000,7 @@ async function onImageSwiped({ message, element, direction }) { } const currentIndex = swipes.indexOf(message.extra.image); + const canGenerate = !!message.extra.title; if (currentIndex === -1) { console.warn('Current image not found in the swipes'); @@ -4015,8 +4016,17 @@ async function onImageSwiped({ message, element, direction }) { appendMediaToMessage(message, element, false); } + // Wrap around at the end if the image is missing a prompt + if (direction === 'right' && !canGenerate) { + const newIndex = currentIndex === swipes.length - 1 ? 0 : currentIndex + 1; + message.extra.image = swipes[newIndex]; + + // Update the image in the message + appendMediaToMessage(message, element, false); + } + // Switch to next image or generate a new one if at the end - if (direction === 'right') { + if (direction === 'right' && canGenerate) { const newIndex = currentIndex === swipes.length - 1 ? swipes.length : currentIndex + 1; if (newIndex === swipes.length) { diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 112726562..2bea3b063 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -75,6 +75,7 @@ import { Popup, POPUP_RESULT } from './popup.js'; import { t } from './i18n.js'; import { ToolManager } from './tool-calling.js'; import { accountStorage } from './util/AccountStorage.js'; +import { IMAGE_PROMPT_TYPE } from './chats.js'; export { openai_messages_count, @@ -560,7 +561,17 @@ function setOpenAIMessages(chat) { const name = chat[j]['name']; const image = chat[j]?.extra?.image; const invocations = chat[j]?.extra?.tool_invocations; - messages[i] = { 'role': role, 'content': content, name: name, 'image': image, 'invocations': invocations }; + const imagePromptType = chat[j]?.extra?.image_prompt_type ?? 
IMAGE_PROMPT_TYPE.ONE; + const imageSwipes = chat[j]?.extra?.image_swipes ?? []; + messages[i] = { + role, + content, + name, + invocations, + image, + imagePromptType, + imageSwipes, + }; j++; } @@ -845,7 +856,7 @@ async function populateChatHistory(messages, prompts, chatCompletion, type = nul } if (imageInlining && chatPrompt.image) { - await chatMessage.addImage(chatPrompt.image); + await chatMessage.addImage(chatPrompt.image, chatPrompt.imagePromptType, chatPrompt.imageSwipes); } if (canUseTools && Array.isArray(chatPrompt.invocations)) { @@ -2602,38 +2613,55 @@ class Message { /** * Adds an image to the message. - * @param {string} image Image URL or Data URL. + * @param {string} sourceImage Image URL or Data URL of the image currently shown on the message. + * @param {string} imagePromptType One of IMAGE_PROMPT_TYPE: NONE returns without touching the content, ONE attaches only sourceImage, ALL attaches every entry of imageSwipes (or sourceImage alone when there are no swipes). + * @param {string[]} imageSwipes Image URLs or Data URLs of all swipes of the message; only read for ALL. * @returns {Promise} */ - async addImage(image) { + async addImage(sourceImage, imagePromptType = IMAGE_PROMPT_TYPE.ONE, imageSwipes = []) { + const quality = oai_settings.inline_image_quality || default_settings.inline_image_quality; const textContent = this.content; - const isDataUrl = isDataURL(image); - if (!isDataUrl) { - try { - const response = await fetch(image, { method: 'GET', cache: 'force-cache' }); - if (!response.ok) throw new Error('Failed to fetch image'); - const blob = await response.blob(); - image = await getBase64Async(blob); - } catch (error) { - console.error('Image adding skipped', error); + const sourceImages = []; + + switch (imagePromptType) { + case IMAGE_PROMPT_TYPE.NONE: return; - } + case IMAGE_PROMPT_TYPE.ONE: + sourceImages.push(sourceImage); + break; + case IMAGE_PROMPT_TYPE.ALL: + sourceImages.push(...(Array.isArray(imageSwipes) && imageSwipes.length > 0 ? imageSwipes : [sourceImage])); // A message that has an image but no swipe list must still send the shown image instead of nothing + break; } - image = await this.compressImage(image); - - const quality = oai_settings.inline_image_quality || default_settings.inline_image_quality; this.content = [ { type: 'text', text: textContent }, - { type: 'image_url', image_url: { 'url': image, 'detail': quality } }, ]; - try { 
- const tokens = await this.getImageTokenCost(image, quality); - this.tokens += tokens; - } catch (error) { - this.tokens += Message.tokensPerImage; - console.error('Failed to get image token cost', error); + for (let image of sourceImages) { + const isDataUrl = isDataURL(image); + if (!isDataUrl) { + try { + const response = await fetch(image, { method: 'GET', cache: 'force-cache' }); + if (!response.ok) throw new Error('Failed to fetch image'); + const blob = await response.blob(); + image = await getBase64Async(blob); + } catch (error) { + console.error('Image adding skipped', error); + continue; + } + } + + image = await this.compressImage(image); + this.content.push({ type: 'image_url', image_url: { url: image, detail: quality } }); + + try { + const tokens = await this.getImageTokenCost(image, quality); + this.tokens += tokens; + } catch (error) { + this.tokens += Message.tokensPerImage; + console.error('Failed to get image token cost', error); + } } } diff --git a/public/style.css b/public/style.css index 7fa72d91b..aed17b70e 100644 --- a/public/style.css +++ b/public/style.css @@ -5098,6 +5098,16 @@ body:not(.sd) .mes_img_swipes { max-width: 100% !important; } +.mes_img_container div[data-img-prompt-type] { + display: none; +} + +.mes_img_container[data-img-prompt-type="none"] div[data-img-prompt-type="none"], +.mes_img_container[data-img-prompt-type="one"] div[data-img-prompt-type="one"], +.mes_img_container[data-img-prompt-type="all"] div[data-img-prompt-type="all"] { + display: initial; +} + /* Align the content of this span to the right */ .delete-button { margin-right: 10px;