Send multiple image swipes per prompt

#3635
This commit is contained in:
Cohee
2025-03-16 21:17:53 +02:00
parent d42a81f97c
commit 20ab6193ab
6 changed files with 165 additions and 30 deletions

View File

@@ -6432,10 +6432,13 @@
<div class="mes_reasoning"></div>
</details>
<div class="mes_text"></div>
<div class="mes_img_container">
<div class="mes_img_container" data-img-prompt-type="">
<div class="mes_img_controls">
<div title="Enlarge" class="right_menu_button fa-lg fa-solid fa-magnifying-glass mes_img_enlarge" data-i18n="[title]Enlarge"></div>
<div title="Caption" class="right_menu_button fa-lg fa-solid fa-envelope-open-text mes_img_caption" data-i18n="[title]Caption"></div>
<div title="Include just this swipe in prompts" class="right_menu_button fa-lg fa-solid fa-1 mes_img_prompt_type" data-img-prompt-type="one" data-i18n="[title]Include just this swipe in prompts"></div>
<div title="Include all swipes in prompts" class="right_menu_button fa-lg fa-solid fa-infinity mes_img_prompt_type" data-img-prompt-type="all" data-i18n="[title]Include all swipes in prompts"></div>
<div title="Exclude this image from prompts" class="right_menu_button fa-lg fa-solid fa-0 mes_img_prompt_type" data-img-prompt-type="none" data-i18n="[title]Exclude this image from prompts"></div>
<div title="Delete" class="right_menu_button fa-lg fa-solid fa-trash-can mes_img_delete" data-i18n="[title]Delete"></div>
</div>
<div class="mes_img_swipes">

View File

@@ -243,7 +243,7 @@ import { getBackgrounds, initBackgrounds, loadBackgroundSettings, background_set
import { hideLoader, showLoader } from './scripts/loader.js';
import { BulkEditOverlay, CharacterContextMenu } from './scripts/BulkEditOverlay.js';
import { loadFeatherlessModels, loadMancerModels, loadOllamaModels, loadTogetherAIModels, loadInfermaticAIModels, loadOpenRouterModels, loadVllmModels, loadAphroditeModels, loadDreamGenModels, initTextGenModels, loadTabbyModels, loadGenericModels } from './scripts/textgen-models.js';
import { appendFileContent, hasPendingFileAttachment, populateFileAttachment, decodeStyleTags, encodeStyleTags, isExternalMediaAllowed, getCurrentEntityId, preserveNeutralChat, restoreNeutralChat } from './scripts/chats.js';
import { appendFileContent, hasPendingFileAttachment, populateFileAttachment, decodeStyleTags, encodeStyleTags, isExternalMediaAllowed, getCurrentEntityId, preserveNeutralChat, restoreNeutralChat, IMAGE_PROMPT_TYPE } from './scripts/chats.js';
import { getPresetManager, initPresetManager } from './scripts/preset-manager.js';
import { evaluateMacros, getLastMessageId, initMacros } from './scripts/macros.js';
import { currentUser, setUserControls } from './scripts/user.js';
@@ -2320,6 +2320,7 @@ export function updateMessageBlock(messageId, message, { rerenderMessage = true
export function appendMediaToMessage(mes, messageElement, adjustScroll = true) {
// Add image to message
if (mes.extra?.image) {
const promptType = mes.extra.image_prompt_type ?? IMAGE_PROMPT_TYPE.ONE;
const container = messageElement.find('.mes_img_container');
const chatHeight = $('#chat').prop('scrollHeight');
const image = messageElement.find('.mes_img');
@@ -2339,6 +2340,7 @@ export function appendMediaToMessage(mes, messageElement, adjustScroll = true) {
container.addClass('img_extra');
image.toggleClass('img_inline', isInline);
text.toggleClass('displayNone', !isInline);
container.attr('data-img-prompt-type', promptType);
const imageSwipes = mes.extra.image_swipes;
if (Array.isArray(imageSwipes) && imageSwipes.length > 0) {

View File

@@ -85,6 +85,16 @@ const converters = {
'application/vnd.oasis.opendocument.spreadsheet': extractTextFromOffice,
};
/**
 * Selects which images attached to a chat message are included in prompts.
 * Stored per message as `extra.image_prompt_type`; readers in this change fall back to
 * `ONE` when the field is absent.
 * Member order matters: the prompt-type toggle steps through `Object.values()` of this
 * object, so reordering the keys changes the click cycle.
 * @enum {string}
 * @readonly
 */
export const IMAGE_PROMPT_TYPE = Object.freeze({
// Include every image swipe of the message
ALL: 'all',
// Include only the image swipe that is currently shown
ONE: 'one',
// Exclude the message's images from prompts
NONE: 'none',
});
/**
* Finds a matching key in the converters object.
* @param {string} type MIME type
@@ -201,8 +211,45 @@ export async function populateFileAttachment(message, inputId = 'file_form_input
// If file is image
if (file.type.startsWith('image/')) {
let addSwipe = false;
if (message.extra.image) {
const popupResult = await callGenericPopup(t`This message already has an image attached. Replace it?`, POPUP_TYPE.TEXT, '', {
okButton: t`Add swipe`,
customButtons: [
{
text: t`Replace image`,
appendAtEnd: true,
result: POPUP_RESULT.CUSTOM1,
},
{
text: t`Cancel`,
appendAtEnd: true,
result: POPUP_RESULT.CANCELLED,
},
],
});
if (!popupResult) {
return;
}
addSwipe = popupResult === POPUP_RESULT.AFFIRMATIVE;
}
const extension = file.type.split('/')[1];
const imageUrl = await saveBase64AsFile(base64Data, name2, fileNamePrefix, extension);
if (addSwipe) {
if (!message.extra.image_swipes) {
message.extra.image_swipes = [];
}
if (message.extra.image && !message.extra.image_swipes.includes(message.extra.image)) {
message.extra.image_swipes.push(message.extra.image);
}
message.extra.image_swipes.push(imageUrl);
}
message.extra.image = imageUrl;
message.extra.inline_image = true;
} else {
@@ -575,6 +622,40 @@ export function isExternalMediaAllowed() {
return !power_user.forbid_external_media;
}
/**
 * Click handler that advances the image prompt type of the message owning the clicked control.
 * The stored type (treated as IMAGE_PROMPT_TYPE.ONE when unset) moves to the next value in
 * IMAGE_PROMPT_TYPE declaration order, wrapping around after the last one. The message media
 * is then re-rendered, a debounced chat save is requested and a toast reports the new mode.
 * Expects `this` to be the clicked element located inside a `.mes` block.
 * @returns {void}
 */
function switchMessageImagePromptType() {
    const messageBlock = $(this).closest('.mes');
    const message = chat[messageBlock.attr('mesid')];

    // Nothing to toggle when the block does not map to a chat message
    if (!message) {
        return;
    }

    if (!message.extra) {
        message.extra = {};
    }

    const cycle = Object.values(IMAGE_PROMPT_TYPE);
    const currentType = message.extra.image_prompt_type ?? IMAGE_PROMPT_TYPE.ONE;
    // indexOf yields -1 for an unrecognized stored value, so the cycle restarts at the first type
    const upcomingType = cycle[(cycle.indexOf(currentType) + 1) % cycle.length];

    message.extra.image_prompt_type = upcomingType;
    appendMediaToMessage(message, messageBlock);
    saveChatDebounced();

    // Thunks keep the translation lookup lazy: only the announced text is passed through t
    const announcements = new Map([
        [IMAGE_PROMPT_TYPE.ALL, () => t`All image swipes will be sent in prompt for this message`],
        [IMAGE_PROMPT_TYPE.ONE, () => t`Only the shown image swipe will be sent in prompt for this message`],
        [IMAGE_PROMPT_TYPE.NONE, () => t`No images will be sent in prompt for this message`],
    ]);
    const announce = announcements.get(upcomingType);
    if (announce) {
        toastr.info(announce());
    }
}
async function enlargeMessageImage() {
const mesBlock = $(this).closest('.mes');
const mesId = mesBlock.attr('mesid');
@@ -1605,6 +1686,7 @@ jQuery(function () {
$(document).on('click', '.mes_img_enlarge', enlargeMessageImage);
$(document).on('click', '.mes_img_delete', deleteMessageImage);
$(document).on('click', '.mes_img_prompt_type', switchMessageImagePromptType);
$('#file_form_input').on('change', async () => {
const fileInput = document.getElementById('file_form_input');

View File

@@ -2337,10 +2337,10 @@ function processReply(str) {
str = str.replaceAll('“', '');
str = str.replaceAll('\n', ', ');
str = str.normalize('NFD');
// Strip out non-alphanumeric characters barring model syntax exceptions
str = str.replace(/[^a-zA-Z0-9.,:_(){}<>[\]\-'|#]+/g, ' ');
str = str.replace(/\s+/g, ' '); // Collapse multiple whitespaces into one
str = str.trim();
@@ -3234,7 +3234,7 @@ function getNovelParams() {
extension_settings.sd.scheduler = 'karras';
}
if (extension_settings.sd.sampler === 'ddim' ||
if (extension_settings.sd.sampler === 'ddim' ||
['nai-diffusion-4-curated-preview', 'nai-diffusion-4-full'].includes(extension_settings.sd.model)) {
sm = false;
sm_dyn = false;
@@ -4000,6 +4000,7 @@ async function onImageSwiped({ message, element, direction }) {
}
const currentIndex = swipes.indexOf(message.extra.image);
const canGenerate = !!message.extra.title;
if (currentIndex === -1) {
console.warn('Current image not found in the swipes');
@@ -4015,8 +4016,17 @@ async function onImageSwiped({ message, element, direction }) {
appendMediaToMessage(message, element, false);
}
// Wrap around at the end if the image is missing a prompt
if (direction === 'right' && !canGenerate) {
const newIndex = currentIndex === swipes.length - 1 ? 0 : currentIndex + 1;
message.extra.image = swipes[newIndex];
// Update the image in the message
appendMediaToMessage(message, element, false);
}
// Switch to next image or generate a new one if at the end
if (direction === 'right') {
if (direction === 'right' && canGenerate) {
const newIndex = currentIndex === swipes.length - 1 ? swipes.length : currentIndex + 1;
if (newIndex === swipes.length) {

View File

@@ -75,6 +75,7 @@ import { Popup, POPUP_RESULT } from './popup.js';
import { t } from './i18n.js';
import { ToolManager } from './tool-calling.js';
import { accountStorage } from './util/AccountStorage.js';
import { IMAGE_PROMPT_TYPE } from './chats.js';
export {
openai_messages_count,
@@ -560,7 +561,17 @@ function setOpenAIMessages(chat) {
const name = chat[j]['name'];
const image = chat[j]?.extra?.image;
const invocations = chat[j]?.extra?.tool_invocations;
messages[i] = { 'role': role, 'content': content, name: name, 'image': image, 'invocations': invocations };
const imagePromptType = chat[j]?.extra?.image_prompt_type ?? IMAGE_PROMPT_TYPE.ONE;
const imageSwipes = chat[j]?.extra?.image_swipes ?? [];
messages[i] = {
role,
content,
name,
invocations,
image,
imagePromptType,
imageSwipes,
};
j++;
}
@@ -845,7 +856,7 @@ async function populateChatHistory(messages, prompts, chatCompletion, type = nul
}
if (imageInlining && chatPrompt.image) {
await chatMessage.addImage(chatPrompt.image);
await chatMessage.addImage(chatPrompt.image, chatPrompt.imagePromptType, chatPrompt.imageSwipes);
}
if (canUseTools && Array.isArray(chatPrompt.invocations)) {
@@ -2602,38 +2613,55 @@ class Message {
/**
* Adds an image to the message.
* @param {string} image Image URL or Data URL.
* @param {string} sourceImage Image URL or Data URL.
* @param {string} imagePromptType Type of image prompt.
* @param {string[]} imageSwipes Swipes for the image.
* @returns {Promise<void>}
*/
async addImage(image) {
async addImage(sourceImage, imagePromptType = IMAGE_PROMPT_TYPE.ONE, imageSwipes = []) {
const quality = oai_settings.inline_image_quality || default_settings.inline_image_quality;
const textContent = this.content;
const isDataUrl = isDataURL(image);
if (!isDataUrl) {
try {
const response = await fetch(image, { method: 'GET', cache: 'force-cache' });
if (!response.ok) throw new Error('Failed to fetch image');
const blob = await response.blob();
image = await getBase64Async(blob);
} catch (error) {
console.error('Image adding skipped', error);
const sourceImages = [];
switch (imagePromptType) {
case IMAGE_PROMPT_TYPE.NONE:
return;
}
case IMAGE_PROMPT_TYPE.ONE:
sourceImages.push(sourceImage);
break;
case IMAGE_PROMPT_TYPE.ALL:
sourceImages.push(...imageSwipes);
break;
}
image = await this.compressImage(image);
const quality = oai_settings.inline_image_quality || default_settings.inline_image_quality;
this.content = [
{ type: 'text', text: textContent },
{ type: 'image_url', image_url: { 'url': image, 'detail': quality } },
];
try {
const tokens = await this.getImageTokenCost(image, quality);
this.tokens += tokens;
} catch (error) {
this.tokens += Message.tokensPerImage;
console.error('Failed to get image token cost', error);
for (let image of sourceImages) {
const isDataUrl = isDataURL(image);
if (!isDataUrl) {
try {
const response = await fetch(image, { method: 'GET', cache: 'force-cache' });
if (!response.ok) throw new Error('Failed to fetch image');
const blob = await response.blob();
image = await getBase64Async(blob);
} catch (error) {
console.error('Image adding skipped', error);
continue;
}
}
image = await this.compressImage(image);
this.content.push({ type: 'image_url', image_url: { url: image, detail: quality } });
try {
const tokens = await this.getImageTokenCost(image, quality);
this.tokens += tokens;
} catch (error) {
this.tokens += Message.tokensPerImage;
console.error('Failed to get image token cost', error);
}
}
}

View File

@@ -5098,6 +5098,16 @@ body:not(.sd) .mes_img_swipes {
max-width: 100% !important;
}
/* Hide every prompt-type button inside the image container by default.
   The selector targets descendant divs only, so the container's own
   data-img-prompt-type attribute does not hide the container itself. */
.mes_img_container div[data-img-prompt-type] {
display: none;
}
/* Reveal only the button whose type equals the container's current
   data-img-prompt-type value; an empty or unknown value shows no button. */
/* NOTE(review): `initial` resolves to `inline` for a div. Confirm this matches
   how the sibling control buttons are laid out. */
.mes_img_container[data-img-prompt-type="none"] div[data-img-prompt-type="none"],
.mes_img_container[data-img-prompt-type="one"] div[data-img-prompt-type="one"],
.mes_img_container[data-img-prompt-type="all"] div[data-img-prompt-type="all"] {
display: initial;
}
/* Align the content of this span to the right */
.delete-button {
margin-right: 10px;