From 7149f46c9aaadd31b8add999c06be4a5a7ac5888 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 30 Jun 2024 00:06:17 +0300 Subject: [PATCH] Add automatic image captioning mode --- public/script.js | 5 ++ public/scripts/chats.js | 3 + public/scripts/extensions/caption/index.js | 79 +++++++++++++++++-- .../scripts/extensions/caption/settings.html | 5 ++ 4 files changed, 85 insertions(+), 7 deletions(-) diff --git a/public/script.js b/public/script.js index 11c8279ae..64e917a93 100644 --- a/public/script.js +++ b/public/script.js @@ -408,6 +408,7 @@ export const event_types = { MESSAGE_EDITED: 'message_edited', MESSAGE_DELETED: 'message_deleted', MESSAGE_UPDATED: 'message_updated', + MESSAGE_FILE_EMBEDDED: 'message_file_embedded', IMPERSONATE_READY: 'impersonate_ready', CHAT_CHANGED: 'chat_id_changed', GENERATION_STARTED: 'generation_started', @@ -3404,6 +3405,10 @@ export async function Generate(type, { automatic_trigger, force_name2, quiet_pro let regexedMessage = getRegexedString(message, regexType, options); regexedMessage = await appendFileContent(chatItem, regexedMessage); + if (chatItem?.extra?.append_title && chatItem?.extra?.title) { + regexedMessage = `${regexedMessage}\n\n${chatItem.extra.title}`; + } + return { ...chatItem, mes: regexedMessage, diff --git a/public/scripts/chats.js b/public/scripts/chats.js index 9419e6d7b..faa56bd34 100644 --- a/public/scripts/chats.js +++ b/public/scripts/chats.js @@ -417,6 +417,7 @@ function embedMessageFile(messageId, messageBlock) { } await populateFileAttachment(message, 'embed_file_input'); + await eventSource.emit(event_types.MESSAGE_FILE_EMBEDDED, messageId); appendMediaToMessage(message, messageBlock); await saveChatConditional(); } @@ -614,6 +615,8 @@ async function deleteMessageImage() { const message = chat[mesId]; delete message.extra.image; delete message.extra.inline_image; + delete message.extra.title; + delete message.extra.append_title; mesBlock.find('.mes_img_container').removeClass('img_extra'); mesBlock.find('.mes_img').attr('src', ''); await saveChatConditional(); diff --git a/public/scripts/extensions/caption/index.js b/public/scripts/extensions/caption/index.js index 960de7b32..c5797fad6 100644 --- a/public/scripts/extensions/caption/index.js +++ b/public/scripts/extensions/caption/index.js @@ -1,6 +1,6 @@ import { ensureImageFormatSupported, getBase64Async, isTrueBoolean, saveBase64AsFile } from '../../utils.js'; import { getContext, getApiUrl, doExtrasFetch, extension_settings, modules, renderExtensionTemplateAsync } from '../../extensions.js'; -import { callPopup, getRequestHeaders, saveSettingsDebounced, substituteParamsExtended } from '../../../script.js'; +import { callPopup, eventSource, event_types, getRequestHeaders, saveSettingsDebounced, substituteParamsExtended } from '../../../script.js'; import { getMessageTimeStamp } from '../../RossAscends-mods.js'; import { SECRET_KEYS, secret_state } from '../../secrets.js'; import { getMultimodalCaption } from '../shared.js'; @@ -84,12 +84,11 @@ async function setSpinnerIcon() { } /** - * Sends a captioned message to the chat. - * @param {string} caption Caption text - * @param {string} image Image URL + * Wraps a caption with a message template. + * @param {string} caption Raw caption + * @returns {Promise} Wrapped caption */ -async function sendCaptionedMessage(caption, image) { - const context = getContext(); +async function wrapCaptionTemplate(caption) { let template = extension_settings.caption.template || TEMPLATE_DEFAULT; if (!/{{caption}}/i.test(template)) { @@ -101,7 +100,7 @@ async function sendCaptionedMessage(caption, image) { if (extension_settings.caption.refine_mode) { messageText = await callPopup( - '

Review and edit the generated message:

Press "Cancel" to abort the caption sending.', + '

Review and edit the generated caption:

Press "Cancel" to abort the caption sending.', 'input', messageText, { rows: 5, okButton: 'Send' }); @@ -111,6 +110,55 @@ async function sendCaptionedMessage(caption, image) { } } + return messageText; +} + +/** + * Appends caption to an existing message. + * @param {Object} data Message data + * @returns {Promise} + */ +async function captionExistingMessage(data) { + if (!(data?.extra?.image)) { + return; + } + + const imageData = await fetch(data.extra.image); + const blob = await imageData.blob(); + const type = imageData.headers.get('Content-Type'); + const file = new File([blob], 'image.png', { type }); + const caption = await getCaptionForFile(file, null, true); + + if (!caption) { + console.warn('Failed to generate a caption for the image.'); + return; + } + + const wrappedCaption = await wrapCaptionTemplate(caption); + + const messageText = String(data.mes).trim(); + + if (!messageText) { + data.extra.inline_image = false; + data.mes = wrappedCaption; + data.extra.title = wrappedCaption; + } + else { + data.extra.inline_image = true; + data.extra.append_title = true; + data.extra.title = wrappedCaption; + } +} + +/** + * Sends a captioned message to the chat. + * @param {string} caption Caption text + * @param {string} image Image URL + */ +async function sendCaptionedMessage(caption, image) { + const messageText = await wrapCaptionTemplate(caption); + + const context = getContext(); const message = { name: context.name1, is_user: true, @@ -423,6 +471,7 @@ jQuery(async function () { $('#caption_refine_mode').prop('checked', !!(extension_settings.caption.refine_mode)); $('#caption_allow_reverse_proxy').prop('checked', !!(extension_settings.caption.allow_reverse_proxy)); $('#caption_prompt_ask').prop('checked', !!(extension_settings.caption.prompt_ask)); + $('#caption_auto_mode').prop('checked', !!(extension_settings.caption.auto_mode)); $('#caption_source').val(extension_settings.caption.source); $('#caption_prompt').val(extension_settings.caption.prompt); $('#caption_template').val(extension_settings.caption.template); @@ -448,6 +497,22 @@ jQuery(async function () { extension_settings.caption.prompt_ask = $('#caption_prompt_ask').prop('checked'); saveSettingsDebounced(); }); + $('#caption_auto_mode').on('input', () => { + extension_settings.caption.auto_mode = !!$('#caption_auto_mode').prop('checked'); + saveSettingsDebounced(); + }); + + const onMessageEvent = async (index) => { + if (!extension_settings.caption.auto_mode) { + return; + } + + const data = getContext().chat[index]; + await captionExistingMessage(data); + }; + + eventSource.on(event_types.MESSAGE_SENT, onMessageEvent); + eventSource.on(event_types.MESSAGE_FILE_EMBEDDED, onMessageEvent); SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'caption', callback: captionCommandCallback, diff --git a/public/scripts/extensions/caption/settings.html b/public/scripts/extensions/caption/settings.html index 185d76908..ccbdd67c0 100644 --- a/public/scripts/extensions/caption/settings.html +++ b/public/scripts/extensions/caption/settings.html @@ -92,6 +92,11 @@ +