Add automatic image captioning mode

This commit is contained in:
Cohee 2024-06-30 00:06:17 +03:00
parent 2670709237
commit 7149f46c9a
4 changed files with 85 additions and 7 deletions

View File

@ -408,6 +408,7 @@ export const event_types = {
MESSAGE_EDITED: 'message_edited', MESSAGE_EDITED: 'message_edited',
MESSAGE_DELETED: 'message_deleted', MESSAGE_DELETED: 'message_deleted',
MESSAGE_UPDATED: 'message_updated', MESSAGE_UPDATED: 'message_updated',
MESSAGE_FILE_EMBEDDED: 'message_file_embedded',
IMPERSONATE_READY: 'impersonate_ready', IMPERSONATE_READY: 'impersonate_ready',
CHAT_CHANGED: 'chat_id_changed', CHAT_CHANGED: 'chat_id_changed',
GENERATION_STARTED: 'generation_started', GENERATION_STARTED: 'generation_started',
@ -3404,6 +3405,10 @@ export async function Generate(type, { automatic_trigger, force_name2, quiet_pro
let regexedMessage = getRegexedString(message, regexType, options); let regexedMessage = getRegexedString(message, regexType, options);
regexedMessage = await appendFileContent(chatItem, regexedMessage); regexedMessage = await appendFileContent(chatItem, regexedMessage);
if (chatItem?.extra?.append_title && chatItem?.extra?.title) {
regexedMessage = `${regexedMessage}\n\n${chatItem.extra.title}`;
}
return { return {
...chatItem, ...chatItem,
mes: regexedMessage, mes: regexedMessage,

View File

@ -417,6 +417,7 @@ function embedMessageFile(messageId, messageBlock) {
} }
await populateFileAttachment(message, 'embed_file_input'); await populateFileAttachment(message, 'embed_file_input');
await eventSource.emit(event_types.MESSAGE_FILE_EMBEDDED, messageId);
appendMediaToMessage(message, messageBlock); appendMediaToMessage(message, messageBlock);
await saveChatConditional(); await saveChatConditional();
} }
@ -614,6 +615,8 @@ async function deleteMessageImage() {
const message = chat[mesId]; const message = chat[mesId];
delete message.extra.image; delete message.extra.image;
delete message.extra.inline_image; delete message.extra.inline_image;
delete message.extra.title;
delete message.extra.append_title;
mesBlock.find('.mes_img_container').removeClass('img_extra'); mesBlock.find('.mes_img_container').removeClass('img_extra');
mesBlock.find('.mes_img').attr('src', ''); mesBlock.find('.mes_img').attr('src', '');
await saveChatConditional(); await saveChatConditional();

View File

@ -1,6 +1,6 @@
import { ensureImageFormatSupported, getBase64Async, isTrueBoolean, saveBase64AsFile } from '../../utils.js'; import { ensureImageFormatSupported, getBase64Async, isTrueBoolean, saveBase64AsFile } from '../../utils.js';
import { getContext, getApiUrl, doExtrasFetch, extension_settings, modules, renderExtensionTemplateAsync } from '../../extensions.js'; import { getContext, getApiUrl, doExtrasFetch, extension_settings, modules, renderExtensionTemplateAsync } from '../../extensions.js';
import { callPopup, getRequestHeaders, saveSettingsDebounced, substituteParamsExtended } from '../../../script.js'; import { callPopup, eventSource, event_types, getRequestHeaders, saveSettingsDebounced, substituteParamsExtended } from '../../../script.js';
import { getMessageTimeStamp } from '../../RossAscends-mods.js'; import { getMessageTimeStamp } from '../../RossAscends-mods.js';
import { SECRET_KEYS, secret_state } from '../../secrets.js'; import { SECRET_KEYS, secret_state } from '../../secrets.js';
import { getMultimodalCaption } from '../shared.js'; import { getMultimodalCaption } from '../shared.js';
@ -84,12 +84,11 @@ async function setSpinnerIcon() {
} }
/** /**
* Sends a captioned message to the chat. * Wraps a caption with a message template.
* @param {string} caption Caption text * @param {string} caption Raw caption
* @param {string} image Image URL * @returns {Promise<string>} Wrapped caption
*/ */
async function sendCaptionedMessage(caption, image) { async function wrapCaptionTemplate(caption) {
const context = getContext();
let template = extension_settings.caption.template || TEMPLATE_DEFAULT; let template = extension_settings.caption.template || TEMPLATE_DEFAULT;
if (!/{{caption}}/i.test(template)) { if (!/{{caption}}/i.test(template)) {
@ -101,7 +100,7 @@ async function sendCaptionedMessage(caption, image) {
if (extension_settings.caption.refine_mode) { if (extension_settings.caption.refine_mode) {
messageText = await callPopup( messageText = await callPopup(
'<h3>Review and edit the generated message:</h3>Press "Cancel" to abort the caption sending.', '<h3>Review and edit the generated caption:</h3>Press "Cancel" to abort the caption sending.',
'input', 'input',
messageText, messageText,
{ rows: 5, okButton: 'Send' }); { rows: 5, okButton: 'Send' });
@ -111,6 +110,55 @@ async function sendCaptionedMessage(caption, image) {
} }
} }
return messageText;
}
/**
 * Generates a caption for the image attached to an existing chat message
 * and stores it on that message. The message object is mutated in place.
 *
 * - If the message has no text, the wrapped caption becomes the message text.
 * - Otherwise the caption is stored in `extra.title` and `extra.append_title`
 *   is set so the caption is appended to the message text when the prompt is built.
 *
 * Does nothing when the message has no `extra.image`, when the image cannot
 * be downloaded, or when no caption could be generated.
 * @param {Object} data Message data
 * @returns {Promise<void>}
 */
async function captionExistingMessage(data) {
    if (!(data?.extra?.image)) {
        return;
    }

    const imageData = await fetch(data.extra.image);

    // fetch() resolves on HTTP errors too; don't caption an error page.
    if (!imageData.ok) {
        console.warn(`Failed to fetch the image for captioning (HTTP ${imageData.status}).`);
        return;
    }

    const blob = await imageData.blob();
    // A missing header yields null, which File would stringify to "null".
    const type = imageData.headers.get('Content-Type') ?? blob.type;
    const file = new File([blob], 'image.png', { type });
    const caption = await getCaptionForFile(file, null, true);

    if (!caption) {
        console.warn('Failed to generate a caption for the image.');
        return;
    }

    const wrappedCaption = await wrapCaptionTemplate(caption);
    // Treat a missing message text as empty rather than the string "undefined".
    const messageText = String(data.mes ?? '').trim();

    data.extra.title = wrappedCaption;

    if (!messageText) {
        data.extra.inline_image = false;
        // The caption is the message text now; a stale flag would duplicate it.
        data.extra.append_title = false;
        data.mes = wrappedCaption;
    } else {
        data.extra.inline_image = true;
        data.extra.append_title = true;
    }
}
/**
* Sends a captioned message to the chat.
* @param {string} caption Caption text
* @param {string} image Image URL
*/
async function sendCaptionedMessage(caption, image) {
const messageText = await wrapCaptionTemplate(caption);
const context = getContext();
const message = { const message = {
name: context.name1, name: context.name1,
is_user: true, is_user: true,
@ -423,6 +471,7 @@ jQuery(async function () {
$('#caption_refine_mode').prop('checked', !!(extension_settings.caption.refine_mode)); $('#caption_refine_mode').prop('checked', !!(extension_settings.caption.refine_mode));
$('#caption_allow_reverse_proxy').prop('checked', !!(extension_settings.caption.allow_reverse_proxy)); $('#caption_allow_reverse_proxy').prop('checked', !!(extension_settings.caption.allow_reverse_proxy));
$('#caption_prompt_ask').prop('checked', !!(extension_settings.caption.prompt_ask)); $('#caption_prompt_ask').prop('checked', !!(extension_settings.caption.prompt_ask));
$('#caption_auto_mode').prop('checked', !!(extension_settings.caption.auto_mode));
$('#caption_source').val(extension_settings.caption.source); $('#caption_source').val(extension_settings.caption.source);
$('#caption_prompt').val(extension_settings.caption.prompt); $('#caption_prompt').val(extension_settings.caption.prompt);
$('#caption_template').val(extension_settings.caption.template); $('#caption_template').val(extension_settings.caption.template);
@ -448,6 +497,22 @@ jQuery(async function () {
extension_settings.caption.prompt_ask = $('#caption_prompt_ask').prop('checked'); extension_settings.caption.prompt_ask = $('#caption_prompt_ask').prop('checked');
saveSettingsDebounced(); saveSettingsDebounced();
}); });
// Persist the "Automatically caption images" checkbox state whenever it changes.
$('#caption_auto_mode').on('input', () => {
    const isChecked = $('#caption_auto_mode').prop('checked');
    extension_settings.caption.auto_mode = Boolean(isChecked);
    saveSettingsDebounced();
});
/**
 * Event handler: captions the image of the chat message at the given index,
 * but only while automatic captioning is enabled in the extension settings.
 * @param {number} index Index of the message in the current chat
 * @returns {Promise<void>}
 */
const onMessageEvent = async (index) => {
    if (extension_settings.caption.auto_mode) {
        const { chat } = getContext();
        await captionExistingMessage(chat[index]);
    }
};
// Run the auto-caption handler both when a message is sent and when a file
// is embedded into an existing message.
eventSource.on(event_types.MESSAGE_SENT, onMessageEvent);
eventSource.on(event_types.MESSAGE_FILE_EMBEDDED, onMessageEvent);
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'caption', SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'caption',
callback: captionCommandCallback, callback: captionCommandCallback,

View File

@ -92,6 +92,11 @@
</div> </div>
<label for="caption_template"><span data-i18n="Message Template">Message Template</span> <small><span data-i18n="(use _space">(use </span> <code>&lcub;&lcub;caption&rcub;&rcub;</code> <span data-i18n="macro)">macro)</span></small></label> <label for="caption_template"><span data-i18n="Message Template">Message Template</span> <small><span data-i18n="(use _space">(use </span> <code>&lcub;&lcub;caption&rcub;&rcub;</code> <span data-i18n="macro)">macro)</span></small></label>
<textarea id="caption_template" class="text_pole" rows="2" placeholder="&lt; Use default &gt;">{{TEMPLATE_DEFAULT}}</textarea> <textarea id="caption_template" class="text_pole" rows="2" placeholder="&lt; Use default &gt;">{{TEMPLATE_DEFAULT}}</textarea>
<label class="checkbox_label" for="caption_auto_mode">
<input id="caption_auto_mode" type="checkbox" class="checkbox">
<span data-i18n="Automatically caption images">Automatically caption images</span>
<i class="fa-solid fa-info-circle" title="Automatically caption images when they are pasted into the chat or attached to messages."></i>
</label>
<label class="checkbox_label margin-bot-10px" for="caption_refine_mode"> <label class="checkbox_label margin-bot-10px" for="caption_refine_mode">
<input id="caption_refine_mode" type="checkbox" class="checkbox"> <input id="caption_refine_mode" type="checkbox" class="checkbox">
<span data-i18n="Edit captions before saving">Edit captions before saving</span> <span data-i18n="Edit captions before saving">Edit captions before saving</span>