Add automatic image captioning mode

This commit is contained in:
Cohee 2024-06-30 00:06:17 +03:00
parent 2670709237
commit 7149f46c9a
4 changed files with 85 additions and 7 deletions

View File

@ -408,6 +408,7 @@ export const event_types = {
MESSAGE_EDITED: 'message_edited',
MESSAGE_DELETED: 'message_deleted',
MESSAGE_UPDATED: 'message_updated',
MESSAGE_FILE_EMBEDDED: 'message_file_embedded',
IMPERSONATE_READY: 'impersonate_ready',
CHAT_CHANGED: 'chat_id_changed',
GENERATION_STARTED: 'generation_started',
@ -3404,6 +3405,10 @@ export async function Generate(type, { automatic_trigger, force_name2, quiet_pro
let regexedMessage = getRegexedString(message, regexType, options);
regexedMessage = await appendFileContent(chatItem, regexedMessage);
if (chatItem?.extra?.append_title && chatItem?.extra?.title) {
regexedMessage = `${regexedMessage}\n\n${chatItem.extra.title}`;
}
return {
...chatItem,
mes: regexedMessage,

View File

@ -417,6 +417,7 @@ function embedMessageFile(messageId, messageBlock) {
}
await populateFileAttachment(message, 'embed_file_input');
await eventSource.emit(event_types.MESSAGE_FILE_EMBEDDED, messageId);
appendMediaToMessage(message, messageBlock);
await saveChatConditional();
}
@ -614,6 +615,8 @@ async function deleteMessageImage() {
const message = chat[mesId];
delete message.extra.image;
delete message.extra.inline_image;
delete message.extra.title;
delete message.extra.append_title;
mesBlock.find('.mes_img_container').removeClass('img_extra');
mesBlock.find('.mes_img').attr('src', '');
await saveChatConditional();

View File

@ -1,6 +1,6 @@
import { ensureImageFormatSupported, getBase64Async, isTrueBoolean, saveBase64AsFile } from '../../utils.js';
import { getContext, getApiUrl, doExtrasFetch, extension_settings, modules, renderExtensionTemplateAsync } from '../../extensions.js';
import { callPopup, getRequestHeaders, saveSettingsDebounced, substituteParamsExtended } from '../../../script.js';
import { callPopup, eventSource, event_types, getRequestHeaders, saveSettingsDebounced, substituteParamsExtended } from '../../../script.js';
import { getMessageTimeStamp } from '../../RossAscends-mods.js';
import { SECRET_KEYS, secret_state } from '../../secrets.js';
import { getMultimodalCaption } from '../shared.js';
@ -84,12 +84,11 @@ async function setSpinnerIcon() {
}
/**
* Sends a captioned message to the chat.
* @param {string} caption Caption text
* @param {string} image Image URL
* Wraps a caption with a message template.
* @param {string} caption Raw caption
* @returns {Promise<string>} Wrapped caption
*/
async function sendCaptionedMessage(caption, image) {
const context = getContext();
async function wrapCaptionTemplate(caption) {
let template = extension_settings.caption.template || TEMPLATE_DEFAULT;
if (!/{{caption}}/i.test(template)) {
@ -101,7 +100,7 @@ async function sendCaptionedMessage(caption, image) {
if (extension_settings.caption.refine_mode) {
messageText = await callPopup(
'<h3>Review and edit the generated message:</h3>Press "Cancel" to abort the caption sending.',
'<h3>Review and edit the generated caption:</h3>Press "Cancel" to abort the caption sending.',
'input',
messageText,
{ rows: 5, okButton: 'Send' });
@ -111,6 +110,55 @@ async function sendCaptionedMessage(caption, image) {
}
}
return messageText;
}
/**
* Appends caption to an existing message.
* @param {Object} data Message data
* @returns {Promise<void>}
*/
async function captionExistingMessage(data) {
if (!(data?.extra?.image)) {
return;
}
const imageData = await fetch(data.extra.image);
const blob = await imageData.blob();
const type = imageData.headers.get('Content-Type');
const file = new File([blob], 'image.png', { type });
const caption = await getCaptionForFile(file, null, true);
if (!caption) {
console.warn('Failed to generate a caption for the image.');
return;
}
const wrappedCaption = await wrapCaptionTemplate(caption);
const messageText = String(data.mes).trim();
if (!messageText) {
data.extra.inline_image = false;
data.mes = wrappedCaption;
data.extra.title = wrappedCaption;
}
else {
data.extra.inline_image = true;
data.extra.append_title = true;
data.extra.title = wrappedCaption;
}
}
/**
* Sends a captioned message to the chat.
* @param {string} caption Caption text
* @param {string} image Image URL
*/
async function sendCaptionedMessage(caption, image) {
const messageText = await wrapCaptionTemplate(caption);
const context = getContext();
const message = {
name: context.name1,
is_user: true,
@ -423,6 +471,7 @@ jQuery(async function () {
$('#caption_refine_mode').prop('checked', !!(extension_settings.caption.refine_mode));
$('#caption_allow_reverse_proxy').prop('checked', !!(extension_settings.caption.allow_reverse_proxy));
$('#caption_prompt_ask').prop('checked', !!(extension_settings.caption.prompt_ask));
$('#caption_auto_mode').prop('checked', !!(extension_settings.caption.auto_mode));
$('#caption_source').val(extension_settings.caption.source);
$('#caption_prompt').val(extension_settings.caption.prompt);
$('#caption_template').val(extension_settings.caption.template);
@ -448,6 +497,22 @@ jQuery(async function () {
extension_settings.caption.prompt_ask = $('#caption_prompt_ask').prop('checked');
saveSettingsDebounced();
});
$('#caption_auto_mode').on('input', () => {
extension_settings.caption.auto_mode = !!$('#caption_auto_mode').prop('checked');
saveSettingsDebounced();
});
const onMessageEvent = async (index) => {
if (!extension_settings.caption.auto_mode) {
return;
}
const data = getContext().chat[index];
await captionExistingMessage(data);
};
eventSource.on(event_types.MESSAGE_SENT, onMessageEvent);
eventSource.on(event_types.MESSAGE_FILE_EMBEDDED, onMessageEvent);
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'caption',
callback: captionCommandCallback,

View File

@ -92,6 +92,11 @@
</div>
<label for="caption_template"><span data-i18n="Message Template">Message Template</span> <small><span data-i18n="(use _space">(use </span> <code>&lcub;&lcub;caption&rcub;&rcub;</code> <span data-i18n="macro)">macro)</span></small></label>
<textarea id="caption_template" class="text_pole" rows="2" placeholder="&lt; Use default &gt;">{{TEMPLATE_DEFAULT}}</textarea>
<label class="checkbox_label" for="caption_auto_mode">
<input id="caption_auto_mode" type="checkbox" class="checkbox">
<span data-i18n="Automatically caption images">Automatically caption images</span>
<i class="fa-solid fa-info-circle" title="Automatically caption images when they are pasted into the chat or attached to messages."></i>
</label>
<label class="checkbox_label margin-bot-10px" for="caption_refine_mode">
<input id="caption_refine_mode" type="checkbox" class="checkbox">
<span data-i18n="Edit captions before saving">Edit captions before saving</span>