Add automatic image captioning mode
This commit is contained in:
parent
2670709237
commit
7149f46c9a
|
@ -408,6 +408,7 @@ export const event_types = {
|
||||||
MESSAGE_EDITED: 'message_edited',
|
MESSAGE_EDITED: 'message_edited',
|
||||||
MESSAGE_DELETED: 'message_deleted',
|
MESSAGE_DELETED: 'message_deleted',
|
||||||
MESSAGE_UPDATED: 'message_updated',
|
MESSAGE_UPDATED: 'message_updated',
|
||||||
|
MESSAGE_FILE_EMBEDDED: 'message_file_embedded',
|
||||||
IMPERSONATE_READY: 'impersonate_ready',
|
IMPERSONATE_READY: 'impersonate_ready',
|
||||||
CHAT_CHANGED: 'chat_id_changed',
|
CHAT_CHANGED: 'chat_id_changed',
|
||||||
GENERATION_STARTED: 'generation_started',
|
GENERATION_STARTED: 'generation_started',
|
||||||
|
@ -3404,6 +3405,10 @@ export async function Generate(type, { automatic_trigger, force_name2, quiet_pro
|
||||||
let regexedMessage = getRegexedString(message, regexType, options);
|
let regexedMessage = getRegexedString(message, regexType, options);
|
||||||
regexedMessage = await appendFileContent(chatItem, regexedMessage);
|
regexedMessage = await appendFileContent(chatItem, regexedMessage);
|
||||||
|
|
||||||
|
if (chatItem?.extra?.append_title && chatItem?.extra?.title) {
|
||||||
|
regexedMessage = `${regexedMessage}\n\n${chatItem.extra.title}`;
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...chatItem,
|
...chatItem,
|
||||||
mes: regexedMessage,
|
mes: regexedMessage,
|
||||||
|
|
|
@ -417,6 +417,7 @@ function embedMessageFile(messageId, messageBlock) {
|
||||||
}
|
}
|
||||||
|
|
||||||
await populateFileAttachment(message, 'embed_file_input');
|
await populateFileAttachment(message, 'embed_file_input');
|
||||||
|
await eventSource.emit(event_types.MESSAGE_FILE_EMBEDDED, messageId);
|
||||||
appendMediaToMessage(message, messageBlock);
|
appendMediaToMessage(message, messageBlock);
|
||||||
await saveChatConditional();
|
await saveChatConditional();
|
||||||
}
|
}
|
||||||
|
@ -614,6 +615,8 @@ async function deleteMessageImage() {
|
||||||
const message = chat[mesId];
|
const message = chat[mesId];
|
||||||
delete message.extra.image;
|
delete message.extra.image;
|
||||||
delete message.extra.inline_image;
|
delete message.extra.inline_image;
|
||||||
|
delete message.extra.title;
|
||||||
|
delete message.extra.append_title;
|
||||||
mesBlock.find('.mes_img_container').removeClass('img_extra');
|
mesBlock.find('.mes_img_container').removeClass('img_extra');
|
||||||
mesBlock.find('.mes_img').attr('src', '');
|
mesBlock.find('.mes_img').attr('src', '');
|
||||||
await saveChatConditional();
|
await saveChatConditional();
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import { ensureImageFormatSupported, getBase64Async, isTrueBoolean, saveBase64AsFile } from '../../utils.js';
|
import { ensureImageFormatSupported, getBase64Async, isTrueBoolean, saveBase64AsFile } from '../../utils.js';
|
||||||
import { getContext, getApiUrl, doExtrasFetch, extension_settings, modules, renderExtensionTemplateAsync } from '../../extensions.js';
|
import { getContext, getApiUrl, doExtrasFetch, extension_settings, modules, renderExtensionTemplateAsync } from '../../extensions.js';
|
||||||
import { callPopup, getRequestHeaders, saveSettingsDebounced, substituteParamsExtended } from '../../../script.js';
|
import { callPopup, eventSource, event_types, getRequestHeaders, saveSettingsDebounced, substituteParamsExtended } from '../../../script.js';
|
||||||
import { getMessageTimeStamp } from '../../RossAscends-mods.js';
|
import { getMessageTimeStamp } from '../../RossAscends-mods.js';
|
||||||
import { SECRET_KEYS, secret_state } from '../../secrets.js';
|
import { SECRET_KEYS, secret_state } from '../../secrets.js';
|
||||||
import { getMultimodalCaption } from '../shared.js';
|
import { getMultimodalCaption } from '../shared.js';
|
||||||
|
@ -84,12 +84,11 @@ async function setSpinnerIcon() {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sends a captioned message to the chat.
|
* Wraps a caption with a message template.
|
||||||
* @param {string} caption Caption text
|
* @param {string} caption Raw caption
|
||||||
* @param {string} image Image URL
|
* @returns {Promise<string>} Wrapped caption
|
||||||
*/
|
*/
|
||||||
async function sendCaptionedMessage(caption, image) {
|
async function wrapCaptionTemplate(caption) {
|
||||||
const context = getContext();
|
|
||||||
let template = extension_settings.caption.template || TEMPLATE_DEFAULT;
|
let template = extension_settings.caption.template || TEMPLATE_DEFAULT;
|
||||||
|
|
||||||
if (!/{{caption}}/i.test(template)) {
|
if (!/{{caption}}/i.test(template)) {
|
||||||
|
@ -101,7 +100,7 @@ async function sendCaptionedMessage(caption, image) {
|
||||||
|
|
||||||
if (extension_settings.caption.refine_mode) {
|
if (extension_settings.caption.refine_mode) {
|
||||||
messageText = await callPopup(
|
messageText = await callPopup(
|
||||||
'<h3>Review and edit the generated message:</h3>Press "Cancel" to abort the caption sending.',
|
'<h3>Review and edit the generated caption:</h3>Press "Cancel" to abort the caption sending.',
|
||||||
'input',
|
'input',
|
||||||
messageText,
|
messageText,
|
||||||
{ rows: 5, okButton: 'Send' });
|
{ rows: 5, okButton: 'Send' });
|
||||||
|
@ -111,6 +110,55 @@ async function sendCaptionedMessage(caption, image) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return messageText;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Generates a caption for the image attached to an existing message and stores it on that message.
 *
 * If the message has no text of its own, the wrapped caption becomes the message text.
 * Otherwise the message text is left untouched: the image is flagged as inline and the
 * caption is kept in `extra.title` with `extra.append_title` set.
 *
 * @param {Object} data Message data; mutated in place. Ignored when it has no `extra.image`.
 * @returns {Promise<void>}
 */
async function captionExistingMessage(data) {
    const imageUrl = data?.extra?.image;

    if (!imageUrl) {
        return;
    }

    const response = await fetch(imageUrl);

    // A failed request would otherwise hand an error body to the captioner instead of an image.
    if (!response.ok) {
        console.warn('Failed to fetch the image for captioning.', response.status);
        return;
    }

    const blob = await response.blob();
    // Fall back to the blob's own type: a null header would be stringified to "null" by File.
    const type = response.headers.get('Content-Type') ?? blob.type;
    const file = new File([blob], 'image.png', { type });
    const caption = await getCaptionForFile(file, null, true);

    if (!caption) {
        console.warn('Failed to generate a caption for the image.');
        return;
    }

    const wrappedCaption = await wrapCaptionTemplate(caption);

    // String(undefined) is 'undefined', which would make a text-less message look non-empty.
    const messageText = String(data.mes ?? '').trim();

    data.extra.title = wrappedCaption;

    if (!messageText) {
        data.extra.inline_image = false;
        // The caption is the message text now; a stale flag would get it appended a second time.
        delete data.extra.append_title;
        data.mes = wrappedCaption;
    } else {
        data.extra.inline_image = true;
        data.extra.append_title = true;
    }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sends a captioned message to the chat.
|
||||||
|
* @param {string} caption Caption text
|
||||||
|
* @param {string} image Image URL
|
||||||
|
*/
|
||||||
|
async function sendCaptionedMessage(caption, image) {
|
||||||
|
const messageText = await wrapCaptionTemplate(caption);
|
||||||
|
|
||||||
|
const context = getContext();
|
||||||
const message = {
|
const message = {
|
||||||
name: context.name1,
|
name: context.name1,
|
||||||
is_user: true,
|
is_user: true,
|
||||||
|
@ -423,6 +471,7 @@ jQuery(async function () {
|
||||||
$('#caption_refine_mode').prop('checked', !!(extension_settings.caption.refine_mode));
|
$('#caption_refine_mode').prop('checked', !!(extension_settings.caption.refine_mode));
|
||||||
$('#caption_allow_reverse_proxy').prop('checked', !!(extension_settings.caption.allow_reverse_proxy));
|
$('#caption_allow_reverse_proxy').prop('checked', !!(extension_settings.caption.allow_reverse_proxy));
|
||||||
$('#caption_prompt_ask').prop('checked', !!(extension_settings.caption.prompt_ask));
|
$('#caption_prompt_ask').prop('checked', !!(extension_settings.caption.prompt_ask));
|
||||||
|
$('#caption_auto_mode').prop('checked', !!(extension_settings.caption.auto_mode));
|
||||||
$('#caption_source').val(extension_settings.caption.source);
|
$('#caption_source').val(extension_settings.caption.source);
|
||||||
$('#caption_prompt').val(extension_settings.caption.prompt);
|
$('#caption_prompt').val(extension_settings.caption.prompt);
|
||||||
$('#caption_template').val(extension_settings.caption.template);
|
$('#caption_template').val(extension_settings.caption.template);
|
||||||
|
@ -448,6 +497,22 @@ jQuery(async function () {
|
||||||
extension_settings.caption.prompt_ask = $('#caption_prompt_ask').prop('checked');
|
extension_settings.caption.prompt_ask = $('#caption_prompt_ask').prop('checked');
|
||||||
saveSettingsDebounced();
|
saveSettingsDebounced();
|
||||||
});
|
});
|
||||||
|
// Persist the state of the "auto caption" checkbox whenever the user toggles it.
$('#caption_auto_mode').on('input', () => {
    const isChecked = Boolean($('#caption_auto_mode').prop('checked'));
    extension_settings.caption.auto_mode = isChecked;
    saveSettingsDebounced();
});
|
||||||
|
|
||||||
|
// Shared listener for chat events: captions the message at `index`,
// but only while the auto mode setting is enabled.
const onMessageEvent = async (index) => {
    if (extension_settings.caption.auto_mode) {
        const { chat } = getContext();
        await captionExistingMessage(chat[index]);
    }
};
|
||||||
|
|
||||||
|
eventSource.on(event_types.MESSAGE_SENT, onMessageEvent);
|
||||||
|
eventSource.on(event_types.MESSAGE_FILE_EMBEDDED, onMessageEvent);
|
||||||
|
|
||||||
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'caption',
|
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'caption',
|
||||||
callback: captionCommandCallback,
|
callback: captionCommandCallback,
|
||||||
|
|
|
@ -92,6 +92,11 @@
|
||||||
</div>
|
</div>
|
||||||
<label for="caption_template"><span data-i18n="Message Template">Message Template</span> <small><span data-i18n="(use _space">(use </span> <code>{{caption}}</code> <span data-i18n="macro)">macro)</span></small></label>
|
<label for="caption_template"><span data-i18n="Message Template">Message Template</span> <small><span data-i18n="(use _space">(use </span> <code>{{caption}}</code> <span data-i18n="macro)">macro)</span></small></label>
|
||||||
<textarea id="caption_template" class="text_pole" rows="2" placeholder="< Use default >">{{TEMPLATE_DEFAULT}}</textarea>
|
<textarea id="caption_template" class="text_pole" rows="2" placeholder="< Use default >">{{TEMPLATE_DEFAULT}}</textarea>
|
||||||
|
<label class="checkbox_label" for="caption_auto_mode">
|
||||||
|
<input id="caption_auto_mode" type="checkbox" class="checkbox">
|
||||||
|
<span data-i18n="Automatically caption images">Automatically caption images</span>
|
||||||
|
<i class="fa-solid fa-info-circle" title="Automatically caption images when they are pasted into the chat or attached to messages."></i>
|
||||||
|
</label>
|
||||||
<label class="checkbox_label margin-bot-10px" for="caption_refine_mode">
|
<label class="checkbox_label margin-bot-10px" for="caption_refine_mode">
|
||||||
<input id="caption_refine_mode" type="checkbox" class="checkbox">
|
<input id="caption_refine_mode" type="checkbox" class="checkbox">
|
||||||
<span data-i18n="Edit captions before saving">Edit captions before saving</span>
|
<span data-i18n="Edit captions before saving">Edit captions before saving</span>
|
||||||
|
|
Loading…
Reference in New Issue