Add /caption command

This commit is contained in:
Cohee 2024-04-18 16:22:33 +03:00
parent 75afe29f28
commit d281767867
1 changed files with 56 additions and 14 deletions

View File

@ -1,10 +1,11 @@
import { getBase64Async, saveBase64AsFile } from '../../utils.js';
import { getBase64Async, isTrueBoolean, saveBase64AsFile } from '../../utils.js';
import { getContext, getApiUrl, doExtrasFetch, extension_settings, modules } from '../../extensions.js';
import { callPopup, getRequestHeaders, saveSettingsDebounced, substituteParams } from '../../../script.js';
import { getMessageTimeStamp } from '../../RossAscends-mods.js';
import { SECRET_KEYS, secret_state } from '../../secrets.js';
import { getMultimodalCaption } from '../shared.js';
import { textgen_types, textgenerationwebui_settings } from '../../textgen-settings.js';
import { registerSlashCommand } from '../../slash-commands.js';
export { MODULE_NAME };
const MODULE_NAME = 'caption';
@ -124,9 +125,10 @@ async function sendCaptionedMessage(caption, image) {
* Generates a caption for an image using a selected source.
* @param {string} base64Img Base64 encoded image without the data:image/...;base64, prefix
* @param {string} fileData Base64 encoded image with the data:image/...;base64, prefix
* @param {string} externalPrompt Caption prompt
* @returns {Promise<{caption: string}>} Generated caption
*/
async function doCaptionRequest(base64Img, fileData) {
async function doCaptionRequest(base64Img, fileData, externalPrompt) {
switch (extension_settings.caption.source) {
case 'local':
return await captionLocal(base64Img);
@ -135,7 +137,7 @@ async function doCaptionRequest(base64Img, fileData) {
case 'horde':
return await captionHorde(base64Img);
case 'multimodal':
return await captionMultimodal(fileData);
return await captionMultimodal(fileData, externalPrompt);
default:
throw new Error('Unknown caption source.');
}
@ -214,12 +216,13 @@ async function captionHorde(base64Img) {
/**
* Generates a caption for an image using a multimodal model.
* @param {string} base64Img Base64 encoded image with the data:image/...;base64, prefix
* @param {string} externalPrompt Caption prompt
* @returns {Promise<{caption: string}>} Generated caption
*/
async function captionMultimodal(base64Img) {
let prompt = extension_settings.caption.prompt || PROMPT_DEFAULT;
async function captionMultimodal(base64Img, externalPrompt) {
let prompt = externalPrompt || extension_settings.caption.prompt || PROMPT_DEFAULT;
if (extension_settings.caption.prompt_ask) {
if (!externalPrompt && extension_settings.caption.prompt_ask) {
const customPrompt = await callPopup('<h3>Enter a comment or question:</h3>', 'input', prompt, { rows: 2 });
if (!customPrompt) {
throw new Error('User aborted the caption sending.');
@ -231,29 +234,46 @@ async function captionMultimodal(base64Img) {
return { caption };
}
async function onSelectImage(e) {
setSpinnerIcon();
/**
* Handles the image selection event.
* @param {Event} e Input event
* @param {string} prompt Caption prompt
* @param {boolean} quiet Suppresses sending a message
* @returns {Promise<string>} Generated caption
*/
async function onSelectImage(e, prompt, quiet) {
if (!(e.target instanceof HTMLInputElement)) {
return '';
}
const file = e.target.files[0];
const form = e.target.form;
if (!file || !(file instanceof File)) {
return;
form && form.reset();
return '';
}
try {
setSpinnerIcon();
const context = getContext();
const fileData = await getBase64Async(file);
const base64Format = fileData.split(',')[0].split(';')[0].split('/')[1];
const base64Data = fileData.split(',')[1];
const { caption } = await doCaptionRequest(base64Data, fileData);
const imagePath = await saveBase64AsFile(base64Data, context.name2, '', base64Format);
await sendCaptionedMessage(caption, imagePath);
const { caption } = await doCaptionRequest(base64Data, fileData, prompt);
if (!quiet) {
const imagePath = await saveBase64AsFile(base64Data, context.name2, '', base64Format);
await sendCaptionedMessage(caption, imagePath);
}
return caption;
}
catch (error) {
toastr.error('Failed to caption image.');
console.log(error);
return '';
}
finally {
e.target.form.reset();
form && form.reset();
setImageIcon();
}
}
@ -263,6 +283,26 @@ function onRefineModeInput() {
saveSettingsDebounced();
}
/**
* Callback for the /caption command.
* @param {object} args Named parameters
* @param {string} prompt Caption prompt
*/
function captionCommandCallback(args, prompt) {
return new Promise(resolve => {
const quiet = isTrueBoolean(args?.quiet);
const input = document.createElement('input');
input.type = 'file';
input.accept = 'image/*';
input.onchange = async (e) => {
const caption = await onSelectImage(e, prompt, quiet);
resolve(caption);
};
input.oncancel = () => resolve('');
input.click();
});
}
jQuery(function () {
function addSendPictureButton() {
const sendButton = $(`
@ -308,7 +348,7 @@ jQuery(function () {
$(imgForm).append(inputHtml);
$(imgForm).hide();
$('#form_sheld').append(imgForm);
$('#img_file').on('change', onSelectImage);
$('#img_file').on('change', (e) => onSelectImage(e.originalEvent, '', false));
}
function switchMultimodalBlocks() {
const isMultimodal = extension_settings.caption.source === 'multimodal';
@ -457,4 +497,6 @@ jQuery(function () {
extension_settings.caption.prompt_ask = $('#caption_prompt_ask').prop('checked');
saveSettingsDebounced();
});
registerSlashCommand('caption', captionCommandCallback, [], '<span class="monospace">quiet=true/false [prompt]</span> - caption an image with an optional prompt and passes the caption down the pipe. Only multimodal sources support custom prompts. Set the "quiet" argument to true to suppress sending a captioned message, default: false.', true, true);
});