mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-02-10 09:00:36 +01:00
Add /caption command
This commit is contained in:
parent
75afe29f28
commit
d281767867
@ -1,10 +1,11 @@
|
|||||||
import { getBase64Async, saveBase64AsFile } from '../../utils.js';
|
import { getBase64Async, isTrueBoolean, saveBase64AsFile } from '../../utils.js';
|
||||||
import { getContext, getApiUrl, doExtrasFetch, extension_settings, modules } from '../../extensions.js';
|
import { getContext, getApiUrl, doExtrasFetch, extension_settings, modules } from '../../extensions.js';
|
||||||
import { callPopup, getRequestHeaders, saveSettingsDebounced, substituteParams } from '../../../script.js';
|
import { callPopup, getRequestHeaders, saveSettingsDebounced, substituteParams } from '../../../script.js';
|
||||||
import { getMessageTimeStamp } from '../../RossAscends-mods.js';
|
import { getMessageTimeStamp } from '../../RossAscends-mods.js';
|
||||||
import { SECRET_KEYS, secret_state } from '../../secrets.js';
|
import { SECRET_KEYS, secret_state } from '../../secrets.js';
|
||||||
import { getMultimodalCaption } from '../shared.js';
|
import { getMultimodalCaption } from '../shared.js';
|
||||||
import { textgen_types, textgenerationwebui_settings } from '../../textgen-settings.js';
|
import { textgen_types, textgenerationwebui_settings } from '../../textgen-settings.js';
|
||||||
|
import { registerSlashCommand } from '../../slash-commands.js';
|
||||||
export { MODULE_NAME };
|
export { MODULE_NAME };
|
||||||
|
|
||||||
const MODULE_NAME = 'caption';
|
const MODULE_NAME = 'caption';
|
||||||
@ -124,9 +125,10 @@ async function sendCaptionedMessage(caption, image) {
|
|||||||
* Generates a caption for an image using a selected source.
|
* Generates a caption for an image using a selected source.
|
||||||
* @param {string} base64Img Base64 encoded image without the data:image/...;base64, prefix
|
* @param {string} base64Img Base64 encoded image without the data:image/...;base64, prefix
|
||||||
* @param {string} fileData Base64 encoded image with the data:image/...;base64, prefix
|
* @param {string} fileData Base64 encoded image with the data:image/...;base64, prefix
|
||||||
|
* @param {string} externalPrompt Caption prompt; only forwarded to the multimodal source
|
||||||
* @returns {Promise<{caption: string}>} Generated caption
|
* @returns {Promise<{caption: string}>} Generated caption
|
||||||
*/
|
*/
|
||||||
async function doCaptionRequest(base64Img, fileData) {
|
async function doCaptionRequest(base64Img, fileData, externalPrompt) {
|
||||||
switch (extension_settings.caption.source) {
|
switch (extension_settings.caption.source) {
|
||||||
case 'local':
|
case 'local':
|
||||||
return await captionLocal(base64Img);
|
return await captionLocal(base64Img);
|
||||||
@ -135,7 +137,7 @@ async function doCaptionRequest(base64Img, fileData) {
|
|||||||
case 'horde':
|
case 'horde':
|
||||||
return await captionHorde(base64Img);
|
return await captionHorde(base64Img);
|
||||||
case 'multimodal':
|
case 'multimodal':
|
||||||
return await captionMultimodal(fileData);
|
return await captionMultimodal(fileData, externalPrompt);
|
||||||
default:
|
default:
|
||||||
throw new Error('Unknown caption source.');
|
throw new Error('Unknown caption source.');
|
||||||
}
|
}
|
||||||
@ -214,12 +216,13 @@ async function captionHorde(base64Img) {
|
|||||||
/**
|
/**
|
||||||
* Generates a caption for an image using a multimodal model.
|
* Generates a caption for an image using a multimodal model.
|
||||||
* @param {string} base64Img Base64 encoded image with the data:image/...;base64, prefix
|
* @param {string} base64Img Base64 encoded image with the data:image/...;base64, prefix
|
||||||
|
* @param {string} externalPrompt Caption prompt; when non-empty it takes precedence over the configured prompt and skips the "ask" popup
|
||||||
* @returns {Promise<{caption: string}>} Generated caption
|
* @returns {Promise<{caption: string}>} Generated caption
|
||||||
*/
|
*/
|
||||||
async function captionMultimodal(base64Img) {
|
async function captionMultimodal(base64Img, externalPrompt) {
|
||||||
let prompt = extension_settings.caption.prompt || PROMPT_DEFAULT;
|
let prompt = externalPrompt || extension_settings.caption.prompt || PROMPT_DEFAULT;
|
||||||
|
|
||||||
if (extension_settings.caption.prompt_ask) {
|
if (!externalPrompt && extension_settings.caption.prompt_ask) {
|
||||||
const customPrompt = await callPopup('<h3>Enter a comment or question:</h3>', 'input', prompt, { rows: 2 });
|
const customPrompt = await callPopup('<h3>Enter a comment or question:</h3>', 'input', prompt, { rows: 2 });
|
||||||
if (!customPrompt) {
|
if (!customPrompt) {
|
||||||
throw new Error('User aborted the caption sending.');
|
throw new Error('User aborted the caption sending.');
|
||||||
@ -231,29 +234,46 @@ async function captionMultimodal(base64Img) {
|
|||||||
return { caption };
|
return { caption };
|
||||||
}
|
}
|
||||||
|
|
||||||
async function onSelectImage(e) {
|
/**
|
||||||
setSpinnerIcon();
|
* Handles the image selection event.
|
||||||
|
* @param {Event} e Input event
|
||||||
|
* @param {string} prompt Caption prompt
|
||||||
|
* @param {boolean} quiet Suppresses sending a message
|
||||||
|
* @returns {Promise<string>} Generated caption, or an empty string if no image file was selected or captioning failed
|
||||||
|
*/
|
||||||
|
async function onSelectImage(e, prompt, quiet) {
|
||||||
|
if (!(e.target instanceof HTMLInputElement)) {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
const file = e.target.files[0];
|
const file = e.target.files[0];
|
||||||
|
const form = e.target.form;
|
||||||
|
|
||||||
if (!file || !(file instanceof File)) {
|
if (!file || !(file instanceof File)) {
|
||||||
return;
|
form && form.reset();
|
||||||
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
setSpinnerIcon();
|
||||||
const context = getContext();
|
const context = getContext();
|
||||||
const fileData = await getBase64Async(file);
|
const fileData = await getBase64Async(file);
|
||||||
const base64Format = fileData.split(',')[0].split(';')[0].split('/')[1];
|
const base64Format = fileData.split(',')[0].split(';')[0].split('/')[1];
|
||||||
const base64Data = fileData.split(',')[1];
|
const base64Data = fileData.split(',')[1];
|
||||||
const { caption } = await doCaptionRequest(base64Data, fileData);
|
const { caption } = await doCaptionRequest(base64Data, fileData, prompt);
|
||||||
const imagePath = await saveBase64AsFile(base64Data, context.name2, '', base64Format);
|
if (!quiet) {
|
||||||
await sendCaptionedMessage(caption, imagePath);
|
const imagePath = await saveBase64AsFile(base64Data, context.name2, '', base64Format);
|
||||||
|
await sendCaptionedMessage(caption, imagePath);
|
||||||
|
}
|
||||||
|
return caption;
|
||||||
}
|
}
|
||||||
catch (error) {
|
catch (error) {
|
||||||
toastr.error('Failed to caption image.');
|
toastr.error('Failed to caption image.');
|
||||||
console.log(error);
|
console.log(error);
|
||||||
|
return '';
|
||||||
}
|
}
|
||||||
finally {
|
finally {
|
||||||
e.target.form.reset();
|
form && form.reset();
|
||||||
setImageIcon();
|
setImageIcon();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -263,6 +283,26 @@ function onRefineModeInput() {
|
|||||||
saveSettingsDebounced();
|
saveSettingsDebounced();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback for the /caption command. Creates a detached image file input, opens it, and captions the file the user selects via onSelectImage.
|
||||||
|
* @param {object} args Named parameters; "quiet" (parsed with isTrueBoolean) is passed to onSelectImage to suppress sending the captioned message
|
||||||
|
* @param {string} prompt Caption prompt
|
||||||
|
* @returns {Promise<string>} Resolves with the caption returned by onSelectImage, or with an empty string when the input fires its cancel event
|
||||||
|
*/
|
||||||
|
function captionCommandCallback(args, prompt) {
|
||||||
|
return new Promise(resolve => {
|
||||||
|
const quiet = isTrueBoolean(args?.quiet);
|
||||||
|
const input = document.createElement('input');
|
||||||
|
input.type = 'file';
|
||||||
|
input.accept = 'image/*';
|
||||||
|
input.onchange = async (e) => {
|
||||||
|
const caption = await onSelectImage(e, prompt, quiet);
|
||||||
|
resolve(caption);
|
||||||
|
};
|
||||||
|
input.oncancel = () => resolve(''); // no file chosen: settle the promise so the caller does not wait indefinitely
|
||||||
|
input.click();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
jQuery(function () {
|
jQuery(function () {
|
||||||
function addSendPictureButton() {
|
function addSendPictureButton() {
|
||||||
const sendButton = $(`
|
const sendButton = $(`
|
||||||
@ -308,7 +348,7 @@ jQuery(function () {
|
|||||||
$(imgForm).append(inputHtml);
|
$(imgForm).append(inputHtml);
|
||||||
$(imgForm).hide();
|
$(imgForm).hide();
|
||||||
$('#form_sheld').append(imgForm);
|
$('#form_sheld').append(imgForm);
|
||||||
$('#img_file').on('change', onSelectImage);
|
$('#img_file').on('change', (e) => onSelectImage(e.originalEvent, '', false));
|
||||||
}
|
}
|
||||||
function switchMultimodalBlocks() {
|
function switchMultimodalBlocks() {
|
||||||
const isMultimodal = extension_settings.caption.source === 'multimodal';
|
const isMultimodal = extension_settings.caption.source === 'multimodal';
|
||||||
@ -457,4 +497,6 @@ jQuery(function () {
|
|||||||
extension_settings.caption.prompt_ask = $('#caption_prompt_ask').prop('checked');
|
extension_settings.caption.prompt_ask = $('#caption_prompt_ask').prop('checked');
|
||||||
saveSettingsDebounced();
|
saveSettingsDebounced();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
registerSlashCommand('caption', captionCommandCallback, [], '<span class="monospace">quiet=true/false [prompt]</span> - caption an image with an optional prompt and passes the caption down the pipe. Only multimodal sources support custom prompts. Set the "quiet" argument to true to suppress sending a captioned message, default: false.', true, true);
|
||||||
});
|
});
|
||||||
|
Loading…
x
Reference in New Issue
Block a user