mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
#1324 Add captions via OpenAI and Horde
This commit is contained in:
@ -2,6 +2,7 @@ import { getBase64Async, saveBase64AsFile } from "../../utils.js";
|
|||||||
import { getContext, getApiUrl, doExtrasFetch, extension_settings, modules } from "../../extensions.js";
|
import { getContext, getApiUrl, doExtrasFetch, extension_settings, modules } from "../../extensions.js";
|
||||||
import { callPopup, getRequestHeaders, saveSettingsDebounced } from "../../../script.js";
|
import { callPopup, getRequestHeaders, saveSettingsDebounced } from "../../../script.js";
|
||||||
import { getMessageTimeStamp } from "../../RossAscends-mods.js";
|
import { getMessageTimeStamp } from "../../RossAscends-mods.js";
|
||||||
|
import { SECRET_KEYS, secret_state } from "../../secrets.js";
|
||||||
export { MODULE_NAME };
|
export { MODULE_NAME };
|
||||||
|
|
||||||
const MODULE_NAME = 'caption';
|
const MODULE_NAME = 'caption';
|
||||||
@ -12,6 +13,14 @@ async function moduleWorker() {
|
|||||||
$('#send_picture').toggle(hasConnection);
|
$('#send_picture').toggle(hasConnection);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Migrates the legacy boolean `caption.local` setting to the string `caption.source` setting.
 *
 * A defined `local` flag maps to 'local' (true) or 'extras' (false); the legacy key is then
 * removed. If no source is set afterwards (no legacy flag and nothing saved yet), 'extras'
 * is used: an unset `local` flag selected the Extras path before this migration, and an
 * undefined source would make doCaptionRequest throw 'Unknown caption source.'.
 */
function migrateLocalSourceSetting() {
    const settings = extension_settings.caption;

    if (settings.local !== undefined) {
        settings.source = settings.local ? 'local' : 'extras';
    }

    delete settings.local;

    // Fresh installs never had `local`, so nothing above assigned a source.
    if (!settings.source) {
        settings.source = 'extras';
    }
}
|
||||||
|
|
||||||
async function setImageIcon() {
|
async function setImageIcon() {
|
||||||
try {
|
try {
|
||||||
const sendButton = $('#send_picture .extensionsMenuExtensionButton');
|
const sendButton = $('#send_picture .extensionsMenuExtensionButton');
|
||||||
@ -65,21 +74,32 @@ async function sendCaptionedMessage(caption, image) {
|
|||||||
await context.generate('caption');
|
await context.generate('caption');
|
||||||
}
|
}
|
||||||
|
|
||||||
async function doCaptionRequest(base64Img) {
|
/**
|
||||||
if (extension_settings.caption.local) {
|
*
|
||||||
const apiResult = await fetch('/api/extra/caption', {
|
* @param {string} base64Img Base64 encoded image without the data:image/...;base64, prefix
|
||||||
method: 'POST',
|
* @param {string} fileData Base64 encoded image with the data:image/...;base64, prefix
|
||||||
headers: getRequestHeaders(),
|
* @returns
|
||||||
body: JSON.stringify({ image: base64Img })
|
*/
|
||||||
});
|
async function doCaptionRequest(base64Img, fileData) {
|
||||||
|
switch (extension_settings.caption.source) {
|
||||||
if (!apiResult.ok) {
|
case 'local':
|
||||||
throw new Error('Failed to caption image via local pipeline.');
|
return await captionLocal(base64Img);
|
||||||
|
case 'extras':
|
||||||
|
return await captionExtras(base64Img);
|
||||||
|
case 'horde':
|
||||||
|
return await captionHorde(base64Img);
|
||||||
|
case 'openai':
|
||||||
|
return await captionOpenAI(fileData);
|
||||||
|
default:
|
||||||
|
throw new Error('Unknown caption source.');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function captionExtras(base64Img) {
|
||||||
|
if (!modules.includes('caption')) {
|
||||||
|
throw new Error('No captioning module is available.');
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = await apiResult.json();
|
|
||||||
return data;
|
|
||||||
} else if (modules.includes('caption')) {
|
|
||||||
const url = new URL(getApiUrl());
|
const url = new URL(getApiUrl());
|
||||||
url.pathname = '/api/caption';
|
url.pathname = '/api/caption';
|
||||||
|
|
||||||
@ -98,9 +118,51 @@ async function doCaptionRequest(base64Img) {
|
|||||||
|
|
||||||
const data = await apiResult.json();
|
const data = await apiResult.json();
|
||||||
return data;
|
return data;
|
||||||
} else {
|
|
||||||
throw new Error('No captioning module is available.');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Captions an image by POSTing it to the server's `/api/extra/caption` endpoint.
 * @param {string} base64Img Base64 encoded image without the data:image/...;base64, prefix
 * @returns {Promise<any>} Parsed JSON body of the response
 * @throws {Error} If the server answers with a non-OK status
 */
async function captionLocal(base64Img) {
    const requestInit = {
        method: 'POST',
        headers: getRequestHeaders(),
        body: JSON.stringify({ image: base64Img }),
    };
    const response = await fetch('/api/extra/caption', requestInit);

    if (response.ok) {
        return await response.json();
    }

    throw new Error('Failed to caption image via local pipeline.');
}
|
||||||
|
|
||||||
|
/**
 * Captions an image by POSTing it to the server's `/api/horde/caption-image` endpoint.
 * @param {string} base64Img Base64 encoded image without the data:image/...;base64, prefix
 * @returns {Promise<any>} Parsed JSON body of the response
 * @throws {Error} If the server answers with a non-OK status
 */
async function captionHorde(base64Img) {
    const requestInit = {
        method: 'POST',
        headers: getRequestHeaders(),
        body: JSON.stringify({ image: base64Img }),
    };
    const response = await fetch('/api/horde/caption-image', requestInit);

    if (response.ok) {
        return await response.json();
    }

    throw new Error('Failed to caption image via Horde.');
}
|
||||||
|
|
||||||
|
/**
 * Captions an image by POSTing it to the server's `/api/openai/caption-image` endpoint.
 * @param {string} base64Img Image to caption. Despite the name, doCaptionRequest passes the
 * full data URL here (with the data:image/...;base64, prefix); the server forwards the value
 * as an image URL.
 * @returns {Promise<any>} Parsed JSON body of the response
 * @throws {Error} If the server answers with a non-OK status
 */
async function captionOpenAI(base64Img) {
    const requestInit = {
        method: 'POST',
        headers: getRequestHeaders(),
        body: JSON.stringify({ image: base64Img }),
    };
    const response = await fetch('/api/openai/caption-image', requestInit);

    if (response.ok) {
        return await response.json();
    }

    throw new Error('Failed to caption image via OpenAI.');
}
|
||||||
|
|
||||||
async function onSelectImage(e) {
|
async function onSelectImage(e) {
|
||||||
@ -116,7 +178,7 @@ async function onSelectImage(e) {
|
|||||||
const fileData = await getBase64Async(file);
|
const fileData = await getBase64Async(file);
|
||||||
const base64Format = fileData.split(',')[0].split(';')[0].split('/')[1];
|
const base64Format = fileData.split(',')[0].split(';')[0].split('/')[1];
|
||||||
const base64Data = fileData.split(',')[1];
|
const base64Data = fileData.split(',')[1];
|
||||||
const data = await doCaptionRequest(base64Data);
|
const data = await doCaptionRequest(base64Data, fileData);
|
||||||
const caption = data.caption;
|
const caption = data.caption;
|
||||||
const imageToSave = data.thumbnail ? data.thumbnail : base64Data;
|
const imageToSave = data.thumbnail ? data.thumbnail : base64Data;
|
||||||
const format = data.thumbnail ? 'jpeg' : base64Format;
|
const format = data.thumbnail ? 'jpeg' : base64Format;
|
||||||
@ -149,7 +211,11 @@ jQuery(function () {
|
|||||||
$('#extensionsMenu').prepend(sendButton);
|
$('#extensionsMenu').prepend(sendButton);
|
||||||
$(sendButton).hide();
|
$(sendButton).hide();
|
||||||
$(sendButton).on('click', () => {
|
$(sendButton).on('click', () => {
|
||||||
const hasCaptionModule = modules.includes('caption') || extension_settings.caption.local;
|
const hasCaptionModule =
|
||||||
|
(modules.includes('caption') && extension_settings.caption.source === 'extras') ||
|
||||||
|
(extension_settings.caption.source === 'openai' && secret_state[SECRET_KEYS.OPENAI]) ||
|
||||||
|
extension_settings.caption.source === 'local' ||
|
||||||
|
extension_settings.caption.source === 'horde';
|
||||||
|
|
||||||
if (!hasCaptionModule) {
|
if (!hasCaptionModule) {
|
||||||
toastr.error('No captioning module is available. Either enable the local captioning pipeline or connect to Extras.');
|
toastr.error('No captioning module is available. Either enable the local captioning pipeline or connect to Extras.');
|
||||||
@ -177,11 +243,14 @@ jQuery(function () {
|
|||||||
<div class="inline-drawer-icon fa-solid fa-circle-chevron-down down"></div>
|
<div class="inline-drawer-icon fa-solid fa-circle-chevron-down down"></div>
|
||||||
</div>
|
</div>
|
||||||
<div class="inline-drawer-content">
|
<div class="inline-drawer-content">
|
||||||
<label class="checkbox_label" for="caption_local">
|
<label for="caption_source">Source:</label>
|
||||||
<input id="caption_local" type="checkbox" class="checkbox">
|
<select id="caption_source" class="form-control">
|
||||||
Use local captioning pipeline
|
<option value="local">Local</option>
|
||||||
</label>
|
<option value="extras">Extras</option>
|
||||||
<label class="checkbox_label" for="caption_refine_mode">
|
<option value="horde">Horde</option>
|
||||||
|
<option value="openai">OpenAI</option>
|
||||||
|
</select>
|
||||||
|
<label class="checkbox_label margin-bot-10px" for="caption_refine_mode">
|
||||||
<input id="caption_refine_mode" type="checkbox" class="checkbox">
|
<input id="caption_refine_mode" type="checkbox" class="checkbox">
|
||||||
Edit captions before generation
|
Edit captions before generation
|
||||||
</label>
|
</label>
|
||||||
@ -196,12 +265,14 @@ jQuery(function () {
|
|||||||
addPictureSendForm();
|
addPictureSendForm();
|
||||||
addSendPictureButton();
|
addSendPictureButton();
|
||||||
setImageIcon();
|
setImageIcon();
|
||||||
|
migrateLocalSourceSetting();
|
||||||
moduleWorker();
|
moduleWorker();
|
||||||
|
|
||||||
$('#caption_refine_mode').prop('checked', !!(extension_settings.caption.refine_mode));
|
$('#caption_refine_mode').prop('checked', !!(extension_settings.caption.refine_mode));
|
||||||
$('#caption_local').prop('checked', !!(extension_settings.caption.local));
|
$('#caption_source').val(extension_settings.caption.source);
|
||||||
$('#caption_refine_mode').on('input', onRefineModeInput);
|
$('#caption_refine_mode').on('input', onRefineModeInput);
|
||||||
$('#caption_local').on('input', () => {
|
$('#caption_source').on('change', () => {
|
||||||
extension_settings.caption.local = !!$('#caption_local').prop('checked');
|
extension_settings.caption.source = String($('#caption_source').val());
|
||||||
saveSettingsDebounced();
|
saveSettingsDebounced();
|
||||||
});
|
});
|
||||||
setInterval(moduleWorker, UPDATE_INTERVAL);
|
setInterval(moduleWorker, UPDATE_INTERVAL);
|
||||||
|
52
src/horde.js
52
src/horde.js
@ -110,6 +110,58 @@ function registerEndpoints(app, jsonParser) {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
 * POST /api/horde/caption-image
 *
 * Submits the image from the request body to AI Horde for interrogation with the caption
 * form, then polls the request status until it finishes.
 *
 * Request body: { image } - forwarded unchanged as `source_image`.
 * Responses:
 *   200 { caption } - interrogation finished with a non-empty caption
 *   400             - Horde did not return a request id
 *   500             - finished without forms or without a caption, or an exception was thrown
 *   503             - the request faulted or was cancelled
 *   504             - still unfinished after MAX_ATTEMPTS status checks
 */
app.post('/api/horde/caption-image', jsonParser, async (request, response) => {
    try {
        // Use the anonymous key when no Horde key is stored.
        const api_key_horde = readSecret(SECRET_KEYS.HORDE) || ANONYMOUS_KEY;
        const ai_horde = await getHordeClient();
        const result = await ai_horde.postAsyncInterrogate({
            source_image: request.body.image,
            forms: [{ name: AIHorde.ModelInterrogationFormTypes.caption }],
        }, { token: api_key_horde });

        if (!result.id) {
            console.error('Image interrogation request is not satisfyable:', result.message || 'unknown error');
            return response.sendStatus(400);
        }

        // 200 checks, 3 seconds apart: polling gives up after roughly 10 minutes.
        const MAX_ATTEMPTS = 200;
        const CHECK_INTERVAL = 3000;

        for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
            await delay(CHECK_INTERVAL);
            const status = await ai_horde.getInterrogationStatus(result.id);
            console.log(status);

            if (status.state === AIHorde.HordeAsyncRequestStates.done) {

                if (status.forms === undefined) {
                    console.error('Image interrogation request failed: no forms found.');
                    return response.sendStatus(500);
                }

                console.log('Image interrogation result:', status);
                // forms may be empty, so the first entry is still accessed defensively.
                const caption = status.forms[0]?.result?.caption || '';

                if (!caption) {
                    console.error('Image interrogation request failed: no caption found.');
                    return response.sendStatus(500);
                }

                return response.send({ caption });
            }

            if (status.state === AIHorde.HordeAsyncRequestStates.faulted || status.state === AIHorde.HordeAsyncRequestStates.cancelled) {
                console.log('Image interrogation request is not successful.');
                return response.sendStatus(503);
            }
        }

        // Polling gave up: answer explicitly, otherwise the client request would hang forever.
        console.error('Image interrogation request timed out.');
        return response.sendStatus(504);
    } catch (error) {
        console.error(error);
        response.sendStatus(500);
    }
});
|
||||||
|
|
||||||
app.post('/api/horde/user-info', jsonParser, async (_, response) => {
|
app.post('/api/horde/user-info', jsonParser, async (_, response) => {
|
||||||
const api_key_horde = readSecret(SECRET_KEYS.HORDE);
|
const api_key_horde = readSecret(SECRET_KEYS.HORDE);
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
const { readSecret, SECRET_KEYS } = require("./secrets");
|
const { readSecret, SECRET_KEYS } = require("./secrets");
|
||||||
|
const fetch = require('node-fetch').default;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Registers the OpenAI endpoints.
|
* Registers the OpenAI endpoints.
|
||||||
@ -6,6 +7,62 @@ const { readSecret, SECRET_KEYS } = require("./secrets");
|
|||||||
* @param {any} jsonParser
|
* @param {any} jsonParser
|
||||||
*/
|
*/
|
||||||
function registerEndpoints(app, jsonParser) {
|
function registerEndpoints(app, jsonParser) {
|
||||||
|
/**
 * POST /api/openai/caption-image
 *
 * Asks the OpenAI chat completions API (model gpt-4-vision-preview) what is in the supplied
 * image and returns the answer text as the caption.
 *
 * Request body: { image } - forwarded as the `image_url.url` of the user message.
 * Responses:
 *   200 { caption } - the model returned message content
 *   400             - no image was supplied in the request body
 *   401             - no OpenAI key is stored
 *   500             - OpenAI answered with an error (its body is passed through), the
 *                     completion had no content, or an exception was thrown
 */
app.post('/api/openai/caption-image', jsonParser, async (request, response) => {
    try {
        const key = readSecret(SECRET_KEYS.OPENAI);

        if (!key) {
            console.log('No OpenAI key found');
            return response.sendStatus(401);
        }

        const image = request.body?.image;

        // Reject early instead of sending a request without an image to OpenAI.
        if (!image) {
            console.log('No image provided for captioning');
            return response.sendStatus(400);
        }

        const body = {
            model: "gpt-4-vision-preview",
            messages: [
                {
                    role: "user",
                    content: [
                        { type: "text", text: "What’s in this image?" },
                        { type: "image_url", image_url: { "url": image } }
                    ]
                }
            ],
            max_tokens: 300
        };

        console.log('OpenAI request', body);
        const result = await fetch('https://api.openai.com/v1/chat/completions', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                Authorization: `Bearer ${key}`,
            },
            body: JSON.stringify(body),
            // NOTE(review): 0 is understood to disable node-fetch's request timeout - confirm.
            timeout: 0,
        });

        if (!result.ok) {
            const text = await result.text();
            console.log('OpenAI request failed', result.statusText, text);
            return response.status(500).send(text);
        }

        const data = await result.json();
        console.log('OpenAI response', data);
        // `choices` can be absent from an unexpected payload; fall through to the
        // 'No caption found' reply instead of throwing a TypeError.
        const caption = data?.choices?.[0]?.message?.content;

        if (!caption) {
            return response.status(500).send('No caption found');
        }

        return response.json({ caption });
    }
    catch (error) {
        console.error(error);
        response.status(500).send('Internal server error');
    }
});
|
||||||
|
|
||||||
app.post('/api/openai/generate-image', jsonParser, async (request, response) => {
|
app.post('/api/openai/generate-image', jsonParser, async (request, response) => {
|
||||||
try {
|
try {
|
||||||
const key = readSecret(SECRET_KEYS.OPENAI);
|
const key = readSecret(SECRET_KEYS.OPENAI);
|
||||||
@ -24,6 +81,7 @@ function registerEndpoints(app, jsonParser) {
|
|||||||
Authorization: `Bearer ${key}`,
|
Authorization: `Bearer ${key}`,
|
||||||
},
|
},
|
||||||
body: JSON.stringify(request.body),
|
body: JSON.stringify(request.body),
|
||||||
|
timeout: 0,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!result.ok) {
|
if (!result.ok) {
|
||||||
|
Reference in New Issue
Block a user