mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Add OpenRouter and Llava to captioning plugin.
This commit is contained in:
@ -1386,9 +1386,9 @@
|
|||||||
<div class="range-block" data-source="openai,openrouter">
|
<div class="range-block" data-source="openai,openrouter">
|
||||||
<label for="openai_image_inlining" class="checkbox_label flexWrap widthFreeExpand">
|
<label for="openai_image_inlining" class="checkbox_label flexWrap widthFreeExpand">
|
||||||
<input id="openai_image_inlining" type="checkbox" />
|
<input id="openai_image_inlining" type="checkbox" />
|
||||||
<span data-i18n="Send inline images">Send inline images (only GPT-4V model)</span>
|
<span data-i18n="Send inline images">Send inline images</span>
|
||||||
<div id="image_inlining_hint" class="flexBasis100p toggle-description justifyLeft">
|
<div id="image_inlining_hint" class="flexBasis100p toggle-description justifyLeft">
|
||||||
Natively replaces captioning if the model supports it.
|
Natively replaces captioning if the model supports it (e.g. GPT-4V or Llava 13B).
|
||||||
Use the <code><i class="fa-solid fa-image"></i></code> action on any message or the
|
Use the <code><i class="fa-solid fa-image"></i></code> action on any message or the
|
||||||
<code><i class="fa-solid fa-wand-magic-sparkles"></i></code> menu to attach an image to the chat.
|
<code><i class="fa-solid fa-wand-magic-sparkles"></i></code> menu to attach an image to the chat.
|
||||||
</div>
|
</div>
|
||||||
|
@ -4,19 +4,18 @@ import { appendImageToMessage, callPopup, getRequestHeaders, saveSettingsDebounc
|
|||||||
import { getMessageTimeStamp } from "../../RossAscends-mods.js";
|
import { getMessageTimeStamp } from "../../RossAscends-mods.js";
|
||||||
import { SECRET_KEYS, secret_state } from "../../secrets.js";
|
import { SECRET_KEYS, secret_state } from "../../secrets.js";
|
||||||
import { isImageInliningSupported } from "../../openai.js";
|
import { isImageInliningSupported } from "../../openai.js";
|
||||||
|
import { getMultimodalCaption } from "../shared.js";
|
||||||
export { MODULE_NAME };
|
export { MODULE_NAME };
|
||||||
|
|
||||||
const MODULE_NAME = 'caption';
|
const MODULE_NAME = 'caption';
|
||||||
const UPDATE_INTERVAL = 1000;
|
|
||||||
|
|
||||||
const PROMPT_DEFAULT = 'What’s in this image?';
|
const PROMPT_DEFAULT = 'What’s in this image?';
|
||||||
const TEMPLATE_DEFAULT = '[{{user}} sends {{char}} a picture that contains: {{caption}}]';
|
const TEMPLATE_DEFAULT = '[{{user}} sends {{char}} a picture that contains: {{caption}}]';
|
||||||
|
|
||||||
async function moduleWorker() {
|
/**
|
||||||
const hasConnection = getContext().onlineStatus !== 'no_connection';
|
* Migrates old extension settings to the new format.
|
||||||
$('#send_picture').toggle(hasConnection);
|
* Must keep this function for compatibility with old settings.
|
||||||
}
|
*/
|
||||||
|
|
||||||
function migrateSettings() {
|
function migrateSettings() {
|
||||||
if (extension_settings.caption.local !== undefined) {
|
if (extension_settings.caption.local !== undefined) {
|
||||||
extension_settings.caption.source = extension_settings.caption.local ? 'local' : 'extras';
|
extension_settings.caption.source = extension_settings.caption.local ? 'local' : 'extras';
|
||||||
@ -28,6 +27,20 @@ function migrateSettings() {
|
|||||||
extension_settings.caption.source = 'extras';
|
extension_settings.caption.source = 'extras';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (extension_settings.caption.source === 'openai') {
|
||||||
|
extension_settings.caption.source = 'multimodal';
|
||||||
|
extension_settings.caption.multimodal_api = 'openai';
|
||||||
|
extension_settings.caption.multimodal_model = 'gpt-4-vision-preview';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!extension_settings.caption.multimodal_api) {
|
||||||
|
extension_settings.caption.multimodal_api = 'openai';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!extension_settings.caption.multimodal_model) {
|
||||||
|
extension_settings.caption.multimodal_model = 'gpt-4-vision-preview';
|
||||||
|
}
|
||||||
|
|
||||||
if (!extension_settings.caption.prompt) {
|
if (!extension_settings.caption.prompt) {
|
||||||
extension_settings.caption.prompt = PROMPT_DEFAULT;
|
extension_settings.caption.prompt = PROMPT_DEFAULT;
|
||||||
}
|
}
|
||||||
@ -37,6 +50,9 @@ function migrateSettings() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets an image icon for the send button.
|
||||||
|
*/
|
||||||
async function setImageIcon() {
|
async function setImageIcon() {
|
||||||
try {
|
try {
|
||||||
const sendButton = $('#send_picture .extensionsMenuExtensionButton');
|
const sendButton = $('#send_picture .extensionsMenuExtensionButton');
|
||||||
@ -48,6 +64,9 @@ async function setImageIcon() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets a spinner icon for the send button.
|
||||||
|
*/
|
||||||
async function setSpinnerIcon() {
|
async function setSpinnerIcon() {
|
||||||
try {
|
try {
|
||||||
const sendButton = $('#send_picture .extensionsMenuExtensionButton');
|
const sendButton = $('#send_picture .extensionsMenuExtensionButton');
|
||||||
@ -59,6 +78,11 @@ async function setSpinnerIcon() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sends a captioned message to the chat.
|
||||||
|
* @param {string} caption Caption text
|
||||||
|
* @param {string} image Image URL
|
||||||
|
*/
|
||||||
async function sendCaptionedMessage(caption, image) {
|
async function sendCaptionedMessage(caption, image) {
|
||||||
const context = getContext();
|
const context = getContext();
|
||||||
let template = extension_settings.caption.template || TEMPLATE_DEFAULT;
|
let template = extension_settings.caption.template || TEMPLATE_DEFAULT;
|
||||||
@ -98,10 +122,10 @@ async function sendCaptionedMessage(caption, image) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* Generates a caption for an image using a selected source.
|
||||||
* @param {string} base64Img Base64 encoded image without the data:image/...;base64, prefix
|
* @param {string} base64Img Base64 encoded image without the data:image/...;base64, prefix
|
||||||
* @param {string} fileData Base64 encoded image with the data:image/...;base64, prefix
|
* @param {string} fileData Base64 encoded image with the data:image/...;base64, prefix
|
||||||
* @returns
|
* @returns {Promise<{caption: string}>} Generated caption
|
||||||
*/
|
*/
|
||||||
async function doCaptionRequest(base64Img, fileData) {
|
async function doCaptionRequest(base64Img, fileData) {
|
||||||
switch (extension_settings.caption.source) {
|
switch (extension_settings.caption.source) {
|
||||||
@ -111,13 +135,18 @@ async function doCaptionRequest(base64Img, fileData) {
|
|||||||
return await captionExtras(base64Img);
|
return await captionExtras(base64Img);
|
||||||
case 'horde':
|
case 'horde':
|
||||||
return await captionHorde(base64Img);
|
return await captionHorde(base64Img);
|
||||||
case 'openai':
|
case 'multimodal':
|
||||||
return await captionOpenAI(fileData);
|
return await captionMultimodal(fileData);
|
||||||
default:
|
default:
|
||||||
throw new Error('Unknown caption source.');
|
throw new Error('Unknown caption source.');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generates a caption for an image using Extras API.
|
||||||
|
* @param {string} base64Img Base64 encoded image without the data:image/...;base64, prefix
|
||||||
|
* @returns {Promise<{caption: string}>} Generated caption
|
||||||
|
*/
|
||||||
async function captionExtras(base64Img) {
|
async function captionExtras(base64Img) {
|
||||||
if (!modules.includes('caption')) {
|
if (!modules.includes('caption')) {
|
||||||
throw new Error('No captioning module is available.');
|
throw new Error('No captioning module is available.');
|
||||||
@ -143,6 +172,11 @@ async function captionExtras(base64Img) {
|
|||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generates a caption for an image using a local model.
|
||||||
|
* @param {string} base64Img Base64 encoded image without the data:image/...;base64, prefix
|
||||||
|
* @returns {Promise<{caption: string}>} Generated caption
|
||||||
|
*/
|
||||||
async function captionLocal(base64Img) {
|
async function captionLocal(base64Img) {
|
||||||
const apiResult = await fetch('/api/extra/caption', {
|
const apiResult = await fetch('/api/extra/caption', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
@ -158,6 +192,11 @@ async function captionLocal(base64Img) {
|
|||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generates a caption for an image using a Horde model.
|
||||||
|
* @param {string} base64Img Base64 encoded image without the data:image/...;base64, prefix
|
||||||
|
* @returns {Promise<{caption: string}>} Generated caption
|
||||||
|
*/
|
||||||
async function captionHorde(base64Img) {
|
async function captionHorde(base64Img) {
|
||||||
const apiResult = await fetch('/api/horde/caption-image', {
|
const apiResult = await fetch('/api/horde/caption-image', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
@ -173,20 +212,15 @@ async function captionHorde(base64Img) {
|
|||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function captionOpenAI(base64Img) {
|
/**
|
||||||
|
* Generates a caption for an image using a multimodal model.
|
||||||
|
* @param {string} base64Img Base64 encoded image with the data:image/...;base64, prefix
|
||||||
|
* @returns {Promise<{caption: string}>} Generated caption
|
||||||
|
*/
|
||||||
|
async function captionMultimodal(base64Img) {
|
||||||
const prompt = extension_settings.caption.prompt || PROMPT_DEFAULT;
|
const prompt = extension_settings.caption.prompt || PROMPT_DEFAULT;
|
||||||
const apiResult = await fetch('/api/openai/caption-image', {
|
const caption = await getMultimodalCaption(base64Img, prompt);
|
||||||
method: 'POST',
|
return { caption };
|
||||||
headers: getRequestHeaders(),
|
|
||||||
body: JSON.stringify({ image: base64Img, prompt: prompt }),
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!apiResult.ok) {
|
|
||||||
throw new Error('Failed to caption image via OpenAI.');
|
|
||||||
}
|
|
||||||
|
|
||||||
const data = await apiResult.json();
|
|
||||||
return data;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function onSelectImage(e) {
|
async function onSelectImage(e) {
|
||||||
@ -202,11 +236,8 @@ async function onSelectImage(e) {
|
|||||||
const fileData = await getBase64Async(file);
|
const fileData = await getBase64Async(file);
|
||||||
const base64Format = fileData.split(',')[0].split(';')[0].split('/')[1];
|
const base64Format = fileData.split(',')[0].split(';')[0].split('/')[1];
|
||||||
const base64Data = fileData.split(',')[1];
|
const base64Data = fileData.split(',')[1];
|
||||||
const data = await doCaptionRequest(base64Data, fileData);
|
const { caption } = await doCaptionRequest(base64Data, fileData);
|
||||||
const caption = data.caption;
|
const imagePath = await saveBase64AsFile(base64Data, context.name2, '', base64Format);
|
||||||
const imageToSave = data.thumbnail ? data.thumbnail : base64Data;
|
|
||||||
const format = data.thumbnail ? 'jpeg' : base64Format;
|
|
||||||
const imagePath = await saveBase64AsFile(imageToSave, context.name2, '', format);
|
|
||||||
await sendCaptionedMessage(caption, imagePath);
|
await sendCaptionedMessage(caption, imagePath);
|
||||||
}
|
}
|
||||||
catch (error) {
|
catch (error) {
|
||||||
@ -310,7 +341,6 @@ jQuery(function () {
|
|||||||
</div>`);
|
</div>`);
|
||||||
|
|
||||||
$('#extensionsMenu').prepend(sendButton);
|
$('#extensionsMenu').prepend(sendButton);
|
||||||
$(sendButton).hide();
|
|
||||||
$(sendButton).on('click', () => {
|
$(sendButton).on('click', () => {
|
||||||
if (isImageInliningSupported()) {
|
if (isImageInliningSupported()) {
|
||||||
console.log('Native image inlining is supported. Skipping captioning.');
|
console.log('Native image inlining is supported. Skipping captioning.');
|
||||||
@ -320,12 +350,13 @@ jQuery(function () {
|
|||||||
|
|
||||||
const hasCaptionModule =
|
const hasCaptionModule =
|
||||||
(modules.includes('caption') && extension_settings.caption.source === 'extras') ||
|
(modules.includes('caption') && extension_settings.caption.source === 'extras') ||
|
||||||
(extension_settings.caption.source === 'openai' && secret_state[SECRET_KEYS.OPENAI]) ||
|
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && secret_state[SECRET_KEYS.OPENAI]) ||
|
||||||
|
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) ||
|
||||||
extension_settings.caption.source === 'local' ||
|
extension_settings.caption.source === 'local' ||
|
||||||
extension_settings.caption.source === 'horde';
|
extension_settings.caption.source === 'horde';
|
||||||
|
|
||||||
if (!hasCaptionModule) {
|
if (!hasCaptionModule) {
|
||||||
toastr.error('No captioning module is available. Choose other captioning source in the extension settings.');
|
toastr.error('Choose other captioning source in the extension settings.', 'Captioning is not available');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -343,6 +374,29 @@ jQuery(function () {
|
|||||||
$('#form_sheld').append(imgForm);
|
$('#form_sheld').append(imgForm);
|
||||||
$('#img_file').on('change', onSelectImage);
|
$('#img_file').on('change', onSelectImage);
|
||||||
}
|
}
|
||||||
|
/**
 * Shows or hides the multimodal settings blocks and syncs the API / model
 * dropdowns with the stored caption settings.
 *
 * This function is invoked repeatedly (on startup, whenever the caption source
 * changes, and from its own API change handler), so the change handlers are
 * bound under an event namespace and any previous binding is removed first.
 * Without that, every call would stack another copy of both handlers and a
 * single dropdown change would save the settings and re-enter this function
 * once per accumulated copy.
 */
function switchMultimodalBlocks() {
    const isMultimodal = extension_settings.caption.source === 'multimodal';
    $('#caption_multimodal_block').toggle(isMultimodal);
    $('#caption_prompt_block').toggle(isMultimodal);
    $('#caption_multimodal_api').val(extension_settings.caption.multimodal_api);
    $('#caption_multimodal_model').val(extension_settings.caption.multimodal_model);

    // Only offer the models whose data-type matches the selected API.
    $('#caption_multimodal_model option').each(function () {
        const type = $(this).data('type');
        $(this).toggle(type === extension_settings.caption.multimodal_api);
    });

    $('#caption_multimodal_api').off('change.captionMultimodal').on('change.captionMultimodal', () => {
        const api = String($('#caption_multimodal_api').val());
        // Switching the API invalidates the current model: fall back to the first model listed for that API.
        const model = String($(`#caption_multimodal_model option[data-type="${api}"]`).first().val());
        extension_settings.caption.multimodal_api = api;
        extension_settings.caption.multimodal_model = model;
        saveSettingsDebounced();
        switchMultimodalBlocks();
    });

    $('#caption_multimodal_model').off('change.captionMultimodal').on('change.captionMultimodal', () => {
        extension_settings.caption.multimodal_model = String($('#caption_multimodal_model').val());
        saveSettingsDebounced();
    });
}
|
||||||
function addSettings() {
|
function addSettings() {
|
||||||
const html = `
|
const html = `
|
||||||
<div class="caption_settings">
|
<div class="caption_settings">
|
||||||
@ -355,12 +409,31 @@ jQuery(function () {
|
|||||||
<label for="caption_source">Source:</label>
|
<label for="caption_source">Source:</label>
|
||||||
<select id="caption_source" class="text_pole">
|
<select id="caption_source" class="text_pole">
|
||||||
<option value="local">Local</option>
|
<option value="local">Local</option>
|
||||||
|
<option value="multimodal">Multimodal (OpenAI / OpenRouter)</option>
|
||||||
<option value="extras">Extras</option>
|
<option value="extras">Extras</option>
|
||||||
<option value="horde">Horde</option>
|
<option value="horde">Horde</option>
|
||||||
<option value="openai">OpenAI</option>
|
|
||||||
</select>
|
</select>
|
||||||
<label for="caption_prompt">Caption Prompt (OpenAI):</label>
|
<div id="caption_multimodal_block" class="flex-container wide100p">
|
||||||
|
<div class="flex1 flex-container flexFlowColumn flexNoGap">
|
||||||
|
<label for="caption_multimodal_api">API</label>
|
||||||
|
<select id="caption_multimodal_api" class="flex1 text_pole">
|
||||||
|
<option value="openai">OpenAI</option>
|
||||||
|
<option value="openrouter">OpenRouter</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<div class="flex1 flex-container flexFlowColumn flexNoGap">
|
||||||
|
<label for="caption_multimodal_model">Model</label>
|
||||||
|
<select id="caption_multimodal_model" class="flex1 text_pole">
|
||||||
|
<option data-type="openai" value="gpt-4-vision-preview">gpt-4-vision-preview</option>
|
||||||
|
<option data-type="openrouter" value="openai/gpt-4-vision-preview">openai/gpt-4-vision-preview</option>
|
||||||
|
<option data-type="openrouter" value="haotian-liu/llava-13b">haotian-liu/llava-13b</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div id="caption_prompt_block">
|
||||||
|
<label for="caption_prompt">Caption Prompt (Multimodal):</label>
|
||||||
<textarea id="caption_prompt" class="text_pole" rows="1" placeholder="< Use default >">${PROMPT_DEFAULT}</textarea>
|
<textarea id="caption_prompt" class="text_pole" rows="1" placeholder="< Use default >">${PROMPT_DEFAULT}</textarea>
|
||||||
|
</div>
|
||||||
<label for="caption_template">Message Template: <small>(use <tt>{{caption}}</tt> macro)</small></label>
|
<label for="caption_template">Message Template: <small>(use <tt>{{caption}}</tt> macro)</small></label>
|
||||||
<textarea id="caption_template" class="text_pole" rows="2" placeholder="< Use default >">${TEMPLATE_DEFAULT}</textarea>
|
<textarea id="caption_template" class="text_pole" rows="2" placeholder="< Use default >">${TEMPLATE_DEFAULT}</textarea>
|
||||||
<label class="checkbox_label margin-bot-10px" for="caption_refine_mode">
|
<label class="checkbox_label margin-bot-10px" for="caption_refine_mode">
|
||||||
@ -379,7 +452,7 @@ jQuery(function () {
|
|||||||
addSendPictureButton();
|
addSendPictureButton();
|
||||||
setImageIcon();
|
setImageIcon();
|
||||||
migrateSettings();
|
migrateSettings();
|
||||||
moduleWorker();
|
switchMultimodalBlocks();
|
||||||
|
|
||||||
$('#caption_refine_mode').prop('checked', !!(extension_settings.caption.refine_mode));
|
$('#caption_refine_mode').prop('checked', !!(extension_settings.caption.refine_mode));
|
||||||
$('#caption_source').val(extension_settings.caption.source);
|
$('#caption_source').val(extension_settings.caption.source);
|
||||||
@ -388,6 +461,7 @@ jQuery(function () {
|
|||||||
$('#caption_refine_mode').on('input', onRefineModeInput);
|
$('#caption_refine_mode').on('input', onRefineModeInput);
|
||||||
$('#caption_source').on('change', () => {
|
$('#caption_source').on('change', () => {
|
||||||
extension_settings.caption.source = String($('#caption_source').val());
|
extension_settings.caption.source = String($('#caption_source').val());
|
||||||
|
switchMultimodalBlocks();
|
||||||
saveSettingsDebounced();
|
saveSettingsDebounced();
|
||||||
});
|
});
|
||||||
$('#caption_prompt').on('input', () => {
|
$('#caption_prompt').on('input', () => {
|
||||||
@ -399,5 +473,4 @@ jQuery(function () {
|
|||||||
saveSettingsDebounced();
|
saveSettingsDebounced();
|
||||||
});
|
});
|
||||||
$(document).on('click', '.mes_embed', onImageEmbedClicked);
|
$(document).on('click', '.mes_embed', onImageEmbedClicked);
|
||||||
setInterval(moduleWorker, UPDATE_INTERVAL);
|
|
||||||
});
|
});
|
||||||
|
28
public/scripts/extensions/shared.js
Normal file
28
public/scripts/extensions/shared.js
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
import { getRequestHeaders } from "../../script.js";
|
||||||
|
import { extension_settings } from "../extensions.js";
|
||||||
|
|
||||||
|
/**
 * Generates a caption for an image using a multimodal model.
 *
 * Posts the image and prompt to the `/api/openai/caption-image` endpoint along
 * with the API and model stored in the caption extension settings. When either
 * setting is empty, the request falls back to `openai` / `gpt-4-vision-preview`.
 * @param {string} base64Img Base64 encoded image
 * @param {string} prompt Prompt to use for captioning
 * @returns {Promise<string>} Generated caption
 * @throws {Error} When the endpoint responds with a non-OK status
 */
export async function getMultimodalCaption(base64Img, prompt) {
    const api = extension_settings.caption.multimodal_api || 'openai';
    const model = extension_settings.caption.multimodal_model || 'gpt-4-vision-preview';

    const apiResult = await fetch('/api/openai/caption-image', {
        method: 'POST',
        headers: getRequestHeaders(),
        body: JSON.stringify({
            image: base64Img,
            prompt: prompt,
            api: api,
            model: model,
        }),
    });

    if (!apiResult.ok) {
        // The same endpoint serves every multimodal API, so report the one that was actually requested.
        let apiName = api;
        if (api === 'openai') {
            apiName = 'OpenAI';
        } else if (api === 'openrouter') {
            apiName = 'OpenRouter';
        }
        throw new Error(`Failed to caption image via ${apiName}.`);
    }

    const { caption } = await apiResult.json();
    return caption;
}
|
@ -3402,7 +3402,8 @@ export function isImageInliningSupported() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const modelId = 'gpt-4-vision';
|
const gpt4v = 'gpt-4-vision';
|
||||||
|
const llava13b = 'llava-13b';
|
||||||
|
|
||||||
if (!oai_settings.image_inlining) {
|
if (!oai_settings.image_inlining) {
|
||||||
return false;
|
return false;
|
||||||
@ -3410,9 +3411,9 @@ export function isImageInliningSupported() {
|
|||||||
|
|
||||||
switch (oai_settings.chat_completion_source) {
|
switch (oai_settings.chat_completion_source) {
|
||||||
case chat_completion_sources.OPENAI:
|
case chat_completion_sources.OPENAI:
|
||||||
return oai_settings.openai_model.includes(modelId);
|
return oai_settings.openai_model.includes(gpt4v);
|
||||||
case chat_completion_sources.OPENROUTER:
|
case chat_completion_sources.OPENROUTER:
|
||||||
return oai_settings.openrouter_model.includes(modelId);
|
return oai_settings.openrouter_model.includes(gpt4v) || oai_settings.openrouter_model.includes(llava13b);
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -9,15 +9,23 @@ const fetch = require('node-fetch').default;
|
|||||||
function registerEndpoints(app, jsonParser) {
|
function registerEndpoints(app, jsonParser) {
|
||||||
app.post('/api/openai/caption-image', jsonParser, async (request, response) => {
|
app.post('/api/openai/caption-image', jsonParser, async (request, response) => {
|
||||||
try {
|
try {
|
||||||
const key = readSecret(SECRET_KEYS.OPENAI);
|
let key = '';
|
||||||
|
|
||||||
|
if (request.body.api === 'openai') {
|
||||||
|
key = readSecret(SECRET_KEYS.OPENAI);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (request.body.api === 'openrouter') {
|
||||||
|
key = readSecret(SECRET_KEYS.OPENROUTER);
|
||||||
|
}
|
||||||
|
|
||||||
if (!key) {
|
if (!key) {
|
||||||
console.log('No OpenAI key found');
|
console.log('No key found for API', request.body.api);
|
||||||
return response.sendStatus(401);
|
return response.sendStatus(401);
|
||||||
}
|
}
|
||||||
|
|
||||||
const body = {
|
const body = {
|
||||||
model: "gpt-4-vision-preview",
|
model: request.body.model,
|
||||||
messages: [
|
messages: [
|
||||||
{
|
{
|
||||||
role: "user",
|
role: "user",
|
||||||
@ -30,12 +38,26 @@ function registerEndpoints(app, jsonParser) {
|
|||||||
max_tokens: 500
|
max_tokens: 500
|
||||||
};
|
};
|
||||||
|
|
||||||
console.log('OpenAI request', body);
|
console.log('Multimodal captioning request', body);
|
||||||
const result = await fetch('https://api.openai.com/v1/chat/completions', {
|
|
||||||
|
let apiUrl = '';
|
||||||
|
let headers = {};
|
||||||
|
|
||||||
|
if (request.body.api === 'openrouter') {
|
||||||
|
apiUrl = 'https://openrouter.ai/api/v1/chat/completions';
|
||||||
|
headers['HTTP-Referer'] = request.headers.referer;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (request.body.api === 'openai') {
|
||||||
|
apiUrl = 'https://api.openai.com/v1/chat/completions';
|
||||||
|
}
|
||||||
|
|
||||||
|
const result = await fetch(apiUrl, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers: {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
Authorization: `Bearer ${key}`,
|
Authorization: `Bearer ${key}`,
|
||||||
|
...headers,
|
||||||
},
|
},
|
||||||
body: JSON.stringify(body),
|
body: JSON.stringify(body),
|
||||||
timeout: 0,
|
timeout: 0,
|
||||||
@ -43,12 +65,12 @@ function registerEndpoints(app, jsonParser) {
|
|||||||
|
|
||||||
if (!result.ok) {
|
if (!result.ok) {
|
||||||
const text = await result.text();
|
const text = await result.text();
|
||||||
console.log('OpenAI request failed', result.statusText, text);
|
console.log('Multimodal captioning request failed', result.statusText, text);
|
||||||
return response.status(500).send(text);
|
return response.status(500).send(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = await result.json();
|
const data = await result.json();
|
||||||
console.log('OpenAI response', data);
|
console.log('Multimodal captioning response', data);
|
||||||
const caption = data?.choices[0]?.message?.content;
|
const caption = data?.choices[0]?.message?.content;
|
||||||
|
|
||||||
if (!caption) {
|
if (!caption) {
|
||||||
|
Reference in New Issue
Block a user