diff --git a/default/config.yaml b/default/config.yaml
index fd0be655c..edee81009 100644
--- a/default/config.yaml
+++ b/default/config.yaml
@@ -54,6 +54,10 @@ extras:
openai:
# Will send a random user ID to OpenAI completion API
randomizeUserId: false
+ # If not empty, will add this as a system message to the start of every caption completion prompt
+ # Example: "Perform the instructions to the best of your ability.\n\n" (for LLaVA)
+ # Not used in image inlining mode
+ captionSystemPrompt: ""
# -- DEEPL TRANSLATION CONFIGURATION --
deepl:
# Available options: default, more, less, prefer_more, prefer_less
diff --git a/public/scripts/extensions/caption/index.js b/public/scripts/extensions/caption/index.js
index aa666a232..97cc4cf37 100644
--- a/public/scripts/extensions/caption/index.js
+++ b/public/scripts/extensions/caption/index.js
@@ -300,7 +300,7 @@ jQuery(function () {
$('#caption_prompt_block').toggle(isMultimodal);
$('#caption_multimodal_api').val(extension_settings.caption.multimodal_api);
$('#caption_multimodal_model').val(extension_settings.caption.multimodal_model);
- $('#caption_multimodal_model option').each(function () {
+ $('#caption_multimodal_block [data-type]').each(function () {
const type = $(this).data('type');
$(this).toggle(type === extension_settings.caption.multimodal_api);
});
@@ -351,6 +351,10 @@ jQuery(function () {
haotian-liu/llava-13b
+
+
+ Allow reverse proxy
+
Caption Prompt
@@ -377,6 +381,7 @@ jQuery(function () {
switchMultimodalBlocks();
$('#caption_refine_mode').prop('checked', !!(extension_settings.caption.refine_mode));
+ $('#caption_allow_reverse_proxy').prop('checked', !!(extension_settings.caption.allow_reverse_proxy));
$('#caption_source').val(extension_settings.caption.source);
$('#caption_prompt').val(extension_settings.caption.prompt);
$('#caption_template').val(extension_settings.caption.template);
@@ -394,4 +399,8 @@ jQuery(function () {
extension_settings.caption.template = String($('#caption_template').val());
saveSettingsDebounced();
});
+ $('#caption_allow_reverse_proxy').on('input', () => {
+ extension_settings.caption.allow_reverse_proxy = $('#caption_allow_reverse_proxy').prop('checked');
+ saveSettingsDebounced();
+ });
});
diff --git a/public/scripts/extensions/shared.js b/public/scripts/extensions/shared.js
index 9058204ec..7d4e16720 100644
--- a/public/scripts/extensions/shared.js
+++ b/public/scripts/extensions/shared.js
@@ -1,7 +1,8 @@
import { getRequestHeaders } from '../../script.js';
import { extension_settings } from '../extensions.js';
+import { oai_settings } from '../openai.js';
import { SECRET_KEYS, secret_state } from '../secrets.js';
-import { createThumbnail } from '../utils.js';
+import { createThumbnail, isValidUrl } from '../utils.js';
/**
* Generates a caption for an image using a multimodal model.
@@ -35,6 +36,15 @@ export async function getMultimodalCaption(base64Img, prompt) {
}
}
+ const useReverseProxy =
+ extension_settings.caption.multimodal_api === 'openai'
+ && extension_settings.caption.allow_reverse_proxy
+ && oai_settings.reverse_proxy
+ && isValidUrl(oai_settings.reverse_proxy);
+
+ const proxyUrl = useReverseProxy ? oai_settings.reverse_proxy : '';
+ const proxyPassword = useReverseProxy ? oai_settings.proxy_password : '';
+
const apiResult = await fetch(`/api/${isGoogle ? 'google' : 'openai'}/caption-image`, {
method: 'POST',
headers: getRequestHeaders(),
@@ -46,6 +56,8 @@ export async function getMultimodalCaption(base64Img, prompt) {
: {
api: extension_settings.caption.multimodal_api || 'openai',
model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview',
+ reverse_proxy: proxyUrl,
+ proxy_password: proxyPassword,
}),
}),
});
diff --git a/src/endpoints/openai.js b/src/endpoints/openai.js
index 23a19f943..cb98cf274 100644
--- a/src/endpoints/openai.js
+++ b/src/endpoints/openai.js
@@ -4,6 +4,7 @@ const express = require('express');
const FormData = require('form-data');
const fs = require('fs');
const { jsonParser, urlencodedParser } = require('../express-common');
+const { getConfigValue } = require('../util');
const router = express.Router();
@@ -11,15 +12,19 @@ router.post('/caption-image', jsonParser, async (request, response) => {
try {
let key = '';
- if (request.body.api === 'openai') {
+ if (request.body.api === 'openai' && !request.body.reverse_proxy) {
key = readSecret(SECRET_KEYS.OPENAI);
}
- if (request.body.api === 'openrouter') {
+ if (request.body.api === 'openrouter' && !request.body.reverse_proxy) {
key = readSecret(SECRET_KEYS.OPENROUTER);
}
- if (!key) {
+ if (request.body.reverse_proxy && request.body.proxy_password) {
+ key = request.body.proxy_password;
+ }
+
+ if (!key && !request.body.reverse_proxy) {
console.log('No key found for API', request.body.api);
return response.sendStatus(400);
}
@@ -38,6 +43,14 @@ router.post('/caption-image', jsonParser, async (request, response) => {
max_tokens: 500,
};
+ const captionSystemPrompt = getConfigValue('openai.captionSystemPrompt');
+ if (captionSystemPrompt) {
+ body.messages.unshift({
+ role: 'system',
+ content: captionSystemPrompt,
+ });
+ }
+
console.log('Multimodal captioning request', body);
let apiUrl = '';
@@ -52,6 +65,10 @@ router.post('/caption-image', jsonParser, async (request, response) => {
apiUrl = 'https://api.openai.com/v1/chat/completions';
}
+ if (request.body.reverse_proxy) {
+ apiUrl = `${request.body.reverse_proxy}/chat/completions`;
+ }
+
const result = await fetch(apiUrl, {
method: 'POST',
headers: {