Add proxy support for multimodal captions. Add caption pre-prompt
This commit is contained in:
parent
a88cf1552a
commit
c7c1513e91
|
@ -54,6 +54,10 @@ extras:
|
||||||
openai:
|
openai:
|
||||||
# Will send a random user ID to OpenAI completion API
|
# Will send a random user ID to OpenAI completion API
|
||||||
randomizeUserId: false
|
randomizeUserId: false
|
||||||
|
# If not empty, will add this as a system message to the start of every caption completion prompt
|
||||||
|
# Example: "Perform the instructions to the best of your ability.\n\n" (for LLaVA)
|
||||||
|
# Not used in image inlining mode
|
||||||
|
captionSystemPrompt: ""
|
||||||
# -- DEEPL TRANSLATION CONFIGURATION --
|
# -- DEEPL TRANSLATION CONFIGURATION --
|
||||||
deepl:
|
deepl:
|
||||||
# Available options: default, more, less, prefer_more, prefer_less
|
# Available options: default, more, less, prefer_more, prefer_less
|
||||||
|
|
|
@ -300,7 +300,7 @@ jQuery(function () {
|
||||||
$('#caption_prompt_block').toggle(isMultimodal);
|
$('#caption_prompt_block').toggle(isMultimodal);
|
||||||
$('#caption_multimodal_api').val(extension_settings.caption.multimodal_api);
|
$('#caption_multimodal_api').val(extension_settings.caption.multimodal_api);
|
||||||
$('#caption_multimodal_model').val(extension_settings.caption.multimodal_model);
|
$('#caption_multimodal_model').val(extension_settings.caption.multimodal_model);
|
||||||
$('#caption_multimodal_model option').each(function () {
|
$('#caption_multimodal_block [data-type]').each(function () {
|
||||||
const type = $(this).data('type');
|
const type = $(this).data('type');
|
||||||
$(this).toggle(type === extension_settings.caption.multimodal_api);
|
$(this).toggle(type === extension_settings.caption.multimodal_api);
|
||||||
});
|
});
|
||||||
|
@ -351,6 +351,10 @@ jQuery(function () {
|
||||||
<option data-type="openrouter" value="haotian-liu/llava-13b">haotian-liu/llava-13b</option>
|
<option data-type="openrouter" value="haotian-liu/llava-13b">haotian-liu/llava-13b</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
|
<label data-type="openai" class="checkbox_label flexBasis100p" for="caption_allow_reverse_proxy" title="Allow using reverse proxy if defined and valid.">
|
||||||
|
<input id="caption_allow_reverse_proxy" type="checkbox" class="checkbox">
|
||||||
|
Allow reverse proxy
|
||||||
|
</label>
|
||||||
</div>
|
</div>
|
||||||
<div id="caption_prompt_block">
|
<div id="caption_prompt_block">
|
||||||
<label for="caption_prompt">Caption Prompt</label>
|
<label for="caption_prompt">Caption Prompt</label>
|
||||||
|
@ -377,6 +381,7 @@ jQuery(function () {
|
||||||
switchMultimodalBlocks();
|
switchMultimodalBlocks();
|
||||||
|
|
||||||
$('#caption_refine_mode').prop('checked', !!(extension_settings.caption.refine_mode));
|
$('#caption_refine_mode').prop('checked', !!(extension_settings.caption.refine_mode));
|
||||||
|
$('#caption_allow_reverse_proxy').prop('checked', !!(extension_settings.caption.allow_reverse_proxy));
|
||||||
$('#caption_source').val(extension_settings.caption.source);
|
$('#caption_source').val(extension_settings.caption.source);
|
||||||
$('#caption_prompt').val(extension_settings.caption.prompt);
|
$('#caption_prompt').val(extension_settings.caption.prompt);
|
||||||
$('#caption_template').val(extension_settings.caption.template);
|
$('#caption_template').val(extension_settings.caption.template);
|
||||||
|
@ -394,4 +399,8 @@ jQuery(function () {
|
||||||
extension_settings.caption.template = String($('#caption_template').val());
|
extension_settings.caption.template = String($('#caption_template').val());
|
||||||
saveSettingsDebounced();
|
saveSettingsDebounced();
|
||||||
});
|
});
|
||||||
|
$('#caption_allow_reverse_proxy').on('input', () => {
|
||||||
|
extension_settings.caption.allow_reverse_proxy = $('#caption_allow_reverse_proxy').prop('checked');
|
||||||
|
saveSettingsDebounced();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
import { getRequestHeaders } from '../../script.js';
|
import { getRequestHeaders } from '../../script.js';
|
||||||
import { extension_settings } from '../extensions.js';
|
import { extension_settings } from '../extensions.js';
|
||||||
|
import { oai_settings } from '../openai.js';
|
||||||
import { SECRET_KEYS, secret_state } from '../secrets.js';
|
import { SECRET_KEYS, secret_state } from '../secrets.js';
|
||||||
import { createThumbnail } from '../utils.js';
|
import { createThumbnail, isValidUrl } from '../utils.js';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates a caption for an image using a multimodal model.
|
* Generates a caption for an image using a multimodal model.
|
||||||
|
@ -35,6 +36,15 @@ export async function getMultimodalCaption(base64Img, prompt) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const useReverseProxy =
|
||||||
|
extension_settings.caption.multimodal_api === 'openai'
|
||||||
|
&& extension_settings.caption.allow_reverse_proxy
|
||||||
|
&& oai_settings.reverse_proxy
|
||||||
|
&& isValidUrl(oai_settings.reverse_proxy);
|
||||||
|
|
||||||
|
const proxyUrl = useReverseProxy ? oai_settings.reverse_proxy : '';
|
||||||
|
const proxyPassword = useReverseProxy ? oai_settings.proxy_password : '';
|
||||||
|
|
||||||
const apiResult = await fetch(`/api/${isGoogle ? 'google' : 'openai'}/caption-image`, {
|
const apiResult = await fetch(`/api/${isGoogle ? 'google' : 'openai'}/caption-image`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: getRequestHeaders(),
|
headers: getRequestHeaders(),
|
||||||
|
@ -46,6 +56,8 @@ export async function getMultimodalCaption(base64Img, prompt) {
|
||||||
: {
|
: {
|
||||||
api: extension_settings.caption.multimodal_api || 'openai',
|
api: extension_settings.caption.multimodal_api || 'openai',
|
||||||
model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview',
|
model: extension_settings.caption.multimodal_model || 'gpt-4-vision-preview',
|
||||||
|
reverse_proxy: proxyUrl,
|
||||||
|
proxy_password: proxyPassword,
|
||||||
}),
|
}),
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
|
@ -4,6 +4,7 @@ const express = require('express');
|
||||||
const FormData = require('form-data');
|
const FormData = require('form-data');
|
||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
const { jsonParser, urlencodedParser } = require('../express-common');
|
const { jsonParser, urlencodedParser } = require('../express-common');
|
||||||
|
const { getConfigValue } = require('../util');
|
||||||
|
|
||||||
const router = express.Router();
|
const router = express.Router();
|
||||||
|
|
||||||
|
@ -11,15 +12,19 @@ router.post('/caption-image', jsonParser, async (request, response) => {
|
||||||
try {
|
try {
|
||||||
let key = '';
|
let key = '';
|
||||||
|
|
||||||
if (request.body.api === 'openai') {
|
if (request.body.api === 'openai' && !request.body.reverse_proxy) {
|
||||||
key = readSecret(SECRET_KEYS.OPENAI);
|
key = readSecret(SECRET_KEYS.OPENAI);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (request.body.api === 'openrouter') {
|
if (request.body.api === 'openrouter' && !request.body.reverse_proxy) {
|
||||||
key = readSecret(SECRET_KEYS.OPENROUTER);
|
key = readSecret(SECRET_KEYS.OPENROUTER);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!key) {
|
if (request.body.reverse_proxy && request.body.proxy_password) {
|
||||||
|
key = request.body.proxy_password;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!key && !request.body.reverse_proxy) {
|
||||||
console.log('No key found for API', request.body.api);
|
console.log('No key found for API', request.body.api);
|
||||||
return response.sendStatus(400);
|
return response.sendStatus(400);
|
||||||
}
|
}
|
||||||
|
@ -38,6 +43,14 @@ router.post('/caption-image', jsonParser, async (request, response) => {
|
||||||
max_tokens: 500,
|
max_tokens: 500,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const captionSystemPrompt = getConfigValue('openai.captionSystemPrompt');
|
||||||
|
if (captionSystemPrompt) {
|
||||||
|
body.messages.unshift({
|
||||||
|
role: 'system',
|
||||||
|
content: captionSystemPrompt,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
console.log('Multimodal captioning request', body);
|
console.log('Multimodal captioning request', body);
|
||||||
|
|
||||||
let apiUrl = '';
|
let apiUrl = '';
|
||||||
|
@ -52,6 +65,10 @@ router.post('/caption-image', jsonParser, async (request, response) => {
|
||||||
apiUrl = 'https://api.openai.com/v1/chat/completions';
|
apiUrl = 'https://api.openai.com/v1/chat/completions';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (request.body.reverse_proxy) {
|
||||||
|
apiUrl = `${request.body.reverse_proxy}/chat/completions`;
|
||||||
|
}
|
||||||
|
|
||||||
const result = await fetch(apiUrl, {
|
const result = await fetch(apiUrl, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers: {
|
||||||
|
|
Loading…
Reference in New Issue