Add custom caption source

This commit is contained in:
Cohee
2023-12-20 21:05:20 +02:00
parent cf8d7e7d35
commit ae64c99835
6 changed files with 64 additions and 17 deletions

View File

@ -582,7 +582,7 @@
</div> </div>
<div class="toggle-description justifyLeft" data-i18n="Wraps activated World Info entries before inserting into the prompt."> <div class="toggle-description justifyLeft" data-i18n="Wraps activated World Info entries before inserting into the prompt.">
Wraps activated World Info entries before inserting into the prompt. Use Wraps activated World Info entries before inserting into the prompt. Use
<tt>{0}</tt> to mark a place where the content is inserted. <code>{0}</code> to mark a place where the content is inserted.
</div> </div>
<div class="wide100p"> <div class="wide100p">
<textarea id="wi_format_textarea" class="text_pole textarea_compact autoSetHeight" rows="3" placeholder="&mdash;"></textarea> <textarea id="wi_format_textarea" class="text_pole textarea_compact autoSetHeight" rows="3" placeholder="&mdash;"></textarea>
@ -596,7 +596,7 @@
</div> </div>
</div> </div>
<div class="toggle-description justifyLeft" data-i18n="Use scenario to mark a place where the content is inserted."> <div class="toggle-description justifyLeft" data-i18n="Use scenario to mark a place where the content is inserted.">
Use <tt>{{scenario}}</tt> to mark a place where the content is inserted. Use <code>{{scenario}}</code> to mark a place where the content is inserted.
</div> </div>
<div class="wide100p"> <div class="wide100p">
<textarea id="scenario_format_textarea" class="text_pole textarea_compact autoSetHeight" rows="3" placeholder="&mdash;"></textarea> <textarea id="scenario_format_textarea" class="text_pole textarea_compact autoSetHeight" rows="3" placeholder="&mdash;"></textarea>
@ -610,7 +610,7 @@
</div> </div>
</div> </div>
<div class="toggle-description justifyLeft" data-i18n="Use personality to mark a place where the content is inserted."> <div class="toggle-description justifyLeft" data-i18n="Use personality to mark a place where the content is inserted.">
Use <tt>{{personality}}</tt> to mark a place where the content is inserted. Use <code>{{personality}}</code> to mark a place where the content is inserted.
</div> </div>
<div class="wide100p"> <div class="wide100p">
<textarea id="personality_format_textarea" class="text_pole textarea_compact autoSetHeight" rows="3" placeholder="&mdash;"></textarea> <textarea id="personality_format_textarea" class="text_pole textarea_compact autoSetHeight" rows="3" placeholder="&mdash;"></textarea>
@ -735,7 +735,7 @@
<div class="wide100p"> <div class="wide100p">
<input id="openai_reverse_proxy" type="text" class="text_pole" placeholder="https://api.openai.com/v1" maxlength="500" /> <input id="openai_reverse_proxy" type="text" class="text_pole" placeholder="https://api.openai.com/v1" maxlength="500" />
<small class="reverse_proxy_warning"> <small class="reverse_proxy_warning">
Doesn't work? Try adding <tt>/v1</tt> at the end! Doesn't work? Try adding <code>/v1</code> at the end!
</small> </small>
</div> </div>
</div> </div>
@ -1516,7 +1516,7 @@
</span> </span>
</div> </div>
</div> </div>
<div class="range-block" data-source="openai,openrouter,makersuite"> <div class="range-block" data-source="openai,openrouter,makersuite,custom">
<label for="openai_image_inlining" class="checkbox_label flexWrap widthFreeExpand"> <label for="openai_image_inlining" class="checkbox_label flexWrap widthFreeExpand">
<input id="openai_image_inlining" type="checkbox" /> <input id="openai_image_inlining" type="checkbox" />
<span data-i18n="Send inline images">Send inline images</span> <span data-i18n="Send inline images">Send inline images</span>
@ -1797,7 +1797,7 @@
oobabooga/text-generation-webui oobabooga/text-generation-webui
</a> </a>
<span data-i18n="Make sure you run it with"> <span data-i18n="Make sure you run it with">
Make sure you run it with <tt>--api</tt> flag Make sure you run it with <code>--api</code> flag
</span> </span>
</div> </div>
<div class="flex1"> <div class="flex1">
@ -2231,7 +2231,7 @@
</div> </div>
</form> </form>
<form id="custom_form" data-source="custom"> <form id="custom_form" data-source="custom">
<h4 data-i18n="Endpoint">Endpoint URL</h4> <h4 data-i18n="Custom Endpoint (Base URL)">Custom Endpoint (Base URL)</h4>
<div class="flex-container"> <div class="flex-container">
<input id="custom_api_url_text" class="text_pole wide100p" maxlength="500" value="" autocomplete="off" placeholder="Example: http://localhost:1234/v1"> <input id="custom_api_url_text" class="text_pole wide100p" maxlength="500" value="" autocomplete="off" placeholder="Example: http://localhost:1234/v1">
</div> </div>
@ -2240,7 +2240,10 @@
Doesn't work? Try adding <code>/v1</code> at the end of the URL! Doesn't work? Try adding <code>/v1</code> at the end of the URL!
</small> </small>
</div> </div>
<h4 data-i18n="Custom API Key">Custom API Key</h4> <h4>
<span data-i18n="Custom API Key">Custom API Key</span>
<small>(Optional)</small>
</h4>
<div class="flex-container"> <div class="flex-container">
<input id="api_key_custom" name="api_key_custom" class="text_pole flex1" maxlength="500" value="" type="text" autocomplete="off"> <input id="api_key_custom" name="api_key_custom" class="text_pole flex1" maxlength="500" value="" type="text" autocomplete="off">
<div title="Clear your API key" data-i18n="[title]Clear your API key" class="menu_button fa-solid fa-circle-xmark clear-api-key" data-key="api_key_custom"></div> <div title="Clear your API key" data-i18n="[title]Clear your API key" class="menu_button fa-solid fa-circle-xmark clear-api-key" data-key="api_key_custom"></div>
@ -2254,9 +2257,7 @@
</div> </div>
<h4 data-i18n="Available Models">Available Models</h4> <h4 data-i18n="Available Models">Available Models</h4>
<div class="flex-container"> <div class="flex-container">
<select id="model_custom_select"> <select id="model_custom_select" class="text_pole"></select>
<option>-- Connect to the API --</option>
</select>
</div> </div>
</form> </form>
<div class="flex-container flex"> <div class="flex-container flex">

View File

@ -399,7 +399,7 @@ function RA_autoconnect(PrevApi) {
|| (secret_state[SECRET_KEYS.AI21] && oai_settings.chat_completion_source == chat_completion_sources.AI21) || (secret_state[SECRET_KEYS.AI21] && oai_settings.chat_completion_source == chat_completion_sources.AI21)
|| (secret_state[SECRET_KEYS.MAKERSUITE] && oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) || (secret_state[SECRET_KEYS.MAKERSUITE] && oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE)
|| (secret_state[SECRET_KEYS.MISTRALAI] && oai_settings.chat_completion_source == chat_completion_sources.MISTRALAI) || (secret_state[SECRET_KEYS.MISTRALAI] && oai_settings.chat_completion_source == chat_completion_sources.MISTRALAI)
|| (secret_state[SECRET_KEYS.CUSTOM] && oai_settings.chat_completion_source == chat_completion_sources.CUSTOM) || (isValidUrl(oai_settings.custom_url) && oai_settings.chat_completion_source == chat_completion_sources.CUSTOM)
) { ) {
$('#api_button_openai').trigger('click'); $('#api_button_openai').trigger('click');
} }

View File

@ -277,6 +277,7 @@ jQuery(function () {
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && secret_state[SECRET_KEYS.MAKERSUITE]) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && secret_state[SECRET_KEYS.MAKERSUITE]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'llamacpp' && textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) || (extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'llamacpp' && textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'custom') ||
extension_settings.caption.source === 'local' || extension_settings.caption.source === 'local' ||
extension_settings.caption.source === 'horde'; extension_settings.caption.source === 'horde';
@ -345,6 +346,7 @@ jQuery(function () {
<option value="openai">OpenAI</option> <option value="openai">OpenAI</option>
<option value="openrouter">OpenRouter</option> <option value="openrouter">OpenRouter</option>
<option value="google">Google MakerSuite</option> <option value="google">Google MakerSuite</option>
<option value="custom">Custom (OpenAI-compatible)</option>
</select> </select>
</div> </div>
<div class="flex1 flex-container flexFlowColumn flexNoGap"> <div class="flex1 flex-container flexFlowColumn flexNoGap">
@ -358,6 +360,7 @@ jQuery(function () {
<option data-type="ollama" value="bakllava:latest">bakllava:latest</option> <option data-type="ollama" value="bakllava:latest">bakllava:latest</option>
<option data-type="ollama" value="llava:latest">llava:latest</option> <option data-type="ollama" value="llava:latest">llava:latest</option>
<option data-type="llamacpp" value="llamacpp_current">[Currently loaded]</option> <option data-type="llamacpp" value="llamacpp_current">[Currently loaded]</option>
<option data-type="custom" value="custom_current">[Currently selected]</option>
</select> </select>
</div> </div>
<label data-type="openai" class="checkbox_label flexBasis100p" for="caption_allow_reverse_proxy" title="Allow using reverse proxy if defined and valid."> <label data-type="openai" class="checkbox_label flexBasis100p" for="caption_allow_reverse_proxy" title="Allow using reverse proxy if defined and valid.">

View File

@ -24,6 +24,7 @@ export async function getMultimodalCaption(base64Img, prompt) {
const isGoogle = extension_settings.caption.multimodal_api === 'google'; const isGoogle = extension_settings.caption.multimodal_api === 'google';
const isOllama = extension_settings.caption.multimodal_api === 'ollama'; const isOllama = extension_settings.caption.multimodal_api === 'ollama';
const isLlamaCpp = extension_settings.caption.multimodal_api === 'llamacpp'; const isLlamaCpp = extension_settings.caption.multimodal_api === 'llamacpp';
const isCustom = extension_settings.caption.multimodal_api === 'custom';
const base64Bytes = base64Img.length * 0.75; const base64Bytes = base64Img.length * 0.75;
const compressionLimit = 2 * 1024 * 1024; const compressionLimit = 2 * 1024 * 1024;
if (['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) { if (['google', 'openrouter'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) {
@ -68,6 +69,11 @@ export async function getMultimodalCaption(base64Img, prompt) {
requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]; requestBody.server_url = textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP];
} }
if (isCustom) {
requestBody.server_url = oai_settings.custom_url;
requestBody.model = oai_settings.custom_model || 'gpt-4-vision-preview';
}
function getEndpointUrl() { function getEndpointUrl() {
switch (extension_settings.caption.multimodal_api) { switch (extension_settings.caption.multimodal_api) {
case 'google': case 'google':
@ -119,4 +125,8 @@ function throwIfInvalidModel() {
if (extension_settings.caption.multimodal_api === 'llamacpp' && !textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) { if (extension_settings.caption.multimodal_api === 'llamacpp' && !textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) {
throw new Error('LlamaCPP server URL is not set.'); throw new Error('LlamaCPP server URL is not set.');
} }
if (extension_settings.caption.multimodal_api === 'custom' && !oai_settings.custom_url) {
throw new Error('Custom API URL is not set.');
}
} }

View File

@ -1333,6 +1333,23 @@ function saveModelList(data) {
$('#model_openai_select').val(model).trigger('change'); $('#model_openai_select').val(model).trigger('change');
} }
} }
if (oai_settings.chat_completion_source == chat_completion_sources.CUSTOM) {
$('#model_custom_select').empty();
$('#model_custom_select').append('<option value="">None</option>');
model_list.forEach((model) => {
$('#model_custom_select').append(
$('<option>', {
value: model.id,
text: model.id,
selected: model.id == oai_settings.custom_model,
}));
});
if (!oai_settings.custom_model && model_list.length > 0) {
$('#model_custom_select').val(model_list[0].id).trigger('change');
}
}
} }
function appendOpenRouterOptions(model_list, groupModels = false, sort = false) { function appendOpenRouterOptions(model_list, groupModels = false, sort = false) {
@ -1461,6 +1478,7 @@ async function sendOpenAIRequest(type, messages, signal) {
const isGoogle = oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE; const isGoogle = oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE;
const isOAI = oai_settings.chat_completion_source == chat_completion_sources.OPENAI; const isOAI = oai_settings.chat_completion_source == chat_completion_sources.OPENAI;
const isMistral = oai_settings.chat_completion_source == chat_completion_sources.MISTRALAI; const isMistral = oai_settings.chat_completion_source == chat_completion_sources.MISTRALAI;
const isCustom = oai_settings.chat_completion_source == chat_completion_sources.CUSTOM;
const isTextCompletion = (isOAI && textCompletionModels.includes(oai_settings.openai_model)) || (isOpenRouter && oai_settings.openrouter_force_instruct && power_user.instruct.enabled); const isTextCompletion = (isOAI && textCompletionModels.includes(oai_settings.openai_model)) || (isOpenRouter && oai_settings.openrouter_force_instruct && power_user.instruct.enabled);
const isQuiet = type === 'quiet'; const isQuiet = type === 'quiet';
const isImpersonate = type === 'impersonate'; const isImpersonate = type === 'impersonate';
@ -1572,7 +1590,11 @@ async function sendOpenAIRequest(type, messages, signal) {
generate_data['safe_mode'] = false; // already defaults to false, but just incase they change that in the future. generate_data['safe_mode'] = false; // already defaults to false, but just incase they change that in the future.
} }
if ((isOAI || isOpenRouter || isMistral) && oai_settings.seed >= 0) { if (isCustom) {
generate_data['custom_url'] = oai_settings.custom_url;
}
if ((isOAI || isOpenRouter || isMistral || isCustom) && oai_settings.seed >= 0) {
generate_data['seed'] = oai_settings.seed; generate_data['seed'] = oai_settings.seed;
} }
@ -2478,7 +2500,8 @@ async function getStatusOpen() {
} }
if (oai_settings.chat_completion_source === chat_completion_sources.CUSTOM) { if (oai_settings.chat_completion_source === chat_completion_sources.CUSTOM) {
custom_url: oai_settings.custom_url; $('#model_custom_select').empty();
data.custom_url = oai_settings.custom_url;
} }
const canBypass = (oai_settings.chat_completion_source === chat_completion_sources.OPENAI && oai_settings.bypass_status_check) || oai_settings.chat_completion_source === chat_completion_sources.CUSTOM; const canBypass = (oai_settings.chat_completion_source === chat_completion_sources.OPENAI && oai_settings.bypass_status_check) || oai_settings.chat_completion_source === chat_completion_sources.CUSTOM;
@ -3525,7 +3548,7 @@ export function isImageInliningSupported() {
const gpt4v = 'gpt-4-vision'; const gpt4v = 'gpt-4-vision';
const geminiProV = 'gemini-pro-vision'; const geminiProV = 'gemini-pro-vision';
const llava13b = 'llava-13b'; const llava = 'llava';
if (!oai_settings.image_inlining) { if (!oai_settings.image_inlining) {
return false; return false;
@ -3537,7 +3560,9 @@ export function isImageInliningSupported() {
case chat_completion_sources.MAKERSUITE: case chat_completion_sources.MAKERSUITE:
return oai_settings.google_model.includes(geminiProV); return oai_settings.google_model.includes(geminiProV);
case chat_completion_sources.OPENROUTER: case chat_completion_sources.OPENROUTER:
return oai_settings.openrouter_model.includes(gpt4v) || oai_settings.openrouter_model.includes(llava13b); return !oai_settings.openrouter_force_instruct && (oai_settings.openrouter_model.includes(gpt4v) || oai_settings.openrouter_model.includes(llava));
case chat_completion_sources.CUSTOM:
return oai_settings.custom_model.includes(gpt4v) || oai_settings.custom_model.includes(llava) || oai_settings.custom_model.includes(geminiProV);
default: default:
return false; return false;
} }

View File

@ -24,7 +24,11 @@ router.post('/caption-image', jsonParser, async (request, response) => {
key = request.body.proxy_password; key = request.body.proxy_password;
} }
if (!key && !request.body.reverse_proxy) { if (request.body.api === 'custom') {
key = readSecret(SECRET_KEYS.CUSTOM);
}
if (!key && !request.body.reverse_proxy && request.body.api !== 'custom') {
console.log('No key found for API', request.body.api); console.log('No key found for API', request.body.api);
return response.sendStatus(400); return response.sendStatus(400);
} }
@ -69,6 +73,10 @@ router.post('/caption-image', jsonParser, async (request, response) => {
apiUrl = `${request.body.reverse_proxy}/chat/completions`; apiUrl = `${request.body.reverse_proxy}/chat/completions`;
} }
if (request.body.api === 'custom') {
apiUrl = `${request.body.server_url}/chat/completions`;
}
const result = await fetch(apiUrl, { const result = await fetch(apiUrl, {
method: 'POST', method: 'POST',
headers: { headers: {