mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Add local multimodal caption sources
This commit is contained in:
@ -4,6 +4,7 @@ import { callPopup, getRequestHeaders, saveSettingsDebounced, substituteParams }
|
||||
import { getMessageTimeStamp } from '../../RossAscends-mods.js';
|
||||
import { SECRET_KEYS, secret_state } from '../../secrets.js';
|
||||
import { getMultimodalCaption } from '../shared.js';
|
||||
import { textgen_types, textgenerationwebui_settings } from '../../textgen-settings.js';
|
||||
export { MODULE_NAME };
|
||||
|
||||
const MODULE_NAME = 'caption';
|
||||
@ -134,7 +135,7 @@ async function doCaptionRequest(base64Img, fileData) {
|
||||
case 'horde':
|
||||
return await captionHorde(base64Img);
|
||||
case 'multimodal':
|
||||
return await captionMultimodal(extension_settings.caption.multimodal_api === 'google' ? base64Img : fileData);
|
||||
return await captionMultimodal(fileData);
|
||||
default:
|
||||
throw new Error('Unknown caption source.');
|
||||
}
|
||||
@ -271,9 +272,11 @@ jQuery(function () {
|
||||
$(sendButton).on('click', () => {
|
||||
const hasCaptionModule =
|
||||
(modules.includes('caption') && extension_settings.caption.source === 'extras') ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && secret_state[SECRET_KEYS.OPENAI]) ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && (secret_state[SECRET_KEYS.OPENAI] || extension_settings.caption.allow_reverse_proxy)) ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && secret_state[SECRET_KEYS.MAKERSUITE]) ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'ollama' && textgenerationwebui_settings.server_urls[textgen_types.OLLAMA]) ||
|
||||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'llamacpp' && textgenerationwebui_settings.server_urls[textgen_types.LLAMACPP]) ||
|
||||
extension_settings.caption.source === 'local' ||
|
||||
extension_settings.caption.source === 'horde';
|
||||
|
||||
@ -329,7 +332,7 @@ jQuery(function () {
|
||||
<label for="caption_source">Source</label>
|
||||
<select id="caption_source" class="text_pole">
|
||||
<option value="local">Local</option>
|
||||
<option value="multimodal">Multimodal (OpenAI / OpenRouter / Google)</option>
|
||||
<option value="multimodal">Multimodal (OpenAI / llama / Google)</option>
|
||||
<option value="extras">Extras</option>
|
||||
<option value="horde">Horde</option>
|
||||
</select>
|
||||
@ -337,9 +340,11 @@ jQuery(function () {
|
||||
<div class="flex1 flex-container flexFlowColumn flexNoGap">
|
||||
<label for="caption_multimodal_api">API</label>
|
||||
<select id="caption_multimodal_api" class="flex1 text_pole">
|
||||
<option value="llamacpp">llama.cpp</option>
|
||||
<option value="ollama">Ollama</option>
|
||||
<option value="openai">OpenAI</option>
|
||||
<option value="openrouter">OpenRouter</option>
|
||||
<option value="google">Google</option>
|
||||
<option value="google">Google MakerSuite</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="flex1 flex-container flexFlowColumn flexNoGap">
|
||||
@ -349,12 +354,19 @@ jQuery(function () {
|
||||
<option data-type="google" value="gemini-pro-vision">gemini-pro-vision</option>
|
||||
<option data-type="openrouter" value="openai/gpt-4-vision-preview">openai/gpt-4-vision-preview</option>
|
||||
<option data-type="openrouter" value="haotian-liu/llava-13b">haotian-liu/llava-13b</option>
|
||||
<option data-type="ollama" value="ollama_current">[Currently selected]</option>
|
||||
<option data-type="ollama" value="bakllava:latest">bakllava:latest</option>
|
||||
<option data-type="ollama" value="llava:latest">llava:latest</option>
|
||||
<option data-type="llamacpp" value="llamacpp_current">[Currently loaded]</option>
|
||||
</select>
|
||||
</div>
|
||||
<label data-type="openai" class="checkbox_label flexBasis100p" for="caption_allow_reverse_proxy" title="Allow using reverse proxy if defined and valid.">
|
||||
<input id="caption_allow_reverse_proxy" type="checkbox" class="checkbox">
|
||||
Allow reverse proxy
|
||||
</label>
|
||||
<div class="flexBasis100p m-b-1">
|
||||
<small><b>Hint:</b> Set your API keys and endpoints in the 'API Connections' tab first.</small>
|
||||
</div>
|
||||
</div>
|
||||
<div id="caption_prompt_block">
|
||||
<label for="caption_prompt">Caption Prompt</label>
|
||||
|
Reference in New Issue
Block a user