Groq: Add new models and multimodal captions

This commit is contained in:
Cohee
2024-10-03 08:41:45 +03:00
parent 85b0d135f1
commit 6706cce10d
6 changed files with 47 additions and 12 deletions

View File

@ -403,6 +403,7 @@ jQuery(async function () {
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openai' && (secret_state[SECRET_KEYS.OPENAI] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'openrouter' && secret_state[SECRET_KEYS.OPENROUTER]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'zerooneai' && secret_state[SECRET_KEYS.ZEROONEAI]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'groq' && secret_state[SECRET_KEYS.GROQ]) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'mistral' && (secret_state[SECRET_KEYS.MISTRALAI] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'google' && (secret_state[SECRET_KEYS.MAKERSUITE] || extension_settings.caption.allow_reverse_proxy)) ||
(extension_settings.caption.source === 'multimodal' && extension_settings.caption.multimodal_api === 'anthropic' && (secret_state[SECRET_KEYS.CLAUDE] || extension_settings.caption.allow_reverse_proxy)) ||

View File

@ -21,6 +21,7 @@
<option value="anthropic">Anthropic</option>
<option value="custom" data-i18n="Custom (OpenAI-compatible)">Custom (OpenAI-compatible)</option>
<option value="google">Google AI Studio</option>
<option value="groq">Groq</option>
<option value="koboldcpp">KoboldCpp</option>
<option value="llamacpp">llama.cpp</option>
<option value="mistral">MistralAI</option>
@ -60,6 +61,9 @@
<option data-type="google" value="gemini-1.5-pro-exp-0801">gemini-1.5-pro-exp-0801</option>
<option data-type="google" value="gemini-1.5-pro-exp-0827">gemini-1.5-pro-exp-0827</option>
<option data-type="google" value="gemini-pro-vision">gemini-pro-vision</option>
<option data-type="groq" value="llama-3.2-11b-vision-preview">llama-3.2-11b-vision-preview</option>
<option data-type="groq" value="llama-3.2-90b-vision-preview">llama-3.2-90b-vision-preview</option>
<option data-type="groq" value="llava-v1.5-7b-4096-preview">llava-v1.5-7b-4096-preview</option>
<option data-type="openrouter" value="openai/gpt-4-vision-preview">openai/gpt-4-vision-preview</option>
<option data-type="openrouter" value="openai/gpt-4o">openai/gpt-4o</option>
<option data-type="openrouter" value="openai/gpt-4o-2024-05-13">openai/gpt-4o-2024-05-13</option>

View File

@ -36,7 +36,7 @@ export async function getMultimodalCaption(base64Img, prompt) {
const isVllm = extension_settings.caption.multimodal_api === 'vllm';
const base64Bytes = base64Img.length * 0.75;
const compressionLimit = 2 * 1024 * 1024;
if ((['google', 'openrouter', 'mistral'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba || isKoboldCpp) {
if ((['google', 'openrouter', 'mistral', 'groq'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba || isKoboldCpp) {
const maxSide = 1024;
base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg');
}
@ -135,6 +135,10 @@ function throwIfInvalidModel(useReverseProxy) {
throw new Error('01.AI API key is not set.');
}
if (extension_settings.caption.multimodal_api === 'groq' && !secret_state[SECRET_KEYS.GROQ]) {
throw new Error('Groq API key is not set.');
}
if (extension_settings.caption.multimodal_api === 'google' && !secret_state[SECRET_KEYS.MAKERSUITE] && !useReverseProxy) {
throw new Error('Google AI Studio API key is not set.');
}