Add Vertex AI express mode support (#3977)

* Add Vertex AI express mode support
Split Google AI Studio and Vertex AI

* Add support for Vertex AI, including updating default models and related settings, modifying frontend HTML to include Vertex AI options, and adjusting request processing logic in the backend API.

* Log API name in the console

* Merge sysprompt toggles back

* Use Gemma tokenizers for Vertex and LearnLM

* AI Studio parity updates

* Add link to the express mode doc. Also, technically it's not a form.

* Split title

* Use array includes

* Add support for Google Vertex AI in image captioning feature

* Specify caption API name, add to compression list

---------

Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
This commit is contained in:
NijikaMyWaifu
2025-05-23 01:10:53 +08:00
committed by GitHub
parent 6dc59b9fd3
commit 157315cd68
18 changed files with 193 additions and 42 deletions

View File

@ -15,6 +15,7 @@
"custom_exclude_body": "",
"custom_include_headers": "",
"google_model": "gemini-pro",
"vertexai_model": "gemini-2.0-flash-001",
"temperature": 1,
"frequency_penalty": 0,
"presence_penalty": 0,

View File

@ -691,7 +691,7 @@
</span>
</div>
</div>
<div class="range-block" data-source="openai,claude,windowai,openrouter,ai21,scale,makersuite,mistralai,custom,cohere,perplexity,groq,01ai,nanogpt,deepseek,xai">
<div class="range-block" data-source="openai,claude,windowai,openrouter,ai21,scale,makersuite,vertexai,mistralai,custom,cohere,perplexity,groq,01ai,nanogpt,deepseek,xai">
<div class="range-block-title" data-i18n="Temperature">
Temperature
</div>
@ -730,7 +730,7 @@
</div>
</div>
</div>
<div class="range-block" data-source="claude,openrouter,makersuite,cohere,perplexity">
<div class="range-block" data-source="claude,openrouter,makersuite,vertexai,cohere,perplexity">
<div class="range-block-title" data-i18n="Top K">
Top K
</div>
@ -743,7 +743,7 @@
</div>
</div>
</div>
<div class="range-block" data-source="openai,claude,openrouter,ai21,scale,makersuite,mistralai,custom,cohere,perplexity,groq,01ai,nanogpt,deepseek,xai">
<div class="range-block" data-source="openai,claude,openrouter,ai21,scale,makersuite,vertexai,mistralai,custom,cohere,perplexity,groq,01ai,nanogpt,deepseek,xai">
<div class="range-block-title" data-i18n="Top P">
Top P
</div>
@ -1969,7 +1969,7 @@
</span>
</div>
</div>
<div class="range-block" data-source="makersuite,openrouter,claude">
<div class="range-block" data-source="makersuite,vertexai,openrouter,claude">
<label for="openai_enable_web_search" class="checkbox_label flexWrap widthFreeExpand">
<input id="openai_enable_web_search" type="checkbox" />
<span data-i18n="Enable web search">Enable web search</span>
@ -1983,7 +1983,7 @@
</b>
</div>
</div>
<div class="range-block" data-source="openai,cohere,mistralai,custom,claude,openrouter,groq,deepseek,makersuite,ai21,xai,pollinations">
<div class="range-block" data-source="openai,cohere,mistralai,custom,claude,openrouter,groq,deepseek,makersuite,vertexai,ai21,xai,pollinations">
<label for="openai_function_calling" class="checkbox_label flexWrap widthFreeExpand">
<input id="openai_function_calling" type="checkbox" />
<span data-i18n="Enable function calling">Enable function calling</span>
@ -1993,7 +1993,7 @@
<span data-i18n="enable_functions_desc_3">Can be utilized by various extensions to provide additional functionality.</span>
</div>
</div>
<div class="range-block" data-source="openai,openrouter,mistralai,makersuite,claude,custom,01ai,xai,pollinations">
<div class="range-block" data-source="openai,openrouter,mistralai,makersuite,vertexai,claude,custom,01ai,xai,pollinations">
<label for="openai_image_inlining" class="checkbox_label flexWrap widthFreeExpand">
<input id="openai_image_inlining" type="checkbox" />
<span data-i18n="Send inline images">Send inline images</span>
@ -2018,7 +2018,7 @@
</div>
</div>
</div>
<div class="range-block" data-source="makersuite">
<div class="range-block" data-source="makersuite,vertexai">
<label for="openai_request_images" class="checkbox_label widthFreeExpand">
<input id="openai_request_images" type="checkbox" />
<span>
@ -2030,12 +2030,12 @@
<span data-i18n="Allows the model to return image attachments.">
Allows the model to return image attachments.
</span>
<em data-source="makersuite" data-i18n="Request inline images_desc_2">
<em data-source="makersuite,vertexai" data-i18n="Request inline images_desc_2">
Incompatible with the following features: function calling, web search, system prompt.
</em>
</div>
</div>
<div class="range-block" data-source="makersuite">
<div class="range-block" data-source="makersuite,vertexai">
<label for="use_makersuite_sysprompt" class="checkbox_label widthFreeExpand">
<input id="use_makersuite_sysprompt" type="checkbox" />
<span>
@ -2060,12 +2060,12 @@
</span>
</div>
</div>
<div class="flex-container flexFlowColumn wide100p textAlignCenter marginTop10" data-source="openai,custom,claude,xai,makersuite,openrouter,pollinations">
<div class="flex-container flexFlowColumn wide100p textAlignCenter marginTop10" data-source="openai,custom,claude,xai,makersuite,vertexai,openrouter,pollinations">
<div class="flex-container oneline-dropdown" title="Constrains effort on reasoning for reasoning models.&#10;Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response." data-i18n="[title]Constrains effort on reasoning for reasoning models.">
<label for="openai_reasoning_effort">
<span data-i18n="Reasoning Effort">Reasoning Effort</span>
<i data-source="openai,custom,xai,openrouter" class="opacity50p fa-solid fa-circle-info" title="OpenAI-style options: low, medium, high. Minimum and maximum are aliased to low and high. Auto does not send an effort level." data-i18n="[title]OpenAI-style options: low, medium, high. Minimum and maximum are aliased to low and high. Auto does not send an effort level."></i>
<i data-source="claude,makersuite" class="opacity50p fa-solid fa-circle-info" title="Allocates a portion of the response length for thinking (low: 10%, medium: 25%, high: 50%). Other options are model-dependent." data-i18n="[title]Allocates a portion of the response length for thinking (low: 10%, medium: 25%, high: 50%). Other options are model-dependent."></i>
<i data-source="claude,makersuite,vertexai" class="opacity50p fa-solid fa-circle-info" title="Allocates a portion of the response length for thinking (low: 10%, medium: 25%, high: 50%). Other options are model-dependent." data-i18n="[title]Allocates a portion of the response length for thinking (low: 10%, medium: 25%, high: 50%). Other options are model-dependent."></i>
</label>
<select id="openai_reasoning_effort">
<option data-i18n="openai_reasoning_effort_auto" value="auto">Auto</option>
@ -2769,6 +2769,7 @@
<option value="deepseek">DeepSeek</option>
<option value="groq">Groq</option>
<option value="makersuite">Google AI Studio</option>
<option value="vertexai">Google Vertex AI (Express mode)</option>
<option value="mistralai">MistralAI</option>
<option value="nanogpt">NanoGPT</option>
<option value="openrouter">OpenRouter</option>
@ -2779,7 +2780,7 @@
<option value="xai">xAI (Grok)</option>
</optgroup>
</select>
<div class="inline-drawer wide100p" data-source="openai,claude,mistralai,makersuite,deepseek,xai">
<div class="inline-drawer wide100p" data-source="openai,claude,mistralai,makersuite,vertexai,deepseek,xai">
<div class="inline-drawer-toggle inline-drawer-header">
<b data-i18n="Reverse Proxy">Reverse Proxy</b>
<div class="fa-solid fa-circle-chevron-down inline-drawer-icon down"></div>
@ -2843,7 +2844,7 @@
</div>
</div>
</div>
<div id="ReverseProxyWarningMessage" data-source="openai,claude,mistralai,makersuite,deepseek,xai">
<div id="ReverseProxyWarningMessage" data-source="openai,claude,mistralai,makersuite,vertexai,deepseek,xai">
<div class="reverse_proxy_warning">
<b>
<div data-i18n="Using a proxy that you're not running yourself is a risk to your data privacy.">
@ -3209,6 +3210,38 @@
</select>
</div>
</form>
<div id="vertexai_form" data-source="vertexai">
<h4>
<span data-i18n="Google Vertex AI API Key">
Google Vertex AI API Key
</span>
<a href="https://cloud.google.com/vertex-ai/generative-ai/docs/start/express-mode/overview" data-i18n="(Express mode keys only)" target="_blank" rel="noopener noreferrer">
(Express mode keys only)
</a>
</h4>
<div class="flex-container">
<input id="api_key_vertexai" name="api_key_vertexai" class="text_pole flex1" value="" type="text" autocomplete="off">
<div title="Clear your API key" data-i18n="[title]Clear your API key" class="menu_button fa-solid fa-circle-xmark clear-api-key" data-key="api_key_vertexai"></div>
</div>
<div data-for="api_key_vertexai" class="neutral_warning" data-i18n="For privacy reasons, your API key will be hidden after you reload the page.">
For privacy reasons, your API key will be hidden after you reload the page.
</div>
<div>
<h4 data-i18n="Google Model">Google Model</h4>
<select id="model_vertexai_select">
<optgroup label="Gemini 2.5">
<option value="gemini-2.5-pro-preview-05-06">gemini-2.5-pro-preview-05-06</option>
<option value="gemini-2.5-pro-preview-03-25">gemini-2.5-pro-preview-03-25</option>
<option value="gemini-2.5-flash-preview-05-20">gemini-2.5-flash-preview-05-20</option>
<option value="gemini-2.5-flash-preview-04-17">gemini-2.5-flash-preview-04-17</option>
</optgroup>
<optgroup label="Gemini 2.0">
<option value="gemini-2.0-flash-001">gemini-2.0-flash-001</option>
<option value="gemini-2.0-flash-lite-001">gemini-2.0-flash-lite-001</option>
</optgroup>
</select>
</div>
</div>
<form id="mistralai_form" data-source="mistralai" action="javascript:void(null);" method="post" enctype="multipart/form-data">
<h4 data-i18n="MistralAI API Key">MistralAI API Key</h4>
<div class="flex-container">

View File

@ -5941,6 +5941,7 @@ function extractImageFromData(data, { mainApi = null, chatCompletionSource = nul
switch (mainApi ?? main_api) {
case 'openai': {
switch (chatCompletionSource ?? oai_settings.chat_completion_source) {
case chat_completion_sources.VERTEXAI:
case chat_completion_sources.MAKERSUITE: {
const inlineData = data?.responseContent?.parts?.find(x => x.inlineData)?.inlineData;
if (inlineData) {

View File

@ -402,6 +402,7 @@ function RA_autoconnect(PrevApi) {
|| (secret_state[SECRET_KEYS.OPENROUTER] && oai_settings.chat_completion_source == chat_completion_sources.OPENROUTER)
|| (secret_state[SECRET_KEYS.AI21] && oai_settings.chat_completion_source == chat_completion_sources.AI21)
|| (secret_state[SECRET_KEYS.MAKERSUITE] && oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE)
|| (secret_state[SECRET_KEYS.VERTEXAI] && oai_settings.chat_completion_source == chat_completion_sources.VERTEXAI)
|| (secret_state[SECRET_KEYS.MISTRALAI] && oai_settings.chat_completion_source == chat_completion_sources.MISTRALAI)
|| (secret_state[SECRET_KEYS.COHERE] && oai_settings.chat_completion_source == chat_completion_sources.COHERE)
|| (secret_state[SECRET_KEYS.PERPLEXITY] && oai_settings.chat_completion_source == chat_completion_sources.PERPLEXITY)

View File

@ -414,6 +414,7 @@ jQuery(async function () {
'openai': SECRET_KEYS.OPENAI,
'mistral': SECRET_KEYS.MISTRALAI,
'google': SECRET_KEYS.MAKERSUITE,
'vertexai': SECRET_KEYS.VERTEXAI,
'anthropic': SECRET_KEYS.CLAUDE,
};

View File

@ -22,6 +22,7 @@
<option value="cohere">Cohere</option>
<option value="custom" data-i18n="Custom (OpenAI-compatible)">Custom (OpenAI-compatible)</option>
<option value="google">Google AI Studio</option>
<option value="vertexai">Google Vertex AI</option>
<option value="groq">Groq</option>
<option value="koboldcpp">KoboldCpp</option>
<option value="llamacpp">llama.cpp</option>
@ -111,6 +112,12 @@
<option data-type="google" value="gemini-1.5-flash-8b-exp-0827">gemini-1.5-flash-8b-exp-0827</option>
<option data-type="google" value="learnlm-2.0-flash-experimental">learnlm-2.0-flash-experimental</option>
<option data-type="google" value="learnlm-1.5-pro-experimental">learnlm-1.5-pro-experimental</option>
<option data-type="vertexai" value="gemini-2.5-pro-preview-05-06">gemini-2.5-pro-preview-05-06</option>
<option data-type="vertexai" value="gemini-2.5-pro-preview-03-25">gemini-2.5-pro-preview-03-25</option>
<option data-type="vertexai" value="gemini-2.5-flash-preview-05-20">gemini-2.5-flash-preview-05-20</option>
<option data-type="vertexai" value="gemini-2.5-flash-preview-04-17">gemini-2.5-flash-preview-04-17</option>
<option data-type="vertexai" value="gemini-2.0-flash-001">gemini-2.0-flash-001</option>
<option data-type="vertexai" value="gemini-2.0-flash-lite-001">gemini-2.0-flash-lite-001</option>
<option data-type="groq" value="llama-3.2-11b-vision-preview">llama-3.2-11b-vision-preview</option>
<option data-type="groq" value="llama-3.2-90b-vision-preview">llama-3.2-90b-vision-preview</option>
<option data-type="groq" value="llava-v1.5-7b-4096-preview">llava-v1.5-7b-4096-preview</option>
@ -170,7 +177,7 @@
<div data-type="ollama">
The model must be downloaded first! Do it with the <code>ollama pull</code> command or <a href="#" id="caption_ollama_pull">click here</a>.
</div>
<label data-type="openai,anthropic,google,mistral" class="checkbox_label flexBasis100p" for="caption_allow_reverse_proxy" title="Allow using reverse proxy if defined and valid.">
<label data-type="openai,anthropic,google,vertexai,mistral" class="checkbox_label flexBasis100p" for="caption_allow_reverse_proxy" title="Allow using reverse proxy if defined and valid.">
<input id="caption_allow_reverse_proxy" type="checkbox" class="checkbox">
<span data-i18n="Allow reverse proxy">Allow reverse proxy</span>
</label>

View File

@ -15,7 +15,7 @@ import { createThumbnail, isValidUrl } from '../utils.js';
*/
export async function getMultimodalCaption(base64Img, prompt) {
const useReverseProxy =
(['openai', 'anthropic', 'google', 'mistral'].includes(extension_settings.caption.multimodal_api))
(['openai', 'anthropic', 'google', 'mistral', 'vertexai'].includes(extension_settings.caption.multimodal_api))
&& extension_settings.caption.allow_reverse_proxy
&& oai_settings.reverse_proxy
&& isValidUrl(oai_settings.reverse_proxy);
@ -38,7 +38,8 @@ export async function getMultimodalCaption(base64Img, prompt) {
const isVllm = extension_settings.caption.multimodal_api === 'vllm';
const base64Bytes = base64Img.length * 0.75;
const compressionLimit = 2 * 1024 * 1024;
if ((['google', 'openrouter', 'mistral', 'groq'].includes(extension_settings.caption.multimodal_api) && base64Bytes > compressionLimit) || isOoba || isKoboldCpp) {
const thumbnailNeeded = ['google', 'openrouter', 'mistral', 'groq', 'vertexai'].includes(extension_settings.caption.multimodal_api);
if ((thumbnailNeeded && base64Bytes > compressionLimit) || isOoba || isKoboldCpp) {
const maxSide = 1024;
base64Img = await createThumbnail(base64Img, maxSide, maxSide, 'image/jpeg');
}
@ -94,6 +95,7 @@ export async function getMultimodalCaption(base64Img, prompt) {
function getEndpointUrl() {
switch (extension_settings.caption.multimodal_api) {
case 'google':
case 'vertexai':
return '/api/google/caption-image';
case 'anthropic':
return '/api/anthropic/caption-image';
@ -143,6 +145,10 @@ function throwIfInvalidModel(useReverseProxy) {
throw new Error('Google AI Studio API key is not set.');
}
if (extension_settings.caption.multimodal_api === 'vertexai' && !secret_state[SECRET_KEYS.VERTEXAI] && !useReverseProxy) {
throw new Error('Google Vertex AI API key is not set.');
}
if (extension_settings.caption.multimodal_api === 'mistral' && !secret_state[SECRET_KEYS.MISTRALAI] && !useReverseProxy) {
throw new Error('Mistral AI API key is not set.');
}

View File

@ -176,6 +176,7 @@ export const chat_completion_sources = {
OPENROUTER: 'openrouter',
AI21: 'ai21',
MAKERSUITE: 'makersuite',
VERTEXAI: 'vertexai',
MISTRALAI: 'mistralai',
CUSTOM: 'custom',
COHERE: 'cohere',
@ -277,6 +278,7 @@ export const settingsToUpdate = {
custom_include_headers: ['#custom_include_headers', 'custom_include_headers', false, true],
custom_prompt_post_processing: ['#custom_prompt_post_processing', 'custom_prompt_post_processing', false, true],
google_model: ['#model_google_select', 'google_model', false, true],
vertexai_model: ['#model_vertexai_select', 'vertexai_model', false, true],
openai_max_context: ['#openai_max_context', 'openai_max_context', false, false],
openai_max_tokens: ['#openai_max_tokens', 'openai_max_tokens', false, false],
wrap_in_quotes: ['#wrap_in_quotes', 'wrap_in_quotes', true, false],
@ -350,6 +352,7 @@ const default_settings = {
openai_model: 'gpt-4-turbo',
claude_model: 'claude-3-5-sonnet-20240620',
google_model: 'gemini-1.5-pro',
vertexai_model: 'gemini-2.0-flash-001',
ai21_model: 'jamba-1.6-large',
mistralai_model: 'mistral-large-latest',
cohere_model: 'command-r-plus',
@ -433,6 +436,7 @@ const oai_settings = {
openai_model: 'gpt-4-turbo',
claude_model: 'claude-3-5-sonnet-20240620',
google_model: 'gemini-1.5-pro',
vertexai_model: 'gemini-2.0-flash-001',
ai21_model: 'jamba-1.6-large',
mistralai_model: 'mistral-large-latest',
cohere_model: 'command-r-plus',
@ -1666,6 +1670,8 @@ export function getChatCompletionModel(source = null) {
return '';
case chat_completion_sources.MAKERSUITE:
return oai_settings.google_model;
case chat_completion_sources.VERTEXAI:
return oai_settings.vertexai_model;
case chat_completion_sources.OPENROUTER:
return oai_settings.openrouter_model !== openrouter_website_model ? oai_settings.openrouter_model : null;
case chat_completion_sources.AI21:
@ -2048,6 +2054,7 @@ async function sendOpenAIRequest(type, messages, signal) {
const isOpenRouter = oai_settings.chat_completion_source == chat_completion_sources.OPENROUTER;
const isScale = oai_settings.chat_completion_source == chat_completion_sources.SCALE;
const isGoogle = oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE;
const isVertexAI = oai_settings.chat_completion_source == chat_completion_sources.VERTEXAI;
const isOAI = oai_settings.chat_completion_source == chat_completion_sources.OPENAI;
const isMistral = oai_settings.chat_completion_source == chat_completion_sources.MISTRALAI;
const isCustom = oai_settings.chat_completion_source == chat_completion_sources.CUSTOM;
@ -2123,8 +2130,8 @@ async function sendOpenAIRequest(type, messages, signal) {
delete generate_data.stop;
}
// Proxy is only supported for Claude, OpenAI, Mistral, and Google MakerSuite
if (oai_settings.reverse_proxy && [chat_completion_sources.CLAUDE, chat_completion_sources.OPENAI, chat_completion_sources.MISTRALAI, chat_completion_sources.MAKERSUITE, chat_completion_sources.DEEPSEEK, chat_completion_sources.XAI].includes(oai_settings.chat_completion_source)) {
// Proxy is only supported for Claude, OpenAI, Mistral, Google MakerSuite, Vertex AI, DeepSeek, and xAI
if (oai_settings.reverse_proxy && [chat_completion_sources.CLAUDE, chat_completion_sources.OPENAI, chat_completion_sources.MISTRALAI, chat_completion_sources.MAKERSUITE, chat_completion_sources.VERTEXAI, chat_completion_sources.DEEPSEEK, chat_completion_sources.XAI].includes(oai_settings.chat_completion_source)) {
await validateReverseProxy();
generate_data['reverse_proxy'] = oai_settings.reverse_proxy;
generate_data['proxy_password'] = oai_settings.proxy_password;
@ -2175,7 +2182,7 @@ async function sendOpenAIRequest(type, messages, signal) {
generate_data['api_url_scale'] = oai_settings.api_url_scale;
}
if (isGoogle) {
if (isGoogle || isVertexAI) {
const stopStringsLimit = 5;
generate_data['top_k'] = Number(oai_settings.top_k_openai);
generate_data['stop'] = getCustomStoppingStrings(stopStringsLimit).slice(0, stopStringsLimit).filter(x => x.length >= 1 && x.length <= 16);
@ -2382,7 +2389,7 @@ export function getStreamingReply(data, state, { chatCompletionSource = null, ov
state.reasoning += data?.delta?.thinking || '';
}
return data?.delta?.text || '';
} else if (chat_completion_source === chat_completion_sources.MAKERSUITE) {
} else if ([chat_completion_sources.MAKERSUITE, chat_completion_sources.VERTEXAI].includes(chat_completion_source)) {
const inlineData = data?.candidates?.[0]?.content?.parts?.find(x => x.inlineData)?.inlineData;
if (inlineData) {
state.image = `data:${inlineData.mimeType};base64,${inlineData.data}`;
@ -2772,7 +2779,13 @@ class Message {
* @returns {Promise<string>} Compressed image as a Data URL.
*/
async compressImage(image) {
if ([chat_completion_sources.OPENROUTER, chat_completion_sources.MAKERSUITE, chat_completion_sources.MISTRALAI].includes(oai_settings.chat_completion_source)) {
const compressImageSources = [
chat_completion_sources.OPENROUTER,
chat_completion_sources.MAKERSUITE,
chat_completion_sources.MISTRALAI,
chat_completion_sources.VERTEXAI,
];
if (compressImageSources.includes(oai_settings.chat_completion_source)) {
const sizeThreshold = 2 * 1024 * 1024;
const dataSize = image.length * 0.75;
const maxSide = 1024;
@ -3368,6 +3381,7 @@ function loadOpenAISettings(data, settings) {
oai_settings.custom_include_headers = settings.custom_include_headers ?? default_settings.custom_include_headers;
oai_settings.custom_prompt_post_processing = settings.custom_prompt_post_processing ?? default_settings.custom_prompt_post_processing;
oai_settings.google_model = settings.google_model ?? default_settings.google_model;
oai_settings.vertexai_model = settings.vertexai_model ?? default_settings.vertexai_model;
oai_settings.chat_completion_source = settings.chat_completion_source ?? default_settings.chat_completion_source;
oai_settings.api_url_scale = settings.api_url_scale ?? default_settings.api_url_scale;
oai_settings.show_external_models = settings.show_external_models ?? default_settings.show_external_models;
@ -3432,6 +3446,8 @@ function loadOpenAISettings(data, settings) {
$(`#model_windowai_select option[value="${oai_settings.windowai_model}"`).prop('selected', true);
$('#model_google_select').val(oai_settings.google_model);
$(`#model_google_select option[value="${oai_settings.google_model}"`).prop('selected', true);
$('#model_vertexai_select').val(oai_settings.vertexai_model);
$(`#model_vertexai_select option[value="${oai_settings.vertexai_model}"`).prop('selected', true);
$('#model_ai21_select').val(oai_settings.ai21_model);
$(`#model_ai21_select option[value="${oai_settings.ai21_model}"`).prop('selected', true);
$('#model_mistralai_select').val(oai_settings.mistralai_model);
@ -3627,6 +3643,7 @@ async function getStatusOpen() {
chat_completion_sources.CLAUDE,
chat_completion_sources.AI21,
chat_completion_sources.MAKERSUITE,
chat_completion_sources.VERTEXAI,
chat_completion_sources.PERPLEXITY,
chat_completion_sources.GROQ,
];
@ -3648,7 +3665,16 @@ async function getStatusOpen() {
chat_completion_source: oai_settings.chat_completion_source,
};
if (oai_settings.reverse_proxy && [chat_completion_sources.CLAUDE, chat_completion_sources.OPENAI, chat_completion_sources.MISTRALAI, chat_completion_sources.MAKERSUITE, chat_completion_sources.DEEPSEEK, chat_completion_sources.XAI].includes(oai_settings.chat_completion_source)) {
const validateProxySources = [
chat_completion_sources.CLAUDE,
chat_completion_sources.OPENAI,
chat_completion_sources.MISTRALAI,
chat_completion_sources.MAKERSUITE,
chat_completion_sources.VERTEXAI,
chat_completion_sources.DEEPSEEK,
chat_completion_sources.XAI,
];
if (oai_settings.reverse_proxy && validateProxySources.includes(oai_settings.chat_completion_source)) {
await validateReverseProxy();
}
@ -3740,6 +3766,7 @@ async function saveOpenAIPreset(name, settings, triggerUi = true) {
custom_include_headers: settings.custom_include_headers,
custom_prompt_post_processing: settings.custom_prompt_post_processing,
google_model: settings.google_model,
vertexai_model: settings.vertexai_model,
temperature: settings.temp_openai,
frequency_penalty: settings.freq_pen_openai,
presence_penalty: settings.pres_pen_openai,
@ -4495,6 +4522,11 @@ async function onModelChange() {
oai_settings.google_model = value;
}
if ($(this).is('#model_vertexai_select')) {
console.log('Vertex AI model changed to', value);
oai_settings.vertexai_model = value;
}
if ($(this).is('#model_mistralai_select')) {
// Upgrade old mistral models to new naming scheme
// would have done this in loadOpenAISettings, but it wasn't updating on preset change?
@ -4575,7 +4607,7 @@ async function onModelChange() {
$('#temp_openai').attr('max', oai_max_temp).val(oai_settings.temp_openai).trigger('input');
}
if (oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) {
if ([chat_completion_sources.MAKERSUITE, chat_completion_sources.VERTEXAI].includes(oai_settings.chat_completion_source)) {
if (oai_settings.max_context_unlocked) {
$('#openai_max_context').attr('max', max_2mil);
} else if (value.includes('gemini-1.5-pro')) {
@ -4943,6 +4975,19 @@ async function onConnectButtonClick(e) {
}
}
if (oai_settings.chat_completion_source == chat_completion_sources.VERTEXAI) {
const api_key_vertexai = String($('#api_key_vertexai').val()).trim();
if (api_key_vertexai.length) {
await writeSecret(SECRET_KEYS.VERTEXAI, api_key_vertexai);
}
if (!secret_state[SECRET_KEYS.VERTEXAI] && !oai_settings.reverse_proxy) {
console.log('No secret key saved for Vertex AI');
return;
}
}
if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) {
const api_key_claude = String($('#api_key_claude').val()).trim();
@ -5120,6 +5165,9 @@ function toggleChatCompletionForms() {
else if (oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) {
$('#model_google_select').trigger('change');
}
else if (oai_settings.chat_completion_source == chat_completion_sources.VERTEXAI) {
$('#model_vertexai_select').trigger('change');
}
else if (oai_settings.chat_completion_source == chat_completion_sources.OPENROUTER) {
$('#model_openrouter_select').trigger('change');
}
@ -5281,6 +5329,8 @@ export function isImageInliningSupported() {
);
case chat_completion_sources.MAKERSUITE:
return visionSupportedModels.some(model => oai_settings.google_model.includes(model));
case chat_completion_sources.VERTEXAI:
return visionSupportedModels.some(model => oai_settings.vertexai_model.includes(model));
case chat_completion_sources.CLAUDE:
return visionSupportedModels.some(model => oai_settings.claude_model.includes(model));
case chat_completion_sources.OPENROUTER:
@ -5887,6 +5937,7 @@ export function initOpenAI() {
$('#model_windowai_select').on('change', onModelChange);
$('#model_scale_select').on('change', onModelChange);
$('#model_google_select').on('change', onModelChange);
$('#model_vertexai_select').on('change', onModelChange);
$('#model_openrouter_select').on('change', onModelChange);
$('#openrouter_group_models').on('change', onOpenrouterModelSortChange);
$('#openrouter_sort_models').on('change', onOpenrouterModelSortChange);

View File

@ -114,6 +114,7 @@ export function extractReasoningFromData(data, {
case chat_completion_sources.OPENROUTER:
return data?.choices?.[0]?.message?.reasoning ?? '';
case chat_completion_sources.MAKERSUITE:
case chat_completion_sources.VERTEXAI:
return data?.responseContent?.parts?.filter(part => part.thought)?.map(part => part.text)?.join('\n\n') ?? '';
case chat_completion_sources.CLAUDE:
return data?.content?.find(part => part.type === 'thinking')?.thinking ?? '';

View File

@ -16,6 +16,7 @@ export const SECRET_KEYS = {
AI21: 'api_key_ai21',
SCALE_COOKIE: 'scale_cookie',
MAKERSUITE: 'api_key_makersuite',
VERTEXAI: 'api_key_vertexai',
SERPAPI: 'api_key_serpapi',
MISTRALAI: 'api_key_mistralai',
TOGETHERAI: 'api_key_togetherai',
@ -56,6 +57,7 @@ const INPUT_MAP = {
[SECRET_KEYS.AI21]: '#api_key_ai21',
[SECRET_KEYS.SCALE_COOKIE]: '#scale_cookie',
[SECRET_KEYS.MAKERSUITE]: '#api_key_makersuite',
[SECRET_KEYS.VERTEXAI]: '#api_key_vertexai',
[SECRET_KEYS.VLLM]: '#api_key_vllm',
[SECRET_KEYS.APHRODITE]: '#api_key_aphrodite',
[SECRET_KEYS.TABBY]: '#api_key_tabby',

View File

@ -4127,6 +4127,7 @@ function getModelOptions(quiet) {
{ id: 'model_openrouter_select', api: 'openai', type: chat_completion_sources.OPENROUTER },
{ id: 'model_ai21_select', api: 'openai', type: chat_completion_sources.AI21 },
{ id: 'model_google_select', api: 'openai', type: chat_completion_sources.MAKERSUITE },
{ id: 'model_vertexai_select', api: 'openai', type: chat_completion_sources.VERTEXAI },
{ id: 'model_mistralai_select', api: 'openai', type: chat_completion_sources.MISTRALAI },
{ id: 'custom_model_id', api: 'openai', type: chat_completion_sources.CUSTOM },
{ id: 'model_cohere_select', api: 'openai', type: chat_completion_sources.COHERE },

View File

@ -676,6 +676,10 @@ export function getTokenizerModel() {
return gemmaTokenizer;
}
if (oai_settings.chat_completion_source == chat_completion_sources.VERTEXAI) {
return gemmaTokenizer;
}
if (oai_settings.chat_completion_source == chat_completion_sources.AI21) {
return jambaTokenizer;
}

View File

@ -592,6 +592,7 @@ export class ToolManager {
chat_completion_sources.COHERE,
chat_completion_sources.DEEPSEEK,
chat_completion_sources.MAKERSUITE,
chat_completion_sources.VERTEXAI,
chat_completion_sources.AI21,
chat_completion_sources.XAI,
chat_completion_sources.POLLINATIONS,

View File

@ -168,6 +168,7 @@ export const CHAT_COMPLETION_SOURCES = {
OPENROUTER: 'openrouter',
AI21: 'ai21',
MAKERSUITE: 'makersuite',
VERTEXAI: 'vertexai',
MISTRALAI: 'mistralai',
CUSTOM: 'custom',
COHERE: 'cohere',

View File

@ -52,6 +52,7 @@ const API_COHERE_V2 = 'https://api.cohere.ai/v2';
const API_PERPLEXITY = 'https://api.perplexity.ai';
const API_GROQ = 'https://api.groq.com/openai/v1';
const API_MAKERSUITE = 'https://generativelanguage.googleapis.com';
const API_VERTEX_AI = 'https://us-central1-aiplatform.googleapis.com';
const API_01AI = 'https://api.lingyiwanwu.com/v1';
const API_AI21 = 'https://api.ai21.com/studio/v1';
const API_NANOGPT = 'https://nano-gpt.com/api/v1';
@ -337,13 +338,28 @@ async function sendScaleRequest(request, response) {
* @param {express.Response} response Express response
*/
async function sendMakerSuiteRequest(request, response) {
const apiUrl = new URL(request.body.reverse_proxy || API_MAKERSUITE);
const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.MAKERSUITE);
const useVertexAi = request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.VERTEXAI;
const apiName = useVertexAi ? 'Google Vertex AI' : 'Google AI Studio';
let apiUrl;
let apiKey;
if (useVertexAi) {
apiUrl = new URL(request.body.reverse_proxy || API_VERTEX_AI);
apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.VERTEXAI);
if (!request.body.reverse_proxy && !apiKey) {
console.warn('Google AI Studio API key is missing.');
console.warn(`${apiName} API key is missing.`);
return response.status(400).send({ error: true });
}
} else {
apiUrl = new URL(request.body.reverse_proxy || API_MAKERSUITE);
apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.MAKERSUITE);
if (!request.body.reverse_proxy && !apiKey) {
console.warn(`${apiName} API key is missing.`);
return response.status(400).send({ error: true });
}
}
const model = String(request.body.model);
const stream = Boolean(request.body.stream);
@ -465,7 +481,7 @@ async function sendMakerSuiteRequest(request, response) {
}
const body = getGeminiBody();
console.debug('Google AI Studio request:', body);
console.debug(`${apiName} request:`, body);
try {
const controller = new AbortController();
@ -477,7 +493,13 @@ async function sendMakerSuiteRequest(request, response) {
const apiVersion = getConfigValue('gemini.apiVersion', 'v1beta');
const responseType = (stream ? 'streamGenerateContent' : 'generateContent');
const generateResponse = await fetch(`${apiUrl.toString().replace(/\/$/, '')}/${apiVersion}/models/${model}:${responseType}?key=${apiKey}${stream ? '&alt=sse' : ''}`, {
let url;
if (useVertexAi) {
url = `${apiUrl.toString().replace(/\/$/, '')}/v1/publishers/google/models/${model}:${responseType}?key=${apiKey}${stream ? '&alt=sse' : ''}`;
} else {
url = `${apiUrl.toString().replace(/\/$/, '')}/${apiVersion}/models/${model}:${responseType}?key=${apiKey}${stream ? '&alt=sse' : ''}`;
}
const generateResponse = await fetch(url, {
body: JSON.stringify(body),
method: 'POST',
headers: {
@ -498,7 +520,7 @@ async function sendMakerSuiteRequest(request, response) {
}
} else {
if (!generateResponse.ok) {
console.warn(`Google AI Studio API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
console.warn(`${apiName} API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
return response.status(500).send({ error: true });
}
@ -507,7 +529,7 @@ async function sendMakerSuiteRequest(request, response) {
const candidates = generateResponseJson?.candidates;
if (!candidates || candidates.length === 0) {
let message = 'Google AI Studio API returned no candidate';
let message = `${apiName} API returned no candidate`;
console.warn(message, generateResponseJson);
if (generateResponseJson?.promptFeedback?.blockReason) {
message += `\nPrompt was blocked due to : ${generateResponseJson.promptFeedback.blockReason}`;
@ -518,11 +540,11 @@ async function sendMakerSuiteRequest(request, response) {
const responseContent = candidates[0].content ?? candidates[0].output;
const functionCall = (candidates?.[0]?.content?.parts ?? []).some(part => part.functionCall);
const inlineData = (candidates?.[0]?.content?.parts ?? []).some(part => part.inlineData);
console.debug('Google AI Studio response:', util.inspect(generateResponseJson, { depth: 5, colors: true }));
console.debug(`${apiName} response:`, util.inspect(generateResponseJson, { depth: 5, colors: true }));
const responseText = typeof responseContent === 'string' ? responseContent : responseContent?.parts?.filter(part => !part.thought)?.map(part => part.text)?.join('\n\n');
if (!responseText && !functionCall && !inlineData) {
let message = 'Google AI Studio Candidate text empty';
let message = `${apiName} Candidate text empty`;
console.warn(message, generateResponseJson);
return response.send({ error: { message } });
}
@ -532,7 +554,7 @@ async function sendMakerSuiteRequest(request, response) {
return response.send(reply);
}
} catch (error) {
console.error('Error communicating with Google AI Studio API: ', error);
console.error(`Error communicating with ${apiName} API:`, error);
if (!response.headersSent) {
return response.status(500).send({ error: true });
}
@ -1189,6 +1211,7 @@ router.post('/generate', function (request, response) {
case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response);
case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response);
case CHAT_COMPLETION_SOURCES.MAKERSUITE: return sendMakerSuiteRequest(request, response);
case CHAT_COMPLETION_SOURCES.VERTEXAI: return sendMakerSuiteRequest(request, response);
case CHAT_COMPLETION_SOURCES.MISTRALAI: return sendMistralAIRequest(request, response);
case CHAT_COMPLETION_SOURCES.COHERE: return sendCohereRequest(request, response);
case CHAT_COMPLETION_SOURCES.DEEPSEEK: return sendDeepSeekRequest(request, response);

View File

@ -7,6 +7,7 @@ import { readSecret, SECRET_KEYS } from './secrets.js';
import { GEMINI_SAFETY } from '../constants.js';
// Default base URL for Google AI Studio requests; only used when the request
// body does not supply a `reverse_proxy` override.
const API_MAKERSUITE = 'https://generativelanguage.googleapis.com';
// Default base URL for Google Vertex AI requests (host name targets us-central1);
// only used when the request body does not supply a `reverse_proxy` override.
const API_VERTEX_AI = 'https://us-central1-aiplatform.googleapis.com';
// Express router on which this module's routes (e.g. POST /caption-image) are registered.
export const router = express.Router();
@ -14,12 +15,27 @@ router.post('/caption-image', async (request, response) => {
try {
const mimeType = request.body.image.split(';')[0].split(':')[1];
const base64Data = request.body.image.split(',')[1];
const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.MAKERSUITE);
const apiUrl = new URL(request.body.reverse_proxy || API_MAKERSUITE);
const useVertexAi = request.body.api === 'vertexai';
const apiName = useVertexAi ? 'Google Vertex AI' : 'Google AI Studio';
let apiKey;
let apiUrl;
if (useVertexAi) {
apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.VERTEXAI);
apiUrl = new URL(request.body.reverse_proxy || API_VERTEX_AI);
} else {
apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.MAKERSUITE);
apiUrl = new URL(request.body.reverse_proxy || API_MAKERSUITE);
}
const model = request.body.model || 'gemini-2.0-flash';
const url = `${apiUrl.origin}/v1beta/models/${model}:generateContent?key=${apiKey}`;
let url;
if (useVertexAi) {
url = `${apiUrl.origin}/v1/publishers/google/models/${model}:generateContent?key=${apiKey}`;
} else {
url = `${apiUrl.origin}/v1beta/models/${model}:generateContent?key=${apiKey}`;
}
const body = {
contents: [{
role: 'user',
parts: [
{ text: request.body.prompt },
{
@ -32,7 +48,7 @@ router.post('/caption-image', async (request, response) => {
safetySettings: GEMINI_SAFETY,
};
console.debug('Multimodal captioning request', model, body);
console.debug(`${apiName} captioning request`, model, body);
const result = await fetch(url, {
body: JSON.stringify(body),
@ -44,13 +60,13 @@ router.post('/caption-image', async (request, response) => {
if (!result.ok) {
const error = await result.json();
console.error(`Google AI Studio API returned error: ${result.status} ${result.statusText}`, error);
console.error(`${apiName} API returned error: ${result.status} ${result.statusText}`, error);
return response.status(500).send({ error: true });
}
/** @type {any} */
const data = await result.json();
console.info('Multimodal captioning response', data);
console.info(`${apiName} captioning response`, data);
const candidates = data?.candidates;
if (!candidates) {

View File

@ -26,6 +26,7 @@ export const SECRET_KEYS = {
ONERING_URL: 'oneringtranslator_url',
DEEPLX_URL: 'deeplx_url',
MAKERSUITE: 'api_key_makersuite',
VERTEXAI: 'api_key_vertexai',
SERPAPI: 'api_key_serpapi',
TOGETHERAI: 'api_key_togetherai',
MISTRALAI: 'api_key_mistralai',

View File

@ -463,7 +463,7 @@ export function getTokenizerModel(requestModel) {
return 'deepseek';
}
if (requestModel.includes('gemma') || requestModel.includes('gemini')) {
if (requestModel.includes('gemma') || requestModel.includes('gemini') || requestModel.includes('learnlm')) {
return 'gemma';
}