mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Gemini inline video (#4078)
* Add inline video attachment support for Gemini 2.5 Pro * file formatting * removed redundant function for saving video to message * removed other redundant function for saving video to message * Seperate inlining check for video * Edit video token cost to be a conservative estimate of 10000 tokens * fixed missing semicolon * Adds seperate ui toggle for video inlining. * Move mes_video out of img_container * Remove title from video element for now * Better visibilty of video with controls --------- Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
This commit is contained in:
@@ -211,6 +211,12 @@ export async function populateFileAttachment(message, inputId = 'file_form_input
|
||||
const imageUrl = await saveBase64AsFile(base64Data, name2, fileNamePrefix, extension);
|
||||
message.extra.image = imageUrl;
|
||||
message.extra.inline_image = true;
|
||||
}
|
||||
// If file is video
|
||||
else if (file.type.startsWith('video/')) {
|
||||
const extension = file.type.split('/')[1];
|
||||
const videoUrl = await saveBase64AsFile(base64Data, name2, fileNamePrefix, extension);
|
||||
message.extra.video = videoUrl;
|
||||
} else {
|
||||
const uniqueFileName = `${fileNamePrefix}.txt`;
|
||||
|
||||
@@ -1240,7 +1246,7 @@ async function openAttachmentManager() {
|
||||
|
||||
return () => {
|
||||
modalButtonData.forEach(p => {
|
||||
const { popper,bodyListener } = p;
|
||||
const { popper, bodyListener } = p;
|
||||
popper.destroy();
|
||||
document.body.removeEventListener('click', bodyListener);
|
||||
});
|
||||
|
@@ -313,6 +313,7 @@ export const settingsToUpdate = {
|
||||
squash_system_messages: ['#squash_system_messages', 'squash_system_messages', true, false],
|
||||
image_inlining: ['#openai_image_inlining', 'image_inlining', true, false],
|
||||
inline_image_quality: ['#openai_inline_image_quality', 'inline_image_quality', false, false],
|
||||
video_inlining: ['#openai_video_inlining', 'video_inlining', true, false],
|
||||
continue_prefill: ['#continue_prefill', 'continue_prefill', true, false],
|
||||
continue_postfix: ['#continue_postfix', 'continue_postfix', false, false],
|
||||
function_calling: ['#openai_function_calling', 'function_calling', true, false],
|
||||
@@ -396,6 +397,7 @@ const default_settings = {
|
||||
squash_system_messages: false,
|
||||
image_inlining: false,
|
||||
inline_image_quality: 'low',
|
||||
video_inlining: false,
|
||||
bypass_status_check: false,
|
||||
continue_prefill: false,
|
||||
function_calling: false,
|
||||
@@ -482,6 +484,7 @@ const oai_settings = {
|
||||
squash_system_messages: false,
|
||||
image_inlining: false,
|
||||
inline_image_quality: 'low',
|
||||
video_inlining: false,
|
||||
bypass_status_check: false,
|
||||
continue_prefill: false,
|
||||
function_calling: false,
|
||||
@@ -593,8 +596,9 @@ function setOpenAIMessages(chat) {
|
||||
if (role == 'user' && oai_settings.wrap_in_quotes) content = `"${content}"`;
|
||||
const name = chat[j]['name'];
|
||||
const image = chat[j]?.extra?.image;
|
||||
const video = chat[j]?.extra?.video;
|
||||
const invocations = chat[j]?.extra?.tool_invocations;
|
||||
messages[i] = { 'role': role, 'content': content, name: name, 'image': image, 'invocations': invocations };
|
||||
messages[i] = { 'role': role, 'content': content, name: name, 'image': image, 'video': video, 'invocations': invocations };
|
||||
j++;
|
||||
}
|
||||
|
||||
@@ -886,6 +890,7 @@ async function populateChatHistory(messages, prompts, chatCompletion, type = nul
|
||||
}
|
||||
|
||||
const imageInlining = isImageInliningSupported();
|
||||
const videoInlining = isVideoInliningSupported();
|
||||
const canUseTools = ToolManager.isToolCallingSupported();
|
||||
|
||||
// Insert chat messages as long as there is budget available
|
||||
@@ -908,6 +913,10 @@ async function populateChatHistory(messages, prompts, chatCompletion, type = nul
|
||||
await chatMessage.addImage(chatPrompt.image);
|
||||
}
|
||||
|
||||
if (videoInlining && chatPrompt.video) {
|
||||
await chatMessage.addVideo(chatPrompt.video);
|
||||
}
|
||||
|
||||
if (canUseTools && Array.isArray(chatPrompt.invocations)) {
|
||||
/** @type {import('./tool-calling.js').ToolInvocation[]} */
|
||||
const invocations = chatPrompt.invocations;
|
||||
@@ -2781,6 +2790,38 @@ class Message {
|
||||
}
|
||||
}
|
||||
|
||||
async addVideo(video) {
|
||||
const textContent = this.content;
|
||||
const isDataUrl = isDataURL(video);
|
||||
if (!isDataUrl) {
|
||||
try {
|
||||
const response = await fetch(video, { method: 'GET', cache: 'force-cache' });
|
||||
if (!response.ok) throw new Error('Failed to fetch video');
|
||||
const blob = await response.blob();
|
||||
video = await getBase64Async(blob);
|
||||
} catch (error) {
|
||||
console.error('Video adding skipped', error);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Note: No compression for videos (unlike images)
|
||||
this.content = [
|
||||
{ type: 'text', text: textContent },
|
||||
{ type: 'video_url', video_url: { 'url': video } },
|
||||
];
|
||||
|
||||
try {
|
||||
// Convservative estimate for video token cost without knowing duration
|
||||
// Using Gemini calculation (263 tokens per second)
|
||||
const tokens = 10000; // ~40 second video (60 seconds max)
|
||||
this.tokens += tokens;
|
||||
} catch (error) {
|
||||
this.tokens += 10000;
|
||||
console.error('Failed to get video token cost', error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compress an image if it exceeds the size threshold for the current chat completion source.
|
||||
* @param {string} image Data URL of the image.
|
||||
@@ -3398,6 +3439,7 @@ function loadOpenAISettings(data, settings) {
|
||||
oai_settings.assistant_impersonation = settings.assistant_impersonation ?? default_settings.assistant_impersonation;
|
||||
oai_settings.image_inlining = settings.image_inlining ?? default_settings.image_inlining;
|
||||
oai_settings.inline_image_quality = settings.inline_image_quality ?? default_settings.inline_image_quality;
|
||||
oai_settings.video_inlining = settings.video_inlining ?? default_settings.video_inlining;
|
||||
oai_settings.bypass_status_check = settings.bypass_status_check ?? default_settings.bypass_status_check;
|
||||
oai_settings.show_thoughts = settings.show_thoughts ?? default_settings.show_thoughts;
|
||||
oai_settings.reasoning_effort = settings.reasoning_effort ?? default_settings.reasoning_effort;
|
||||
@@ -3448,6 +3490,8 @@ function loadOpenAISettings(data, settings) {
|
||||
$('#openai_inline_image_quality').val(oai_settings.inline_image_quality);
|
||||
$(`#openai_inline_image_quality option[value="${oai_settings.inline_image_quality}"]`).prop('selected', true);
|
||||
|
||||
$('#openai_video_inlining').prop('checked', oai_settings.video_inlining);
|
||||
|
||||
$('#model_openai_select').val(oai_settings.openai_model);
|
||||
$(`#model_openai_select option[value="${oai_settings.openai_model}"`).prop('selected', true);
|
||||
$('#model_claude_select').val(oai_settings.claude_model);
|
||||
@@ -3824,6 +3868,7 @@ async function saveOpenAIPreset(name, settings, triggerUi = true) {
|
||||
squash_system_messages: settings.squash_system_messages,
|
||||
image_inlining: settings.image_inlining,
|
||||
inline_image_quality: settings.inline_image_quality,
|
||||
video_inlining: settings.video_inlining,
|
||||
bypass_status_check: settings.bypass_status_check,
|
||||
continue_prefill: settings.continue_prefill,
|
||||
continue_postfix: settings.continue_postfix,
|
||||
@@ -5387,6 +5432,36 @@ export function isImageInliningSupported() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the model supports video inlining
|
||||
* @returns {boolean} True if the model supports video inlining
|
||||
*/
|
||||
export function isVideoInliningSupported() {
|
||||
if (main_api !== 'openai') {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!oai_settings.video_inlining) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Only Gemini models support video for now
|
||||
const videoSupportedModels = [
|
||||
'gemini-2.0',
|
||||
'gemini-2.5',
|
||||
'gemini-exp-1206',
|
||||
];
|
||||
|
||||
switch (oai_settings.chat_completion_source) {
|
||||
case chat_completion_sources.MAKERSUITE:
|
||||
return videoSupportedModels.some(model => oai_settings.google_model.includes(model));
|
||||
case chat_completion_sources.VERTEXAI:
|
||||
return videoSupportedModels.some(model => oai_settings.vertexai_model.includes(model));
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Proxy stuff
|
||||
*/
|
||||
@@ -5945,6 +6020,11 @@ export function initOpenAI() {
|
||||
saveSettingsDebounced();
|
||||
});
|
||||
|
||||
$('#openai_video_inlining').on('input', function () {
|
||||
oai_settings.video_inlining = !!$(this).prop('checked');
|
||||
saveSettingsDebounced();
|
||||
});
|
||||
|
||||
$('#continue_prefill').on('input', function () {
|
||||
oai_settings.continue_prefill = !!$(this).prop('checked');
|
||||
saveSettingsDebounced();
|
||||
|
Reference in New Issue
Block a user