mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Add multimodal captioning for SD prompt generation
This commit is contained in:
@ -1,6 +1,7 @@
|
||||
import { getRequestHeaders } from "../../script.js";
|
||||
import { extension_settings } from "../extensions.js";
|
||||
import { SECRET_KEYS, secret_state } from "../secrets.js";
|
||||
import { createThumbnail } from "../utils.js";
|
||||
|
||||
/**
|
||||
* Generates a caption for an image using a multimodal model.
|
||||
@ -17,6 +18,14 @@ export async function getMultimodalCaption(base64Img, prompt) {
|
||||
throw new Error('OpenRouter API key is not set.');
|
||||
}
|
||||
|
||||
// OpenRouter has a payload limit of ~2 MB, so larger images (size estimated from the base64 length) are shrunk to a thumbnail first
|
||||
const base64Bytes = base64Img.length * 0.75;
|
||||
const compressionLimit = 2 * 1024 * 1024;
|
||||
if (extension_settings.caption.multimodal_api === 'openrouter' && base64Bytes > compressionLimit) {
|
||||
const maxSide = 1024;
|
||||
base64Img = await createThumbnail(base64Img, maxSide, maxSide);
|
||||
}
|
||||
|
||||
const apiResult = await fetch('/api/openai/caption-image', {
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders(),
|
||||
|
Reference in New Issue
Block a user